Merge branch 'main' into feat/cli

This commit is contained in:
Xiyuan Chen
2026-05-25 01:44:50 -07:00
committed by GitHub
105 changed files with 1410 additions and 908 deletions

View File

@ -55,10 +55,14 @@ class AgentBackendModelConfig(BaseModel):
class AgentBackendOutputConfig(BaseModel):
"""API-side structured output declaration for the conventional output layer."""
"""API-side structured output declaration for the conventional output layer.
The structured-output tool name is fixed to ``final_output`` inside
``dify_agent.layers.output`` so callers only control the JSON Schema plus
optional description/strictness metadata.
"""
json_schema: dict[str, JsonValue]
name: str = "final_result"
description: str | None = None
strict: bool | None = None
@ -153,7 +157,6 @@ class AgentBackendRunRequestBuilder:
metadata=run_input.metadata,
config=DifyOutputLayerConfig(
json_schema=run_input.output.json_schema,
name=run_input.output.name,
description=run_input.output.description,
strict=run_input.output.strict,
),

View File

@ -134,7 +134,7 @@ class CompletionConversationApi(Resource):
.join( # type: ignore
MessageAnnotation, MessageAnnotation.conversation_id == Conversation.id
)
.distinct()
.group_by(Conversation.id)
)
elif args.annotation_status == "not_annotated":
query = (
@ -272,7 +272,7 @@ class ChatConversationApi(Resource):
.join( # type: ignore
MessageAnnotation, MessageAnnotation.conversation_id == Conversation.id
)
.distinct()
.group_by(Conversation.id)
)
case "not_annotated":
query = (

View File

@ -417,7 +417,7 @@ class MessageApi(Resource):
@setup_required
@login_required
@account_initialization_required
def get(self, app_model, message_id: str):
def get(self, app_model, message_id: UUID):
message_id_str = str(message_id)
message = db.session.scalar(

View File

@ -2,6 +2,7 @@ import logging
from collections.abc import Callable
from functools import wraps
from typing import Any, TypedDict
from uuid import UUID
from flask import Response, request
from flask_restx import Resource, fields, marshal, marshal_with
@ -345,14 +346,15 @@ class VariableApi(Resource):
@console_ns.response(404, "Variable not found")
@_api_prerequisite
@marshal_with(workflow_draft_variable_model)
def get(self, app_model: App, variable_id: UUID):
    """Return the draft variable identified by ``variable_id`` for ``app_model``.

    ``variable_id`` is annotated as ``UUID``; it is converted to ``str`` once
    and that string is used both for the service lookup and for the access
    check, so the two always see the same value.
    """
    draft_var_srv = WorkflowDraftVariableService(
        session=db.session(),
    )
    variable_id_str = str(variable_id)
    # NOTE(review): _ensure_variable_access is defined outside this view; it
    # presumably produces the documented 404 when the variable is missing or
    # belongs to a different app -- confirm against its definition.
    variable = _ensure_variable_access(
        variable=draft_var_srv.get_variable(variable_id=variable_id_str),
        app_id=app_model.id,
        variable_id=variable_id_str,
    )
    return variable
@ -363,7 +365,7 @@ class VariableApi(Resource):
@console_ns.response(404, "Variable not found")
@_api_prerequisite
@marshal_with(workflow_draft_variable_model)
def patch(self, app_model: App, variable_id: str):
def patch(self, app_model: App, variable_id: UUID):
# Request payload for file types:
#
# Local File:
@ -390,10 +392,11 @@ class VariableApi(Resource):
)
args_model = WorkflowDraftVariableUpdatePayload.model_validate(console_ns.payload or {})
variable_id_str = str(variable_id)
variable = _ensure_variable_access(
variable=draft_var_srv.get_variable(variable_id=variable_id),
variable=draft_var_srv.get_variable(variable_id=variable_id_str),
app_id=app_model.id,
variable_id=variable_id,
variable_id=variable_id_str,
)
new_name = args_model.name
@ -434,14 +437,15 @@ class VariableApi(Resource):
@console_ns.response(204, "Variable deleted successfully")
@console_ns.response(404, "Variable not found")
@_api_prerequisite
def delete(self, app_model: App, variable_id: str):
def delete(self, app_model: App, variable_id: UUID):
draft_var_srv = WorkflowDraftVariableService(
session=db.session(),
)
variable_id_str = str(variable_id)
variable = _ensure_variable_access(
variable=draft_var_srv.get_variable(variable_id=variable_id),
variable=draft_var_srv.get_variable(variable_id=variable_id_str),
app_id=app_model.id,
variable_id=variable_id,
variable_id=variable_id_str,
)
draft_var_srv.delete_variable(variable)
db.session.commit()
@ -457,7 +461,7 @@ class VariableResetApi(Resource):
@console_ns.response(204, "Variable reset (no content)")
@console_ns.response(404, "Variable not found")
@_api_prerequisite
def put(self, app_model: App, variable_id: str):
def put(self, app_model: App, variable_id: UUID):
draft_var_srv = WorkflowDraftVariableService(
session=db.session(),
)
@ -468,10 +472,11 @@ class VariableResetApi(Resource):
raise NotFoundError(
f"Draft workflow not found, app_id={app_model.id}",
)
variable_id_str = str(variable_id)
variable = _ensure_variable_access(
variable=draft_var_srv.get_variable(variable_id=variable_id),
variable=draft_var_srv.get_variable(variable_id=variable_id_str),
app_id=app_model.id,
variable_id=variable_id,
variable_id=variable_id_str,
)
resetted = draft_var_srv.reset_variable(draft_workflow, variable)

View File

@ -189,7 +189,7 @@ class WorkflowRunExportApi(Resource):
@login_required
@account_initialization_required
@get_app_model()
def get(self, app_model: App, run_id: str):
def get(self, app_model: App, run_id: UUID):
tenant_id = str(app_model.tenant_id)
app_id = str(app_model.id)
run_id_str = str(run_id)

View File

@ -979,7 +979,7 @@ class DocumentDownloadApi(DocumentResource):
@login_required
@account_initialization_required
@cloud_edition_billing_rate_limit_check("knowledge")
def get(self, dataset_id: UUID, document_id: UUID) -> dict[str, Any]:
    """Return a download URL for one document of a dataset.

    Returns:
        A dict with a single ``"url"`` key whose value comes from
        ``DocumentService.get_document_download_url``.
    """
    # Reuse the shared permission/tenant checks implemented in DocumentResource.
    # Both ids are passed on as strings, hence the explicit conversion.
    document = self.get_document(str(dataset_id), str(document_id))
    return {"url": DocumentService.get_document_download_url(document)}
@ -996,7 +996,7 @@ class DocumentBatchDownloadZipApi(DocumentResource):
@account_initialization_required
@cloud_edition_billing_rate_limit_check("knowledge")
@console_ns.expect(console_ns.models[DocumentBatchDownloadZipPayload.__name__])
def post(self, dataset_id: str):
def post(self, dataset_id: UUID):
"""Stream a ZIP archive containing the requested uploaded documents."""
# Parse and validate request payload.
payload = DocumentBatchDownloadZipPayload.model_validate(console_ns.payload or {})

View File

@ -1,6 +1,7 @@
import logging
from collections.abc import Callable
from typing import Any, NoReturn
from uuid import UUID
from flask import Response, request
from flask_restx import Resource, marshal, marshal_with
@ -168,21 +169,22 @@ class RagPipelineVariableApi(Resource):
@_api_prerequisite
@marshal_with(workflow_draft_variable_model)
def get(self, pipeline: Pipeline, variable_id: UUID):
    """Return the draft variable ``variable_id`` if it belongs to ``pipeline``.

    Raises:
        NotFoundError: if no such variable exists, or if it exists but its
            ``app_id`` differs from ``pipeline.id``.
    """
    draft_var_srv = WorkflowDraftVariableService(
        session=db.session(),
    )
    variable_id_str = str(variable_id)
    variable = draft_var_srv.get_variable(variable_id=variable_id_str)
    # A variable owned by another pipeline is reported exactly like a missing
    # one, so both cases share a single branch and message.
    if variable is None or variable.app_id != pipeline.id:
        raise NotFoundError(description=f"variable not found, id={variable_id_str}")
    return variable
@_api_prerequisite
@marshal_with(workflow_draft_variable_model)
@console_ns.expect(console_ns.models[WorkflowDraftVariablePatchPayload.__name__])
def patch(self, pipeline: Pipeline, variable_id: str):
def patch(self, pipeline: Pipeline, variable_id: UUID):
# Request payload for file types:
#
# Local File:
@ -210,11 +212,12 @@ class RagPipelineVariableApi(Resource):
payload = WorkflowDraftVariablePatchPayload.model_validate(console_ns.payload or {})
args = payload.model_dump(exclude_none=True)
variable = draft_var_srv.get_variable(variable_id=variable_id)
variable_id_str = str(variable_id)
variable = draft_var_srv.get_variable(variable_id=variable_id_str)
if variable is None:
raise NotFoundError(description=f"variable not found, id={variable_id}")
raise NotFoundError(description=f"variable not found, id={variable_id_str}")
if variable.app_id != pipeline.id:
raise NotFoundError(description=f"variable not found, id={variable_id}")
raise NotFoundError(description=f"variable not found, id={variable_id_str}")
new_name = args.get(self._PATCH_NAME_FIELD, None)
raw_value = args.get(self._PATCH_VALUE_FIELD, None)
@ -250,15 +253,16 @@ class RagPipelineVariableApi(Resource):
return variable
@_api_prerequisite
def delete(self, pipeline: Pipeline, variable_id: UUID):
    """Delete the draft variable ``variable_id`` of ``pipeline``.

    Returns:
        An empty response with status 204 once the deletion is committed.

    Raises:
        NotFoundError: if no such variable exists, or if it exists but its
            ``app_id`` differs from ``pipeline.id``.
    """
    draft_var_srv = WorkflowDraftVariableService(
        session=db.session(),
    )
    variable_id_str = str(variable_id)
    variable = draft_var_srv.get_variable(variable_id=variable_id_str)
    # A variable owned by another pipeline is reported exactly like a missing
    # one, so both cases share a single branch and message.
    if variable is None or variable.app_id != pipeline.id:
        raise NotFoundError(description=f"variable not found, id={variable_id_str}")
    draft_var_srv.delete_variable(variable)
    db.session.commit()
    return Response("", 204)
@ -267,7 +271,7 @@ class RagPipelineVariableApi(Resource):
@console_ns.route("/rag/pipelines/<uuid:pipeline_id>/workflows/draft/variables/<uuid:variable_id>/reset")
class RagPipelineVariableResetApi(Resource):
@_api_prerequisite
def put(self, pipeline: Pipeline, variable_id: str):
def put(self, pipeline: Pipeline, variable_id: UUID):
draft_var_srv = WorkflowDraftVariableService(
session=db.session(),
)
@ -278,11 +282,12 @@ class RagPipelineVariableResetApi(Resource):
raise NotFoundError(
f"Draft workflow not found, pipeline_id={pipeline.id}",
)
variable = draft_var_srv.get_variable(variable_id=variable_id)
variable_id_str = str(variable_id)
variable = draft_var_srv.get_variable(variable_id=variable_id_str)
if variable is None:
raise NotFoundError(description=f"variable not found, id={variable_id}")
raise NotFoundError(description=f"variable not found, id={variable_id_str}")
if variable.app_id != pipeline.id:
raise NotFoundError(description=f"variable not found, id={variable_id}")
raise NotFoundError(description=f"variable not found, id={variable_id_str}")
resetted = draft_var_srv.reset_variable(draft_workflow, variable)
db.session.commit()

View File

@ -901,7 +901,7 @@ class RagPipelineWorkflowRunNodeExecutionListApi(Resource):
@login_required
@account_initialization_required
@get_rag_pipeline
def get(self, pipeline: Pipeline, run_id: str):
def get(self, pipeline: Pipeline, run_id: UUID):
"""
Get workflow run node execution list
"""

View File

@ -174,11 +174,11 @@ class AnnotationUpdateDeleteApi(Resource):
)
@validate_app_token
@edit_permission_required
def put(self, app_model: App, annotation_id: UUID):
    """Update an existing annotation.

    The request body is validated with ``AnnotationCreatePayload``; only its
    ``question`` and ``answer`` are forwarded to the service. The updated
    annotation is returned as a JSON-compatible dict.
    """
    payload = AnnotationCreatePayload.model_validate(service_api_ns.payload or {})
    update_args: UpdateAnnotationArgs = {"question": payload.question, "answer": payload.answer}
    # The service is called with the annotation id as a string, not a UUID object.
    annotation = AppAnnotationService.update_app_annotation_directly(update_args, app_model.id, str(annotation_id))
    response = Annotation.model_validate(annotation, from_attributes=True)
    return response.model_dump(mode="json")
@ -195,7 +195,7 @@ class AnnotationUpdateDeleteApi(Resource):
)
@validate_app_token
@edit_permission_required
def delete(self, app_model: App, annotation_id: UUID):
    """Delete an annotation.

    Returns:
        An empty body with status 204.
    """
    # Single call, with the id in string form; a second call with the raw
    # UUID would attempt the deletion twice.
    AppAnnotationService.delete_app_annotation(app_model.id, str(annotation_id))
    return "", 204

View File

@ -1,5 +1,6 @@
import logging
from urllib.parse import quote
from uuid import UUID
from flask import Response, request
from flask_restx import Resource
@ -50,20 +51,20 @@ class FilePreviewApi(Resource):
}
)
@validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.QUERY))
def get(self, app_model: App, end_user: EndUser, file_id: str):
def get(self, app_model: App, end_user: EndUser, file_id: UUID):
"""
Preview/Download a file that was uploaded via Service API.
Provides secure file preview/download functionality.
Files can only be accessed if they belong to messages within the requesting app's context.
"""
file_id = str(file_id)
file_id_str = str(file_id)
# Parse query parameters
args = FilePreviewQuery.model_validate(request.args.to_dict())
# Validate file ownership and get file objects
_, upload_file = self._validate_file_ownership(file_id, app_model.id)
_, upload_file = self._validate_file_ownership(file_id_str, app_model.id)
# Get file content generator
try:

View File

@ -1,5 +1,6 @@
from collections.abc import Generator
from typing import Any
from uuid import UUID
from flask import request
from pydantic import BaseModel
@ -64,10 +65,11 @@ class DatasourcePluginsApi(DatasetApiResource):
401: "Unauthorized - invalid API token",
}
)
def get(self, tenant_id: str, dataset_id: str):
def get(self, tenant_id: str, dataset_id: UUID):
"""Resource for getting datasource plugins."""
dataset_id_str = str(dataset_id)
# Verify dataset ownership
stmt = select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id)
stmt = select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id_str)
dataset = db.session.scalar(stmt)
if not dataset:
raise NotFound("Dataset not found.")
@ -77,7 +79,7 @@ class DatasourcePluginsApi(DatasetApiResource):
rag_pipeline_service: RagPipelineService = RagPipelineService()
datasource_plugins: list[dict[Any, Any]] = rag_pipeline_service.get_datasource_plugins(
tenant_id=tenant_id, dataset_id=dataset_id, is_published=is_published
tenant_id=tenant_id, dataset_id=dataset_id_str, is_published=is_published
)
return datasource_plugins, 200
@ -109,10 +111,11 @@ class DatasourceNodeRunApi(DatasetApiResource):
}
)
@service_api_ns.expect(service_api_ns.models[DatasourceNodeRunPayload.__name__])
def post(self, tenant_id: str, dataset_id: str, node_id: str):
def post(self, tenant_id: str, dataset_id: UUID, node_id: str):
"""Resource for getting datasource plugins."""
dataset_id_str = str(dataset_id)
# Verify dataset ownership
stmt = select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id)
stmt = select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id_str)
dataset = db.session.scalar(stmt)
if not dataset:
raise NotFound("Dataset not found.")
@ -120,7 +123,7 @@ class DatasourceNodeRunApi(DatasetApiResource):
payload = DatasourceNodeRunPayload.model_validate(service_api_ns.payload or {})
assert isinstance(current_user, Account)
rag_pipeline_service: RagPipelineService = RagPipelineService()
pipeline: Pipeline = rag_pipeline_service.get_pipeline(tenant_id=tenant_id, dataset_id=dataset_id)
pipeline: Pipeline = rag_pipeline_service.get_pipeline(tenant_id=tenant_id, dataset_id=dataset_id_str)
datasource_node_run_api_entity = DatasourceNodeRunApiEntity.model_validate(
{
**payload.model_dump(exclude_none=True),
@ -172,10 +175,11 @@ class PipelineRunApi(DatasetApiResource):
}
)
@service_api_ns.expect(service_api_ns.models[PipelineRunApiEntity.__name__])
def post(self, tenant_id: str, dataset_id: str):
def post(self, tenant_id: str, dataset_id: UUID):
"""Resource for running a rag pipeline."""
dataset_id_str = str(dataset_id)
# Verify dataset ownership
stmt = select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id)
stmt = select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id_str)
dataset = db.session.scalar(stmt)
if not dataset:
raise NotFound("Dataset not found.")
@ -186,7 +190,7 @@ class PipelineRunApi(DatasetApiResource):
raise Forbidden()
rag_pipeline_service: RagPipelineService = RagPipelineService()
pipeline: Pipeline = rag_pipeline_service.get_pipeline(tenant_id=tenant_id, dataset_id=dataset_id)
pipeline: Pipeline = rag_pipeline_service.get_pipeline(tenant_id=tenant_id, dataset_id=dataset_id_str)
try:
response: dict[Any, Any] | Generator[str, Any, None] = PipelineGenerateService.generate(
pipeline=pipeline,

View File

@ -1,4 +1,5 @@
from typing import Any
from uuid import UUID
from flask import request
from flask_restx import marshal
@ -107,17 +108,19 @@ class SegmentApi(DatasetApiResource):
@cloud_edition_billing_resource_check("vector_space", "dataset")
@cloud_edition_billing_knowledge_limit_check("add_segment", "dataset")
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
def post(self, tenant_id: str, dataset_id: str, document_id: str):
def post(self, tenant_id: str, dataset_id: UUID, document_id: UUID):
_, current_tenant_id = current_account_with_tenant()
"""Create single segment."""
dataset_id_str = str(dataset_id)
# check dataset
dataset = db.session.scalar(
select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).limit(1)
select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id_str).limit(1)
)
if not dataset:
raise NotFound("Dataset not found.")
document_id_str = str(document_id)
# check document
document = DocumentService.get_document(dataset.id, document_id)
document = DocumentService.get_document(dataset.id, document_id_str)
if not document:
raise NotFound("Document not found.")
if document.indexing_status != "completed":
@ -150,7 +153,10 @@ class SegmentApi(DatasetApiResource):
for args_item in payload.segments:
SegmentService.segment_create_args_validate(args_item, document)
segments = SegmentService.multi_create_segment(payload.segments, document, dataset)
return {"data": _marshal_segments_with_summary(segments, dataset_id), "doc_form": document.doc_form}, 200
return {
"data": _marshal_segments_with_summary(segments, dataset_id_str),
"doc_form": document.doc_form,
}, 200
else:
return {"error": "Segments is required"}, 400
@ -165,19 +171,21 @@ class SegmentApi(DatasetApiResource):
404: "Dataset or document not found",
}
)
def get(self, tenant_id: str, dataset_id: str, document_id: str):
def get(self, tenant_id: str, dataset_id: UUID, document_id: UUID):
_, current_tenant_id = current_account_with_tenant()
"""Get segments."""
# check dataset
page = request.args.get("page", default=1, type=int)
limit = request.args.get("limit", default=20, type=int)
dataset_id_str = str(dataset_id)
dataset = db.session.scalar(
select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).limit(1)
select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id_str).limit(1)
)
if not dataset:
raise NotFound("Dataset not found.")
document_id_str = str(document_id)
# check document
document = DocumentService.get_document(dataset.id, document_id)
document = DocumentService.get_document(dataset.id, document_id_str)
if not document:
raise NotFound("Document not found.")
# check embedding model setting
@ -205,7 +213,7 @@ class SegmentApi(DatasetApiResource):
)
segments, total = SegmentService.get_segments(
document_id=document_id,
document_id=document_id_str,
tenant_id=current_tenant_id,
status_list=args.status,
keyword=args.keyword,
@ -214,7 +222,7 @@ class SegmentApi(DatasetApiResource):
)
response = {
"data": _marshal_segments_with_summary(segments, dataset_id),
"data": _marshal_segments_with_summary(segments, dataset_id_str),
"doc_form": document.doc_form,
"total": total,
"has_more": len(segments) == limit,
@ -240,22 +248,25 @@ class DatasetSegmentApi(DatasetApiResource):
}
)
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
def delete(self, tenant_id: str, dataset_id: str, document_id: str, segment_id: str):
def delete(self, tenant_id: str, dataset_id: UUID, document_id: UUID, segment_id: UUID):
_, current_tenant_id = current_account_with_tenant()
dataset_id_str = str(dataset_id)
# check dataset
dataset = db.session.scalar(
select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).limit(1)
select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id_str).limit(1)
)
if not dataset:
raise NotFound("Dataset not found.")
# check user's model setting
DatasetService.check_dataset_model_setting(dataset)
document_id_str = str(document_id)
# check document
document = DocumentService.get_document(dataset_id, document_id)
document = DocumentService.get_document(dataset_id_str, document_id_str)
if not document:
raise NotFound("Document not found.")
segment_id_str = str(segment_id)
# check segment
segment = SegmentService.get_segment_by_id(segment_id=segment_id, tenant_id=current_tenant_id)
segment = SegmentService.get_segment_by_id(segment_id=segment_id_str, tenant_id=current_tenant_id)
if not segment:
raise NotFound("Segment not found.")
SegmentService.delete_segment(segment, document, dataset)
@ -276,18 +287,20 @@ class DatasetSegmentApi(DatasetApiResource):
)
@cloud_edition_billing_resource_check("vector_space", "dataset")
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
def post(self, tenant_id: str, dataset_id: str, document_id: str, segment_id: str):
def post(self, tenant_id: str, dataset_id: UUID, document_id: UUID, segment_id: UUID):
_, current_tenant_id = current_account_with_tenant()
dataset_id_str = str(dataset_id)
# check dataset
dataset = db.session.scalar(
select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).limit(1)
select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id_str).limit(1)
)
if not dataset:
raise NotFound("Dataset not found.")
# check user's model setting
DatasetService.check_dataset_model_setting(dataset)
document_id_str = str(document_id)
# check document
document = DocumentService.get_document(dataset_id, document_id)
document = DocumentService.get_document(dataset_id_str, document_id_str)
if not document:
raise NotFound("Document not found.")
if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
@ -306,15 +319,19 @@ class DatasetSegmentApi(DatasetApiResource):
)
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)
# check segment
segment = SegmentService.get_segment_by_id(segment_id=segment_id, tenant_id=current_tenant_id)
segment_id_str = str(segment_id)
# check segment
segment = SegmentService.get_segment_by_id(segment_id=segment_id_str, tenant_id=current_tenant_id)
if not segment:
raise NotFound("Segment not found.")
payload = SegmentUpdatePayload.model_validate(service_api_ns.payload or {})
updated_segment = SegmentService.update_segment(payload.segment, segment, document, dataset)
return {"data": _marshal_segment_with_summary(updated_segment, dataset_id), "doc_form": document.doc_form}, 200
return {
"data": _marshal_segment_with_summary(updated_segment, dataset_id_str),
"doc_form": document.doc_form,
}, 200
@service_api_ns.doc("get_segment")
@service_api_ns.doc(description="Get a specific segment by ID")
@ -325,26 +342,29 @@ class DatasetSegmentApi(DatasetApiResource):
404: "Dataset, document, or segment not found",
}
)
def get(self, tenant_id: str, dataset_id: UUID, document_id: UUID, segment_id: UUID):
    """Return one segment of a document together with the document's ``doc_form``.

    Raises:
        NotFound: if the dataset, the document or the segment cannot be found,
            or if the segment does not belong to the requested document.
    """
    _, current_tenant_id = current_account_with_tenant()
    dataset_id_str = str(dataset_id)
    # check dataset
    dataset = db.session.scalar(
        select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id_str).limit(1)
    )
    if not dataset:
        raise NotFound("Dataset not found.")
    # check user's model setting
    DatasetService.check_dataset_model_setting(dataset)
    document_id_str = str(document_id)
    # check document
    document = DocumentService.get_document(dataset_id_str, document_id_str)
    if not document:
        raise NotFound("Document not found.")
    segment_id_str = str(segment_id)
    # check segment
    segment = SegmentService.get_segment_by_id(segment_id=segment_id_str, tenant_id=current_tenant_id)
    if not segment:
        raise NotFound("Segment not found.")
    # validate segment belongs to the specified document: the lookup above is
    # scoped by tenant only, so without this a segment of another document in
    # the same tenant would be returned under this document's URL.
    if str(segment.document_id) != document_id_str:
        raise NotFound("Segment not found.")
    return {"data": _marshal_segment_with_summary(segment, dataset_id_str), "doc_form": document.doc_form}, 200
@service_api_ns.route(
@ -369,23 +389,26 @@ class ChildChunkApi(DatasetApiResource):
@cloud_edition_billing_resource_check("vector_space", "dataset")
@cloud_edition_billing_knowledge_limit_check("add_segment", "dataset")
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
def post(self, tenant_id: str, dataset_id: str, document_id: str, segment_id: str):
def post(self, tenant_id: str, dataset_id: UUID, document_id: UUID, segment_id: UUID):
_, current_tenant_id = current_account_with_tenant()
"""Create child chunk."""
dataset_id_str = str(dataset_id)
# check dataset
dataset = db.session.scalar(
select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).limit(1)
select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id_str).limit(1)
)
if not dataset:
raise NotFound("Dataset not found.")
document_id_str = str(document_id)
# check document
document = DocumentService.get_document(dataset.id, document_id)
document = DocumentService.get_document(dataset.id, document_id_str)
if not document:
raise NotFound("Document not found.")
segment_id_str = str(segment_id)
# check segment
segment = SegmentService.get_segment_by_id(segment_id=segment_id, tenant_id=current_tenant_id)
segment = SegmentService.get_segment_by_id(segment_id=segment_id_str, tenant_id=current_tenant_id)
if not segment:
raise NotFound("Segment not found.")
@ -429,23 +452,26 @@ class ChildChunkApi(DatasetApiResource):
404: "Dataset, document, or segment not found",
}
)
def get(self, tenant_id: str, dataset_id: str, document_id: str, segment_id: str):
def get(self, tenant_id: str, dataset_id: UUID, document_id: UUID, segment_id: UUID):
_, current_tenant_id = current_account_with_tenant()
"""Get child chunks."""
dataset_id_str = str(dataset_id)
# check dataset
dataset = db.session.scalar(
select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).limit(1)
select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id_str).limit(1)
)
if not dataset:
raise NotFound("Dataset not found.")
document_id_str = str(document_id)
# check document
document = DocumentService.get_document(dataset.id, document_id)
document = DocumentService.get_document(dataset.id, document_id_str)
if not document:
raise NotFound("Document not found.")
segment_id_str = str(segment_id)
# check segment
segment = SegmentService.get_segment_by_id(segment_id=segment_id, tenant_id=current_tenant_id)
segment = SegmentService.get_segment_by_id(segment_id=segment_id_str, tenant_id=current_tenant_id)
if not segment:
raise NotFound("Segment not found.")
@ -461,7 +487,9 @@ class ChildChunkApi(DatasetApiResource):
limit = min(args.limit, 100)
keyword = args.keyword
child_chunks = SegmentService.get_child_chunks(segment_id, document_id, dataset_id, page, limit, keyword)
child_chunks = SegmentService.get_child_chunks(
segment_id_str, document_id_str, dataset_id_str, page, limit, keyword
)
return {
"data": marshal(child_chunks.items, child_chunk_fields),
@ -497,32 +525,38 @@ class DatasetChildChunkApi(DatasetApiResource):
)
@cloud_edition_billing_knowledge_limit_check("add_segment", "dataset")
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
def delete(self, tenant_id: str, dataset_id: str, document_id: str, segment_id: str, child_chunk_id: str):
def delete(self, tenant_id: str, dataset_id: UUID, document_id: UUID, segment_id: UUID, child_chunk_id: UUID):
_, current_tenant_id = current_account_with_tenant()
"""Delete child chunk."""
dataset_id_str = str(dataset_id)
# check dataset
dataset = db.session.scalar(
select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).limit(1)
select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id_str).limit(1)
)
if not dataset:
raise NotFound("Dataset not found.")
document_id_str = str(document_id)
# check document
document = DocumentService.get_document(dataset.id, document_id)
document = DocumentService.get_document(dataset.id, document_id_str)
if not document:
raise NotFound("Document not found.")
segment_id_str = str(segment_id)
# check segment
segment = SegmentService.get_segment_by_id(segment_id=segment_id, tenant_id=current_tenant_id)
segment = SegmentService.get_segment_by_id(segment_id=segment_id_str, tenant_id=current_tenant_id)
if not segment:
raise NotFound("Segment not found.")
# validate segment belongs to the specified document
if str(segment.document_id) != str(document_id):
if str(segment.document_id) != str(document_id_str):
raise NotFound("Document not found.")
child_chunk_id_str = str(child_chunk_id)
# check child chunk
child_chunk = SegmentService.get_child_chunk_by_id(child_chunk_id=child_chunk_id, tenant_id=current_tenant_id)
child_chunk = SegmentService.get_child_chunk_by_id(
child_chunk_id=child_chunk_id_str, tenant_id=current_tenant_id
)
if not child_chunk:
raise NotFound("Child chunk not found.")
@ -558,32 +592,38 @@ class DatasetChildChunkApi(DatasetApiResource):
@cloud_edition_billing_resource_check("vector_space", "dataset")
@cloud_edition_billing_knowledge_limit_check("add_segment", "dataset")
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
def patch(self, tenant_id: str, dataset_id: str, document_id: str, segment_id: str, child_chunk_id: str):
def patch(self, tenant_id: str, dataset_id: UUID, document_id: UUID, segment_id: UUID, child_chunk_id: UUID):
_, current_tenant_id = current_account_with_tenant()
"""Update child chunk."""
dataset_id_str = str(dataset_id)
# check dataset
dataset = db.session.scalar(
select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).limit(1)
select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id_str).limit(1)
)
if not dataset:
raise NotFound("Dataset not found.")
document_id_str = str(document_id)
# get document
document = DocumentService.get_document(dataset_id, document_id)
document = DocumentService.get_document(dataset_id_str, document_id_str)
if not document:
raise NotFound("Document not found.")
segment_id_str = str(segment_id)
# get segment
segment = SegmentService.get_segment_by_id(segment_id=segment_id, tenant_id=current_tenant_id)
segment = SegmentService.get_segment_by_id(segment_id=segment_id_str, tenant_id=current_tenant_id)
if not segment:
raise NotFound("Segment not found.")
# validate segment belongs to the specified document
if str(segment.document_id) != str(document_id):
if str(segment.document_id) != str(document_id_str):
raise NotFound("Segment not found.")
child_chunk_id_str = str(child_chunk_id)
# get child chunk
child_chunk = SegmentService.get_child_chunk_by_id(child_chunk_id=child_chunk_id, tenant_id=current_tenant_id)
child_chunk = SegmentService.get_child_chunk_by_id(
child_chunk_id=child_chunk_id_str, tenant_id=current_tenant_id
)
if not child_chunk:
raise NotFound("Child chunk not found.")

View File

@ -1,7 +1,7 @@
from datetime import datetime
from flask_restx import fields
from pydantic import field_validator
from pydantic import Field, field_validator
from fields.base import ResponseModel
from libs.helper import TimestampField, to_timestamp
@ -152,31 +152,41 @@ class DatasetRerankingModelResponse(ResponseModel):
class DatasetKeywordSettingResponse(ResponseModel):
    # Keyword part of a weighted-score setting (used as
    # DatasetWeightedScoreResponse.keyword_setting).
    # The weight is optional and defaults to None, so the model can be built
    # with no arguments.
    keyword_weight: float | None = None
class DatasetVectorSettingResponse(ResponseModel):
vector_weight: float
embedding_model_name: str
embedding_provider_name: str
vector_weight: float | None = None
embedding_model_name: str | None = None
embedding_provider_name: str | None = None
class DatasetWeightedScoreResponse(ResponseModel):
weight_type: str | None = None
keyword_setting: DatasetKeywordSettingResponse | None = None
vector_setting: DatasetVectorSettingResponse | None = None
keyword_setting: DatasetKeywordSettingResponse = Field(default_factory=DatasetKeywordSettingResponse)
vector_setting: DatasetVectorSettingResponse = Field(default_factory=DatasetVectorSettingResponse)
@field_validator("keyword_setting", "vector_setting", mode="before")
@classmethod
def _expand_null_nested(cls, value: object) -> object:
return {} if value is None else value
class DatasetRetrievalModelResponse(ResponseModel):
search_method: str
reranking_enable: bool
reranking_mode: str | None = None
reranking_model: DatasetRerankingModelResponse | None
reranking_model: DatasetRerankingModelResponse = Field(default_factory=DatasetRerankingModelResponse)
weights: DatasetWeightedScoreResponse | None = None
top_k: int
score_threshold_enabled: bool
score_threshold: float | None = None
@field_validator("reranking_model", mode="before")
@classmethod
def _expand_null_nested(cls, value: object) -> object:
return {} if value is None else value
class DatasetSummaryIndexSettingResponse(ResponseModel):
enable: bool | None = None
@ -192,10 +202,10 @@ class DatasetTagResponse(ResponseModel):
class DatasetExternalKnowledgeInfoResponse(ResponseModel):
external_knowledge_id: str
external_knowledge_api_id: str
external_knowledge_api_name: str
external_knowledge_api_endpoint: str
external_knowledge_id: str | None = None
external_knowledge_api_id: str | None = None
external_knowledge_api_name: str | None = None
external_knowledge_api_endpoint: str | None = None
class DatasetExternalRetrievalModelResponse(ResponseModel):
@ -211,8 +221,8 @@ class DatasetDocMetadataResponse(ResponseModel):
class DatasetIconInfoResponse(ResponseModel):
icon_type: str | None
icon: str | None
icon_type: str | None = None
icon: str | None = None
icon_background: str | None = None
icon_url: str | None = None
@ -237,17 +247,21 @@ class DatasetDetailResponse(ResponseModel):
embedding_model_provider: str | None
embedding_available: bool | None = None
retrieval_model_dict: DatasetRetrievalModelResponse
summary_index_setting: DatasetSummaryIndexSettingResponse | None
summary_index_setting: DatasetSummaryIndexSettingResponse = Field(
default_factory=DatasetSummaryIndexSettingResponse
)
tags: list[DatasetTagResponse]
doc_form: str | None
external_knowledge_info: DatasetExternalKnowledgeInfoResponse | None
external_knowledge_info: DatasetExternalKnowledgeInfoResponse = Field(
default_factory=DatasetExternalKnowledgeInfoResponse
)
external_retrieval_model: DatasetExternalRetrievalModelResponse | None
doc_metadata: list[DatasetDocMetadataResponse]
built_in_field_enabled: bool
pipeline_id: str | None
runtime_mode: str | None
chunk_structure: str | None
icon_info: DatasetIconInfoResponse | None
icon_info: DatasetIconInfoResponse = Field(default_factory=DatasetIconInfoResponse)
is_published: bool
total_documents: int
total_available_documents: int
@ -258,3 +272,8 @@ class DatasetDetailResponse(ResponseModel):
@classmethod
def _normalize_timestamp(cls, value: datetime | int | None) -> int | None:
return to_timestamp(value)
@field_validator("summary_index_setting", "external_knowledge_info", "icon_info", mode="before")
@classmethod
def _expand_null_nested(cls, value: object) -> object:
return {} if value is None else value

View File

@ -0,0 +1,65 @@
"""add workflow_version to workflow_agent_node_bindings
Restores the stage 1 §5.3 unique key
``(tenant_id, workflow_id, workflow_version, node_id)`` so draft and published
workflow bindings can coexist at the same workflow_id once we want to track
them per workflow version. ``workflow_version`` mirrors ``workflows.version``
("draft" or a published version string).
Because the New Agent Experience feature is pre-release, this table is empty
in every environment that matters; the ``server_default='draft'`` only exists
to keep developer-local rows valid during the alter and is dropped immediately
afterward so application code must specify ``workflow_version`` explicitly.
Revision ID: 97e2e1a644e8
Revises: f8b6b7e9c421
Create Date: 2026-05-25 11:43:37.611300
"""
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision = '97e2e1a644e8'
down_revision = 'f8b6b7e9c421'
branch_labels = None
depends_on = None
def upgrade():
    """Add ``workflow_version`` and make binding uniqueness version-aware.

    All steps run inside one batch operation on
    ``workflow_agent_node_bindings``:

    1. Add the NOT NULL ``workflow_version`` column with a temporary
       ``'draft'`` server default so rows that already exist stay valid.
    2. Drop that server default so later inserts must supply the value.
    3. Replace the ``(tenant_id, workflow_id, node_id)`` unique constraint
       with one that also includes ``workflow_version``.
    4. Add a non-unique index on
       ``(tenant_id, workflow_id, workflow_version)``.
    """
    with op.batch_alter_table('workflow_agent_node_bindings', schema=None) as batch_op:
        batch_op.add_column(
            sa.Column(
                'workflow_version',
                sa.String(length=255),
                nullable=False,
                server_default='draft',
            )
        )
        # Only the default changes here. The existing type and nullability are
        # passed explicitly because Alembic documents ``existing_type`` as
        # needed for MySQL alters that do not set a new type; they describe
        # the column exactly as it was created just above.
        batch_op.alter_column(
            'workflow_version',
            existing_type=sa.String(length=255),
            existing_nullable=False,
            server_default=None,
        )
        # ``batch_op.f(...)`` marks the name as final so no naming convention
        # is applied to it when the old constraint is looked up for dropping.
        batch_op.drop_constraint(
            batch_op.f('workflow_agent_node_binding_node_unique'), type_='unique'
        )
        batch_op.create_unique_constraint(
            'workflow_agent_node_binding_node_version_unique',
            ['tenant_id', 'workflow_id', 'workflow_version', 'node_id'],
        )
        batch_op.create_index(
            'workflow_agent_node_binding_workflow_version_idx',
            ['tenant_id', 'workflow_id', 'workflow_version'],
            unique=False,
        )
def downgrade():
    """Undo ``upgrade``: remove ``workflow_version`` and restore the old key.

    Drops the version-aware index and unique constraint, recreates the
    ``(tenant_id, workflow_id, node_id)`` unique constraint under its original
    name, and finally removes the ``workflow_version`` column.
    """
    table_name = 'workflow_agent_node_bindings'
    version_index = 'workflow_agent_node_binding_workflow_version_idx'
    version_unique = 'workflow_agent_node_binding_node_version_unique'
    legacy_columns = ['tenant_id', 'workflow_id', 'node_id']

    with op.batch_alter_table(table_name, schema=None) as batch_op:
        # Remove the objects that reference the column before dropping it.
        batch_op.drop_index(version_index)
        batch_op.drop_constraint(version_unique, type_='unique')

        legacy_unique = batch_op.f('workflow_agent_node_binding_node_unique')
        batch_op.create_unique_constraint(
            legacy_unique,
            legacy_columns,
            postgresql_nulls_not_distinct=False,
        )

        batch_op.drop_column('workflow_version')

View File

@ -231,17 +231,29 @@ class WorkflowAgentNodeBinding(DefaultFieldsMixin, Base):
UniqueConstraint(
"tenant_id",
"workflow_id",
"workflow_version",
"node_id",
name="workflow_agent_node_binding_node_unique",
name="workflow_agent_node_binding_node_version_unique",
),
Index("workflow_agent_node_binding_agent_idx", "tenant_id", "agent_id"),
Index("workflow_agent_node_binding_current_snapshot_idx", "tenant_id", "current_snapshot_id"),
Index("workflow_agent_node_binding_app_idx", "tenant_id", "app_id"),
Index(
"workflow_agent_node_binding_workflow_version_idx",
"tenant_id",
"workflow_id",
"workflow_version",
),
)
tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
app_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
workflow_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
# Tracks which workflow version (draft or a published version string) this
# binding belongs to. Mirrors ``Workflow.version`` and lets us keep separate
# rows for the draft workflow and each published copy under the same
# workflow_id, restoring the stage 1 §5.3 unique key.
workflow_version: Mapped[str] = mapped_column(String(255), nullable=False)
node_id: Mapped[str] = mapped_column(String(255), nullable=False)
binding_type: Mapped[WorkflowAgentBindingType] = mapped_column(
EnumText(WorkflowAgentBindingType, length=32), nullable=False

View File

@ -11708,9 +11708,9 @@ Condition detail
| embedding_model | string | | Yes |
| embedding_model_provider | string | | Yes |
| enable_api | boolean | | Yes |
| external_knowledge_info | [DatasetExternalKnowledgeInfoResponse](#datasetexternalknowledgeinforesponse) | | Yes |
| external_knowledge_info | [DatasetExternalKnowledgeInfoResponse](#datasetexternalknowledgeinforesponse) | | No |
| external_retrieval_model | [DatasetExternalRetrievalModelResponse](#datasetexternalretrievalmodelresponse) | | Yes |
| icon_info | [DatasetIconInfoResponse](#dataseticoninforesponse) | | Yes |
| icon_info | [DatasetIconInfoResponse](#dataseticoninforesponse) | | No |
| id | string | | Yes |
| indexing_technique | string | | Yes |
| is_multimodal | boolean | | Yes |
@ -11721,7 +11721,7 @@ Condition detail
| provider | string | | Yes |
| retrieval_model_dict | [DatasetRetrievalModelResponse](#datasetretrievalmodelresponse) | | Yes |
| runtime_mode | string | | Yes |
| summary_index_setting | [DatasetSummaryIndexSettingResponse](#datasetsummaryindexsettingresponse) | | Yes |
| summary_index_setting | [DatasetSummaryIndexSettingResponse](#datasetsummaryindexsettingresponse) | | No |
| tags | [ [DatasetTagResponse](#datasettagresponse) ] | | Yes |
| total_available_documents | integer | | Yes |
| total_documents | integer | | Yes |
@ -11748,9 +11748,9 @@ Condition detail
| embedding_model | string | | Yes |
| embedding_model_provider | string | | Yes |
| enable_api | boolean | | Yes |
| external_knowledge_info | [DatasetExternalKnowledgeInfoResponse](#datasetexternalknowledgeinforesponse) | | Yes |
| external_knowledge_info | [DatasetExternalKnowledgeInfoResponse](#datasetexternalknowledgeinforesponse) | | No |
| external_retrieval_model | [DatasetExternalRetrievalModelResponse](#datasetexternalretrievalmodelresponse) | | Yes |
| icon_info | [DatasetIconInfoResponse](#dataseticoninforesponse) | | Yes |
| icon_info | [DatasetIconInfoResponse](#dataseticoninforesponse) | | No |
| id | string | | Yes |
| indexing_technique | string | | Yes |
| is_multimodal | boolean | | Yes |
@ -11762,7 +11762,7 @@ Condition detail
| provider | string | | Yes |
| retrieval_model_dict | [DatasetRetrievalModelResponse](#datasetretrievalmodelresponse) | | Yes |
| runtime_mode | string | | Yes |
| summary_index_setting | [DatasetSummaryIndexSettingResponse](#datasetsummaryindexsettingresponse) | | Yes |
| summary_index_setting | [DatasetSummaryIndexSettingResponse](#datasetsummaryindexsettingresponse) | | No |
| tags | [ [DatasetTagResponse](#datasettagresponse) ] | | Yes |
| total_available_documents | integer | | Yes |
| total_documents | integer | | Yes |
@ -11790,10 +11790,10 @@ Condition detail
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| external_knowledge_api_endpoint | string | | Yes |
| external_knowledge_api_id | string | | Yes |
| external_knowledge_api_name | string | | Yes |
| external_knowledge_id | string | | Yes |
| external_knowledge_api_endpoint | string | | No |
| external_knowledge_api_id | string | | No |
| external_knowledge_api_name | string | | No |
| external_knowledge_id | string | | No |
#### DatasetExternalRetrievalModelResponse
@ -11816,9 +11816,9 @@ Condition detail
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| icon | string | | Yes |
| icon | string | | No |
| icon_background | string | | No |
| icon_type | string | | Yes |
| icon_type | string | | No |
| icon_url | string | | No |
#### DatasetKeywordSetting
@ -11831,7 +11831,7 @@ Condition detail
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| keyword_weight | number | | Yes |
| keyword_weight | number | | No |
#### DatasetListItemResponse
@ -11852,9 +11852,9 @@ Condition detail
| embedding_model | string | | Yes |
| embedding_model_provider | string | | Yes |
| enable_api | boolean | | Yes |
| external_knowledge_info | [DatasetExternalKnowledgeInfoResponse](#datasetexternalknowledgeinforesponse) | | Yes |
| external_knowledge_info | [DatasetExternalKnowledgeInfoResponse](#datasetexternalknowledgeinforesponse) | | No |
| external_retrieval_model | [DatasetExternalRetrievalModelResponse](#datasetexternalretrievalmodelresponse) | | Yes |
| icon_info | [DatasetIconInfoResponse](#dataseticoninforesponse) | | Yes |
| icon_info | [DatasetIconInfoResponse](#dataseticoninforesponse) | | No |
| id | string | | Yes |
| indexing_technique | string | | Yes |
| is_multimodal | boolean | | Yes |
@ -11866,7 +11866,7 @@ Condition detail
| provider | string | | Yes |
| retrieval_model_dict | [DatasetRetrievalModelResponse](#datasetretrievalmodelresponse) | | Yes |
| runtime_mode | string | | Yes |
| summary_index_setting | [DatasetSummaryIndexSettingResponse](#datasetsummaryindexsettingresponse) | | Yes |
| summary_index_setting | [DatasetSummaryIndexSettingResponse](#datasetsummaryindexsettingresponse) | | No |
| tags | [ [DatasetTagResponse](#datasettagresponse) ] | | Yes |
| total_available_documents | integer | | Yes |
| total_documents | integer | | Yes |
@ -12014,7 +12014,7 @@ Condition detail
| ---- | ---- | ----------- | -------- |
| reranking_enable | boolean | | Yes |
| reranking_mode | string | | No |
| reranking_model | [DatasetRerankingModelResponse](#datasetrerankingmodelresponse) | | Yes |
| reranking_model | [DatasetRerankingModelResponse](#datasetrerankingmodelresponse) | | No |
| score_threshold | number | | No |
| score_threshold_enabled | boolean | | Yes |
| search_method | string | | Yes |
@ -12069,9 +12069,9 @@ Condition detail
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| embedding_model_name | string | | Yes |
| embedding_provider_name | string | | Yes |
| vector_weight | number | | Yes |
| embedding_model_name | string | | No |
| embedding_provider_name | string | | No |
| vector_weight | number | | No |
#### DatasetWeightedScore

View File

@ -2338,9 +2338,9 @@ Condition detail
| embedding_model | string | | Yes |
| embedding_model_provider | string | | Yes |
| enable_api | boolean | | Yes |
| external_knowledge_info | [DatasetExternalKnowledgeInfoResponse](#datasetexternalknowledgeinforesponse) | | Yes |
| external_knowledge_info | [DatasetExternalKnowledgeInfoResponse](#datasetexternalknowledgeinforesponse) | | No |
| external_retrieval_model | [DatasetExternalRetrievalModelResponse](#datasetexternalretrievalmodelresponse) | | Yes |
| icon_info | [DatasetIconInfoResponse](#dataseticoninforesponse) | | Yes |
| icon_info | [DatasetIconInfoResponse](#dataseticoninforesponse) | | No |
| id | string | | Yes |
| indexing_technique | string | | Yes |
| is_multimodal | boolean | | Yes |
@ -2351,7 +2351,7 @@ Condition detail
| provider | string | | Yes |
| retrieval_model_dict | [DatasetRetrievalModelResponse](#datasetretrievalmodelresponse) | | Yes |
| runtime_mode | string | | Yes |
| summary_index_setting | [DatasetSummaryIndexSettingResponse](#datasetsummaryindexsettingresponse) | | Yes |
| summary_index_setting | [DatasetSummaryIndexSettingResponse](#datasetsummaryindexsettingresponse) | | No |
| tags | [ [DatasetTagResponse](#datasettagresponse) ] | | Yes |
| total_available_documents | integer | | Yes |
| total_documents | integer | | Yes |
@ -2378,9 +2378,9 @@ Condition detail
| embedding_model | string | | Yes |
| embedding_model_provider | string | | Yes |
| enable_api | boolean | | Yes |
| external_knowledge_info | [DatasetExternalKnowledgeInfoResponse](#datasetexternalknowledgeinforesponse) | | Yes |
| external_knowledge_info | [DatasetExternalKnowledgeInfoResponse](#datasetexternalknowledgeinforesponse) | | No |
| external_retrieval_model | [DatasetExternalRetrievalModelResponse](#datasetexternalretrievalmodelresponse) | | Yes |
| icon_info | [DatasetIconInfoResponse](#dataseticoninforesponse) | | Yes |
| icon_info | [DatasetIconInfoResponse](#dataseticoninforesponse) | | No |
| id | string | | Yes |
| indexing_technique | string | | Yes |
| is_multimodal | boolean | | Yes |
@ -2392,7 +2392,7 @@ Condition detail
| provider | string | | Yes |
| retrieval_model_dict | [DatasetRetrievalModelResponse](#datasetretrievalmodelresponse) | | Yes |
| runtime_mode | string | | Yes |
| summary_index_setting | [DatasetSummaryIndexSettingResponse](#datasetsummaryindexsettingresponse) | | Yes |
| summary_index_setting | [DatasetSummaryIndexSettingResponse](#datasetsummaryindexsettingresponse) | | No |
| tags | [ [DatasetTagResponse](#datasettagresponse) ] | | Yes |
| total_available_documents | integer | | Yes |
| total_documents | integer | | Yes |
@ -2412,10 +2412,10 @@ Condition detail
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| external_knowledge_api_endpoint | string | | Yes |
| external_knowledge_api_id | string | | Yes |
| external_knowledge_api_name | string | | Yes |
| external_knowledge_id | string | | Yes |
| external_knowledge_api_endpoint | string | | No |
| external_knowledge_api_id | string | | No |
| external_knowledge_api_name | string | | No |
| external_knowledge_id | string | | No |
#### DatasetExternalRetrievalModelResponse
@ -2429,16 +2429,16 @@ Condition detail
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| icon | string | | Yes |
| icon | string | | No |
| icon_background | string | | No |
| icon_type | string | | Yes |
| icon_type | string | | No |
| icon_url | string | | No |
#### DatasetKeywordSettingResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| keyword_weight | number | | Yes |
| keyword_weight | number | | No |
#### DatasetListQuery
@ -2522,7 +2522,7 @@ Condition detail
| ---- | ---- | ----------- | -------- |
| reranking_enable | boolean | | Yes |
| reranking_mode | string | | No |
| reranking_model | [DatasetRerankingModelResponse](#datasetrerankingmodelresponse) | | Yes |
| reranking_model | [DatasetRerankingModelResponse](#datasetrerankingmodelresponse) | | No |
| score_threshold | number | | No |
| score_threshold_enabled | boolean | | Yes |
| search_method | string | | Yes |
@ -2566,9 +2566,9 @@ Condition detail
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| embedding_model_name | string | | Yes |
| embedding_provider_name | string | | Yes |
| vector_weight | number | | Yes |
| embedding_model_name | string | | No |
| embedding_provider_name | string | | No |
| vector_weight | number | | No |
#### DatasetWeightedScoreResponse

View File

@ -28,6 +28,10 @@ from services.entities.agent_entities import (
WorkflowNodeJobConfig,
)
# WorkflowAgentNodeBinding.workflow_version tag for the draft workflow row.
# Mirrors Workflow.version when it is "draft" (see models/workflow.py).
_DRAFT_WORKFLOW_VERSION = "draft"
class AgentComposerService:
@classmethod
@ -284,6 +288,7 @@ class AgentComposerService:
tenant_id=tenant_id,
app_id=app_id,
workflow_id=workflow_id,
workflow_version=_DRAFT_WORKFLOW_VERSION,
node_id=node_id,
binding_type=WorkflowAgentBindingType.INLINE_AGENT,
agent_id=agent.id,
@ -387,6 +392,7 @@ class AgentComposerService:
tenant_id=tenant_id,
app_id=app_id,
workflow_id=workflow_id,
workflow_version=_DRAFT_WORKFLOW_VERSION,
node_id=node_id,
created_by=account_id,
)
@ -606,11 +612,16 @@ class AgentComposerService:
def _get_workflow_binding(
cls, *, tenant_id: str, workflow_id: str, node_id: str
) -> WorkflowAgentNodeBinding | None:
# Composer always operates against the draft workflow row, so this lookup
# is scoped to ``workflow_version="draft"``. Published bindings are
# materialized by WorkflowAgentPublishService.copy_agent_node_bindings_to_published
# and are not edited through the Composer.
return db.session.scalar(
select(WorkflowAgentNodeBinding)
.where(
WorkflowAgentNodeBinding.tenant_id == tenant_id,
WorkflowAgentNodeBinding.workflow_id == workflow_id,
WorkflowAgentNodeBinding.workflow_version == _DRAFT_WORKFLOW_VERSION,
WorkflowAgentNodeBinding.node_id == node_id,
)
.limit(1)

View File

@ -39,6 +39,7 @@ class WorkflowAgentPublishService:
WorkflowAgentNodeBinding.tenant_id == draft_workflow.tenant_id,
WorkflowAgentNodeBinding.app_id == draft_workflow.app_id,
WorkflowAgentNodeBinding.workflow_id == draft_workflow.id,
WorkflowAgentNodeBinding.workflow_version == draft_workflow.version,
WorkflowAgentNodeBinding.node_id.in_(node_ids),
)
).all()
@ -48,6 +49,7 @@ class WorkflowAgentPublishService:
tenant_id=binding.tenant_id,
app_id=binding.app_id,
workflow_id=published_workflow.id,
workflow_version=published_workflow.version,
node_id=binding.node_id,
binding_type=binding.binding_type,
agent_id=binding.agent_id,

View File

@ -0,0 +1,173 @@
from fields.dataset_fields import DatasetDetailResponse
def _dataset_detail_payload(**overrides):
payload = {
"id": "ds-1",
"name": "Dataset",
"description": "desc",
"provider": "vendor",
"permission": "only_me",
"data_source_type": None,
"indexing_technique": "economy",
"app_count": 0,
"document_count": 0,
"word_count": 0,
"created_by": "account-1",
"author_name": None,
"created_at": 1704067200,
"updated_by": None,
"updated_at": 1704067200,
"embedding_model": None,
"embedding_model_provider": None,
"embedding_available": True,
"retrieval_model_dict": {
"search_method": "hybrid_search",
"reranking_enable": True,
"reranking_mode": "weighted_score",
"reranking_model": {
"reranking_provider_name": "provider",
"reranking_model_name": "model",
},
"weights": {
"weight_type": "customized",
"keyword_setting": {"keyword_weight": 0.3},
"vector_setting": {
"vector_weight": 0.7,
"embedding_model_name": "embedding",
"embedding_provider_name": "provider",
},
},
"top_k": 3,
"score_threshold_enabled": False,
"score_threshold": None,
},
"summary_index_setting": {
"enable": False,
"model_name": None,
"model_provider_name": None,
"summary_prompt": None,
},
"tags": [],
"doc_form": None,
"external_knowledge_info": {
"external_knowledge_id": "knowledge-id",
"external_knowledge_api_id": "api-id",
"external_knowledge_api_name": "api",
"external_knowledge_api_endpoint": "https://example.com",
},
"external_retrieval_model": None,
"doc_metadata": [],
"built_in_field_enabled": False,
"pipeline_id": None,
"runtime_mode": "general",
"chunk_structure": None,
"icon_info": {
"icon_type": "emoji",
"icon": "📙",
"icon_background": None,
"icon_url": None,
},
"is_published": False,
"total_documents": 0,
"total_available_documents": 0,
"enable_api": False,
"is_multimodal": False,
}
payload.update(overrides)
return payload
def _dump_dataset_detail(payload):
    """Validate ``payload`` as a ``DatasetDetailResponse`` and return its JSON-mode dump."""
    validated = DatasetDetailResponse.model_validate(payload)
    return validated.model_dump(mode="json")
def test_dataset_detail_expands_legacy_null_nested_fields():
    """Top-level nested objects passed as ``None`` are dumped as all-``None`` objects."""
    nulled_fields = ("summary_index_setting", "external_knowledge_info", "icon_info")
    response = _dump_dataset_detail(_dataset_detail_payload(**dict.fromkeys(nulled_fields)))

    # Each nulled object must come back with every key present and set to None.
    assert response["summary_index_setting"] == dict.fromkeys(
        ("enable", "model_name", "model_provider_name", "summary_prompt")
    )
    assert response["external_knowledge_info"] == dict.fromkeys(
        (
            "external_knowledge_id",
            "external_knowledge_api_id",
            "external_knowledge_api_name",
            "external_knowledge_api_endpoint",
        )
    )
    assert response["icon_info"] == dict.fromkeys(
        ("icon_type", "icon", "icon_background", "icon_url")
    )

    # This field is expected to stay a plain null rather than being expanded.
    assert response["external_retrieval_model"] is None
def test_dataset_detail_expands_legacy_null_retrieval_nested_fields():
    """``None`` reranking model and weight settings are dumped as all-``None`` objects."""
    retrieval_model = {
        "search_method": "hybrid_search",
        "reranking_enable": True,
        "reranking_mode": "weighted_score",
        "reranking_model": None,
        "weights": {"keyword_setting": None, "vector_setting": None},
        "top_k": 3,
        "score_threshold_enabled": False,
        "score_threshold": None,
    }
    payload = _dataset_detail_payload(retrieval_model_dict=retrieval_model)

    dumped_retrieval = _dump_dataset_detail(payload)["retrieval_model_dict"]

    expected_reranking_model = {
        "reranking_provider_name": None,
        "reranking_model_name": None,
    }
    expected_weights = {
        "weight_type": None,
        "keyword_setting": {"keyword_weight": None},
        "vector_setting": {
            "vector_weight": None,
            "embedding_model_name": None,
            "embedding_provider_name": None,
        },
    }
    assert dumped_retrieval["reranking_model"] == expected_reranking_model
    assert dumped_retrieval["weights"] == expected_weights
def test_dataset_detail_expands_missing_weighted_score_nested_fields():
    """An empty ``weights`` object is dumped with both nested settings filled in as ``None`` values."""
    retrieval_model = {
        "search_method": "hybrid_search",
        "reranking_enable": True,
        "reranking_mode": "weighted_score",
        "reranking_model": None,
        # No keyword_setting / vector_setting keys at all.
        "weights": {},
        "top_k": 3,
        "score_threshold_enabled": False,
        "score_threshold": None,
    }
    payload = _dataset_detail_payload(retrieval_model_dict=retrieval_model)

    dumped_weights = _dump_dataset_detail(payload)["retrieval_model_dict"]["weights"]

    expected_vector_setting = dict.fromkeys(
        ("vector_weight", "embedding_model_name", "embedding_provider_name")
    )
    assert dumped_weights == {
        "weight_type": None,
        "keyword_setting": {"keyword_weight": None},
        "vector_setting": expected_vector_setting,
    }