Merge remote-tracking branch 'origin/main' into feat/trigger

2026-04-22 11:47:40 +08:00 · 2025-09-30 18:56:21 +08:00
parent 7c97ea4a9e decf0f3da0
commit 1bfa8e6662
30 changed files with 704 additions and 1210 deletions
--- a/api/core/workflow/node_events/node.py
+++ b/api/core/workflow/node_events/node.py
@ -20,6 +20,7 @@ class ModelInvokeCompletedEvent(NodeEventBase):
    usage: LLMUsage
    finish_reason: str | None = None
    reasoning_content: str | None = None
+    structured_output: dict | None = None


 class RunRetryEvent(NodeEventBase):
--- a/api/core/workflow/nodes/http_request/executor.py
+++ b/api/core/workflow/nodes/http_request/executor.py
@ -87,7 +87,7 @@ class Executor:
                node_data.authorization.config.api_key
            ).text

-        self.url: str = node_data.url
+        self.url = node_data.url
        self.method = node_data.method
        self.auth = node_data.authorization
        self.timeout = timeout
@ -349,11 +349,10 @@ class Executor:
            "timeout": (self.timeout.connect, self.timeout.read, self.timeout.write),
            "ssl_verify": self.ssl_verify,
            "follow_redirects": True,
-            "max_retries": self.max_retries,
        }
        # request_args = {k: v for k, v in request_args.items() if v is not None}
        try:
-            response: httpx.Response = _METHOD_MAP[method_lc](**request_args)
+            response: httpx.Response = _METHOD_MAP[method_lc](**request_args, max_retries=self.max_retries)
        except (ssrf_proxy.MaxRetriesExceededError, httpx.RequestError) as e:
            raise HttpRequestNodeError(str(e)) from e
        # FIXME: fix type ignore, this maybe httpx type issue
--- a/api/core/workflow/nodes/http_request/node.py
+++ b/api/core/workflow/nodes/http_request/node.py
@ -165,6 +165,8 @@ class HttpRequestNode(Node):
            body_type = typed_node_data.body.type
            data = typed_node_data.body.data
            match body_type:
+                case "none":
+                    pass
                case "binary":
                    if len(data) != 1:
                        raise RequestBodyError("invalid body data, should have only one item")
--- a/api/core/workflow/nodes/if_else/if_else_node.py
+++ b/api/core/workflow/nodes/if_else/if_else_node.py
@ -83,7 +83,7 @@ class IfElseNode(Node):
            else:
                # TODO: Update database then remove this
                # Fallback to old structure if cases are not defined
-                input_conditions, group_result, final_result = _should_not_use_old_function(  # ty: ignore [deprecated]
+                input_conditions, group_result, final_result = _should_not_use_old_function(  # pyright: ignore [reportDeprecated]
                    condition_processor=condition_processor,
                    variable_pool=self.graph_runtime_state.variable_pool,
                    conditions=self._node_data.conditions or [],
--- a/api/core/workflow/nodes/knowledge_index/knowledge_index_node.py
+++ b/api/core/workflow/nodes/knowledge_index/knowledge_index_node.py
@ -136,6 +136,11 @@ class KnowledgeIndexNode(Node):
        document = db.session.query(Document).filter_by(id=document_id.value).first()
        if not document:
            raise KnowledgeIndexNodeError(f"Document {document_id.value} not found.")
+        doc_id_value = document.id
+        ds_id_value = dataset.id
+        dataset_name_value = dataset.name
+        document_name_value = document.name
+        created_at_value = document.created_at
        # chunk nodes by chunk size
        indexing_start_at = time.perf_counter()
        index_processor = IndexProcessorFactory(dataset.chunk_structure).init_index_processor()
@ -161,16 +166,16 @@ class KnowledgeIndexNode(Node):
        document.word_count = (
            db.session.query(func.sum(DocumentSegment.word_count))
            .where(
-                DocumentSegment.document_id == document.id,
-                DocumentSegment.dataset_id == dataset.id,
+                DocumentSegment.document_id == doc_id_value,
+                DocumentSegment.dataset_id == ds_id_value,
            )
            .scalar()
        )
        db.session.add(document)
        # update document segment status
        db.session.query(DocumentSegment).where(
-            DocumentSegment.document_id == document.id,
-            DocumentSegment.dataset_id == dataset.id,
+            DocumentSegment.document_id == doc_id_value,
+            DocumentSegment.dataset_id == ds_id_value,
        ).update(
            {
                DocumentSegment.status: "completed",
@ -182,13 +187,13 @@ class KnowledgeIndexNode(Node):
        db.session.commit()

        return {
-            "dataset_id": dataset.id,
-            "dataset_name": dataset.name,
+            "dataset_id": ds_id_value,
+            "dataset_name": dataset_name_value,
            "batch": batch.value,
-            "document_id": document.id,
-            "document_name": document.name,
-            "created_at": document.created_at.timestamp(),
-            "display_status": document.indexing_status,
+            "document_id": doc_id_value,
+            "document_name": document_name_value,
+            "created_at": created_at_value.timestamp(),
+            "display_status": "completed",
        }

    def _get_preview_output(self, chunk_structure: str, chunks: Any) -> Mapping[str, Any]:
--- a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py
+++ b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py
@ -107,7 +107,7 @@ class KnowledgeRetrievalNode(Node):
            graph_runtime_state=graph_runtime_state,
        )
        # LLM file outputs, used for MultiModal outputs.
-        self._file_outputs: list[File] = []
+        self._file_outputs = []

        if llm_file_saver is None:
            llm_file_saver = FileSaverImpl(
--- a/api/core/workflow/nodes/list_operator/node.py
+++ b/api/core/workflow/nodes/list_operator/node.py
@ -161,6 +161,8 @@ class ListOperatorNode(Node):
            elif isinstance(variable, ArrayFileSegment):
                if isinstance(condition.value, str):
                    value = self.graph_runtime_state.variable_pool.convert_template(condition.value).text
+                elif isinstance(condition.value, bool):
+                    raise ValueError(f"File filter expects a string value, got {type(condition.value)}")
                else:
                    value = condition.value
                filter_func = _get_file_filter_func(
--- a/api/core/workflow/nodes/llm/file_saver.py
+++ b/api/core/workflow/nodes/llm/file_saver.py
@ -46,7 +46,7 @@ class LLMFileSaver(tp.Protocol):
            dot (`.`). For example, `.py` and `.tar.gz` are both valid values, while `py`
            and `tar.gz` are not.
        """
-        pass
+        raise NotImplementedError()

    def save_remote_url(self, url: str, file_type: FileType) -> File:
        """save_remote_url saves the file from a remote url returned by LLM.
@ -56,7 +56,7 @@ class LLMFileSaver(tp.Protocol):
        :param url: the url of the file.
        :param file_type: the file type of the file, check `FileType` enum for reference.
        """
-        pass
+        raise NotImplementedError()


 EngineFactory: tp.TypeAlias = tp.Callable[[], Engine]
--- a/api/core/workflow/nodes/llm/node.py
+++ b/api/core/workflow/nodes/llm/node.py
@ -23,6 +23,7 @@ from core.model_runtime.entities.llm_entities import (
    LLMResult,
    LLMResultChunk,
    LLMResultChunkWithStructuredOutput,
+    LLMResultWithStructuredOutput,
    LLMStructuredOutput,
    LLMUsage,
 )
@ -127,7 +128,7 @@ class LLMNode(Node):
            graph_runtime_state=graph_runtime_state,
        )
        # LLM file outputs, used for MultiModal outputs.
-        self._file_outputs: list[File] = []
+        self._file_outputs = []

        if llm_file_saver is None:
            llm_file_saver = FileSaverImpl(
@ -165,6 +166,7 @@ class LLMNode(Node):
        node_inputs: dict[str, Any] = {}
        process_data: dict[str, Any] = {}
        result_text = ""
+        clean_text = ""
        usage = LLMUsage.empty_usage()
        finish_reason = None
        reasoning_content = None
@ -278,6 +280,13 @@ class LLMNode(Node):
                        # Extract clean text from <think> tags
                        clean_text, _ = LLMNode._split_reasoning(result_text, self._node_data.reasoning_format)

+                    # Process structured output if available from the event.
+                    structured_output = (
+                        LLMStructuredOutput(structured_output=event.structured_output)
+                        if event.structured_output
+                        else None
+                    )
+
                    # deduct quota
                    llm_utils.deduct_llm_quota(tenant_id=self.tenant_id, model_instance=model_instance, usage=usage)
                    break
@ -1048,7 +1057,7 @@ class LLMNode(Node):
    @staticmethod
    def handle_blocking_result(
        *,
-        invoke_result: LLMResult,
+        invoke_result: LLMResult | LLMResultWithStructuredOutput,
        saver: LLMFileSaver,
        file_outputs: list["File"],
        reasoning_format: Literal["separated", "tagged"] = "tagged",
@ -1079,6 +1088,8 @@ class LLMNode(Node):
            finish_reason=None,
            # Reasoning content for workflow variables and downstream nodes
            reasoning_content=reasoning_content,
+            # Pass structured output if enabled
+            structured_output=getattr(invoke_result, "structured_output", None),
        )

    @staticmethod
--- a/api/core/workflow/nodes/parameter_extractor/prompts.py
+++ b/api/core/workflow/nodes/parameter_extractor/prompts.py
@ -179,6 +179,6 @@ CHAT_EXAMPLE = [
                "required": ["food"],
            },
        },
-        "assistant": {"text": "I need to output a valid JSON object.", "json": {"result": "apple pie"}},
+        "assistant": {"text": "I need to output a valid JSON object.", "json": {"food": "apple pie"}},
    },
 ]
--- a/api/core/workflow/nodes/question_classifier/question_classifier_node.py
+++ b/api/core/workflow/nodes/question_classifier/question_classifier_node.py
@ -68,7 +68,7 @@ class QuestionClassifierNode(Node):
            graph_runtime_state=graph_runtime_state,
        )
        # LLM file outputs, used for MultiModal outputs.
-        self._file_outputs: list[File] = []
+        self._file_outputs = []

        if llm_file_saver is None:
            llm_file_saver = FileSaverImpl(
@ -111,9 +111,9 @@ class QuestionClassifierNode(Node):
        query = variable.value if variable else None
        variables = {"query": query}
        # fetch model config
-        model_instance, model_config = LLMNode._fetch_model_config(
-            node_data_model=node_data.model,
+        model_instance, model_config = llm_utils.fetch_model_config(
            tenant_id=self.tenant_id,
+            node_data_model=node_data.model,
        )
        # fetch memory
        memory = llm_utils.fetch_memory(
--- a/api/core/workflow/workflow_entry.py
+++ b/api/core/workflow/workflow_entry.py
@ -416,4 +416,8 @@ class WorkflowEntry:

            # append variable and value to variable pool
            if variable_node_id != ENVIRONMENT_VARIABLE_NODE_ID:
+                # In single run, the input_value is set as the LLM's structured output value within the variable_pool.
+                if len(variable_key_list) == 2 and variable_key_list[0] == "structured_output":
+                    input_value = {variable_key_list[1]: input_value}
+                    variable_key_list = variable_key_list[0:1]
                variable_pool.add([variable_node_id] + variable_key_list, input_value)