Merge branch 'main' into fix/chore-fix

Yeuoly
2024-12-06 16:45:25 +08:00
85 changed files with 1846 additions and 1227 deletions

View File

@@ -260,7 +260,7 @@ def migrate_knowledge_vector_database():
     skipped_count = 0
     total_count = 0
     vector_type = dify_config.VECTOR_STORE
-    upper_colletion_vector_types = {
+    upper_collection_vector_types = {
         VectorType.MILVUS,
         VectorType.PGVECTOR,
         VectorType.RELYT,
@@ -268,7 +268,7 @@ def migrate_knowledge_vector_database():
         VectorType.ORACLE,
         VectorType.ELASTICSEARCH,
     }
-    lower_colletion_vector_types = {
+    lower_collection_vector_types = {
         VectorType.ANALYTICDB,
         VectorType.CHROMA,
         VectorType.MYSCALE,
@@ -308,7 +308,7 @@ def migrate_knowledge_vector_database():
                         continue
                 collection_name = ""
                 dataset_id = dataset.id
-                if vector_type in upper_colletion_vector_types:
+                if vector_type in upper_collection_vector_types:
                     collection_name = Dataset.gen_collection_name_by_id(dataset_id)
                 elif vector_type == VectorType.QDRANT:
                     if dataset.collection_binding_id:
@@ -324,7 +324,7 @@ def migrate_knowledge_vector_database():
                     else:
                         collection_name = Dataset.gen_collection_name_by_id(dataset_id)
-                elif vector_type in lower_colletion_vector_types:
+                elif vector_type in lower_collection_vector_types:
                     collection_name = Dataset.gen_collection_name_by_id(dataset_id).lower()
                 else:
                     raise ValueError(f"Vector store {vector_type} is not supported.")

View File

@@ -9,7 +9,7 @@ class PackagingInfo(BaseSettings):
     CURRENT_VERSION: str = Field(
         description="Dify version",
-        default="0.13.0",
+        default="0.13.1",
     )
     COMMIT_SHA: str = Field(

View File

@@ -2,7 +2,7 @@
 Due to the presence of tasks in App Runner that require long execution times, such as LLM generation and external requests, Flask-Sqlalchemy's strategy for database connection pooling is to allocate one connection (transaction) per request. This approach keeps a connection occupied even during non-DB tasks, leading to the inability to acquire new connections during high concurrency requests due to multiple long-running tasks.
-Therefore, the database operations in App Runner and Task Pipeline must ensure connections are closed immediately after use, and it's better to pass IDs rather than Model objects to avoid deattach errors.
+Therefore, the database operations in App Runner and Task Pipeline must ensure connections are closed immediately after use, and it's better to pass IDs rather than Model objects to avoid detach errors.
 Examples:
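A minimal sketch of the pattern, for illustration only: `sync_dataset_task` and `call_external_service` are hypothetical names; only `db` and the `Dataset` model come from the codebase.

```python
from extensions.ext_database import db
from models.dataset import Dataset


def sync_dataset_task(dataset_id: str) -> None:
    # Receive an ID across the boundary, not a Model object,
    # so there is no detached instance to worry about.
    dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
    if dataset is None:
        raise ValueError(f"Dataset {dataset_id} not found")
    dataset_name = dataset.name

    # Return the connection to the pool *before* the long-running work,
    # so it is not held during LLM generation or external requests.
    db.session.close()

    call_external_service(dataset_name)  # hypothetical long-running, non-DB call
```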

View File

@@ -72,7 +72,7 @@ class AppGenerateResponseConverter(ABC):
             for resource in metadata["retriever_resources"]:
                 updated_resources.append(
                     {
-                        "segment_id": resource["segment_id"],
+                        "segment_id": resource.get("segment_id", ""),
                         "position": resource["position"],
                         "document_name": resource["document_name"],
                         "score": resource["score"],

View File

@@ -91,7 +91,7 @@ class XinferenceProvider(Provider):
     """
 ```
-You can also directly raise the corresponding Erros and define them as follows, so that exceptions such as `InvokeConnectionError` can be raised directly in later calls.
+You can also directly raise the corresponding Errors and define them as follows, so that exceptions such as `InvokeConnectionError` can be raised directly in later calls.
 ```python
 @property

View File

@@ -5,7 +5,7 @@ BAICHUAN_CONTEXT = "用户在与一个客观的助手对话。助手会尊重找
 CHAT_APP_COMPLETION_PROMPT_CONFIG = {
     "completion_prompt_config": {
         "prompt": {
-            "text": "{{#pre_prompt#}}\nHere is the chat histories between human and assistant, inside <histories></histories> XML tags.\n\n<histories>\n{{#histories#}}\n</histories>\n\n\nHuman: {{#query#}}\n\nAssistant: "  # noqa: E501
+            "text": "{{#pre_prompt#}}\nHere are the chat histories between human and assistant, inside <histories></histories> XML tags.\n\n<histories>\n{{#histories#}}\n</histories>\n\n\nHuman: {{#query#}}\n\nAssistant: "  # noqa: E501
         },
         "conversation_histories_role": {"user_prefix": "Human", "assistant_prefix": "Assistant"},
     },

View File

@@ -375,7 +375,6 @@ class TidbOnQdrantVector(BaseVector):
         for result in results:
             if result:
                 document = self._document_from_scored_point(result, Field.CONTENT_KEY.value, Field.METADATA_KEY.value)
-                document.metadata["vector"] = result.vector
                 documents.append(document)
         return documents
@@ -394,6 +393,7 @@ class TidbOnQdrantVector(BaseVector):
     ) -> Document:
         return Document(
             page_content=scored_point.payload.get(content_payload_key),
+            vector=scored_point.vector,
             metadata=scored_point.payload.get(metadata_payload_key) or {},
         )

View File

@@ -24,7 +24,7 @@ class IfElseNode(BaseNode[IfElseNodeData]):
         """
         node_inputs: dict[str, list] = {"conditions": []}
-        process_datas: dict[str, list] = {"condition_results": []}
+        process_data: dict[str, list] = {"condition_results": []}
         input_conditions = []
         final_result = False
@@ -40,7 +40,7 @@ class IfElseNode(BaseNode[IfElseNodeData]):
                         operator=case.logical_operator,
                     )
-                    process_datas["condition_results"].append(
+                    process_data["condition_results"].append(
                         {
                             "group": case.model_dump(),
                             "results": group_result,
@@ -65,7 +65,7 @@ class IfElseNode(BaseNode[IfElseNodeData]):
                 selected_case_id = "true" if final_result else "false"
-                process_datas["condition_results"].append(
+                process_data["condition_results"].append(
                     {"group": "default", "results": group_result, "final_result": final_result}
                 )
@@ -73,7 +73,7 @@ class IfElseNode(BaseNode[IfElseNodeData]):
         except Exception as e:
             return NodeRunResult(
-                status=WorkflowNodeExecutionStatus.FAILED, inputs=node_inputs, process_data=process_datas, error=str(e)
+                status=WorkflowNodeExecutionStatus.FAILED, inputs=node_inputs, process_data=process_data, error=str(e)
             )
         outputs = {"result": final_result, "selected_case_id": selected_case_id}
@@ -81,7 +81,7 @@ class IfElseNode(BaseNode[IfElseNodeData]):
         data = NodeRunResult(
             status=WorkflowNodeExecutionStatus.SUCCEEDED,
             inputs=node_inputs,
-            process_data=process_datas,
+            process_data=process_data,
             edge_source_handle=selected_case_id or "false",  # Use case ID or 'default'
             outputs=outputs,
         )

View File

@@ -117,7 +117,7 @@ class IterationNode(BaseNode[IterationNodeData]):
         variable_pool.add([self.node_id, "item"], iterator_list_value[0])
         # init graph engine
-        from core.workflow.graph_engine.graph_engine import GraphEngine
+        from core.workflow.graph_engine.graph_engine import GraphEngine, GraphEngineThreadPool
         graph_engine = GraphEngine(
             tenant_id=self.tenant_id,
@@ -163,8 +163,7 @@ class IterationNode(BaseNode[IterationNodeData]):
             if self.node_data.is_parallel:
                 futures: list[Future] = []
                 q = Queue()
-                thread_pool = graph_engine.workflow_thread_pool_mapping[graph_engine.thread_pool_id]
-                thread_pool._max_workers = self.node_data.parallel_nums
+                thread_pool = GraphEngineThreadPool(max_workers=self.node_data.parallel_nums, max_submit_count=100)
                 for index, item in enumerate(iterator_list_value):
                     future: Future = thread_pool.submit(
                         self._run_single_iter_parallel,

View File

@@ -815,7 +815,7 @@ class LLMNode(BaseNode[LLMNodeData]):
                 "completion_model": {
                     "conversation_histories_role": {"user_prefix": "Human", "assistant_prefix": "Assistant"},
                     "prompt": {
-                        "text": "Here is the chat histories between human and assistant, inside "
+                        "text": "Here are the chat histories between human and assistant, inside "
                         "<histories></histories> XML tags.\n\n<histories>\n{{"
                         "#histories#}}\n</histories>\n\n\nHuman: {{#sys.query#}}\n\nAssistant:",
                         "edition_type": "basic",

View File

@@ -98,7 +98,7 @@ Step 3: Structure the extracted parameters to JSON object as specified in <struc
 Step 4: Ensure that the JSON object is properly formatted and valid. The output should not contain any XML tags. Only the JSON object should be outputted.
 ### Memory
-Here is the chat histories between human and assistant, inside <histories></histories> XML tags.
+Here are the chat histories between human and assistant, inside <histories></histories> XML tags.
 <histories>
 {histories}
 </histories>
@@ -125,7 +125,7 @@ CHAT_GENERATE_JSON_PROMPT = """You should always follow the instructions and out
 The structure of the JSON object you can found in the instructions.
 ### Memory
-Here is the chat histories between human and assistant, inside <histories></histories> XML tags.
+Here are the chat histories between human and assistant, inside <histories></histories> XML tags.
 <histories>
 {histories}
 </histories>

View File

@@ -8,7 +8,7 @@ QUESTION_CLASSIFIER_SYSTEM_PROMPT = """
 ### Constraint
 DO NOT include anything other than the JSON array in your response.
 ### Memory
-Here is the chat histories between human and assistant, inside <histories></histories> XML tags.
+Here are the chat histories between human and assistant, inside <histories></histories> XML tags.
 <histories>
 {histories}
 </histories>
@@ -66,7 +66,7 @@ User:{{"input_text": ["bad service, slow to bring the food"], "categories": [{{"
 Assistant:{{"keywords": ["bad service", "slow", "food", "tip", "terrible", "waitresses"],"category_id": "f6ff5bc3-aca0-4e4a-8627-e760d0aca78f","category_name": "Experience"}}
 </example>
 ### Memory
-Here is the chat histories between human and assistant, inside <histories></histories> XML tags.
+Here are the chat histories between human and assistant, inside <histories></histories> XML tags.
 <histories>
 {histories}
 </histories>

View File

@@ -7,8 +7,8 @@ from .enums import InputType, Operation
 class OperationNotSupportedError(VariableOperatorNodeError):
-    def __init__(self, *, operation: Operation, varialbe_type: str):
-        super().__init__(f"Operation {operation} is not supported for type {varialbe_type}")
+    def __init__(self, *, operation: Operation, variable_type: str):
+        super().__init__(f"Operation {operation} is not supported for type {variable_type}")


 class InputTypeNotSupportedError(VariableOperatorNodeError):

View File

@@ -45,7 +45,7 @@ class VariableAssignerNode(BaseNode[VariableAssignerNodeData]):
             # Check if operation is supported
             if not helpers.is_operation_supported(variable_type=variable.value_type, operation=item.operation):
-                raise OperationNotSupportedError(operation=item.operation, varialbe_type=variable.value_type)
+                raise OperationNotSupportedError(operation=item.operation, variable_type=variable.value_type)
             # Check if variable input is supported
             if item.input_type == InputType.VARIABLE and not helpers.is_variable_input_supported(
@@ -156,4 +156,4 @@ class VariableAssignerNode(BaseNode[VariableAssignerNodeData]):
             case Operation.DIVIDE:
                 return variable.value / value
             case _:
-                raise OperationNotSupportedError(operation=operation, varialbe_type=variable.value_type)
+                raise OperationNotSupportedError(operation=operation, variable_type=variable.value_type)

api/poetry.lock (generated, 1867 lines changed)

File diff suppressed because it is too large.

View File

@@ -20,7 +20,7 @@ azure-ai-inference = "~1.0.0b3"
 azure-ai-ml = "~1.20.0"
 azure-identity = "1.16.1"
 beautifulsoup4 = "4.12.2"
-boto3 = "1.35.17"
+boto3 = "1.35.74"
 bs4 = "~0.0.1"
 cachetools = "~5.3.0"
 celery = "~5.4.0"

View File

@@ -5,7 +5,7 @@ import time
 import click
 from celery import shared_task
-from core.indexing_runner import DocumentIsPausedException
+from core.indexing_runner import DocumentIsPausedError
 from extensions.ext_database import db
 from extensions.ext_storage import storage
 from models.dataset import Dataset, ExternalKnowledgeApis
@@ -86,7 +86,7 @@ def external_document_indexing_task(
                 fg="green",
             )
         )
-    except DocumentIsPausedException as ex:
+    except DocumentIsPausedError as ex:
         logging.info(click.style(str(ex), fg="yellow"))
     except Exception: