merge main

Author: zxhlyh
Date: 2025-08-05 10:30:53 +08:00

308 changed files with 10716 additions and 1994 deletions


@@ -597,7 +597,7 @@ def _extract_text_from_vtt(vtt_bytes: bytes) -> str:
     for i in range(1, len(raw_results)):
         spk, txt = raw_results[i]
-        if spk == None:
+        if spk is None:
             merged_results.append((None, current_text))
             continue
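
Note: the change above is the standard PEP 8 fix. "== None" goes through __eq__, which a class can override, so it can claim equality with None for an object that is not None; "is None" checks identity and cannot be fooled. A minimal illustration (the AlwaysEqual class is hypothetical, not from this codebase):

class AlwaysEqual:
    # Overridden equality makes "== None" truthy even for a non-None object.
    def __eq__(self, other):
        return True

spk = AlwaysEqual()
print(spk == None)  # True  -- comparison by value, misleading
print(spk is None)  # False -- comparison by identity, correct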


@@ -277,6 +277,22 @@ class Executor:
         elif self.auth.config.type == "custom":
             headers[authorization.config.header] = authorization.config.api_key or ""

+        # Handle Content-Type for multipart/form-data requests
+        # Fix for issue #22880: Missing boundary when using multipart/form-data
+        body = self.node_data.body
+        if body and body.type == "form-data":
+            # For multipart/form-data with files, let httpx handle the boundary automatically
+            # by not setting Content-Type header when files are present
+            if not self.files or all(f[0] == "__multipart_placeholder__" for f in self.files):
+                # Only set Content-Type when there are no actual files
+                # This ensures httpx generates the correct boundary
+                if "content-type" not in (k.lower() for k in headers):
+                    headers["Content-Type"] = "multipart/form-data"
+        elif body and body.type in BODY_TYPE_TO_CONTENT_TYPE:
+            # Set Content-Type for other body types
+            if "content-type" not in (k.lower() for k in headers):
+                headers["Content-Type"] = BODY_TYPE_TO_CONTENT_TYPE[body.type]
+
         return headers

     def _validate_and_parse_response(self, response: httpx.Response) -> Response:
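
Note on the boundary fix above: a multipart body is only valid when the Content-Type header carries a boundary parameter. httpx generates that boundary while serializing the files argument, but only if no Content-Type header has already been set, which is why the code leaves the header alone when real files are present. A minimal sketch against httpx's public API (the URL is a placeholder):

import httpx

files = {"upload": ("report.txt", b"hello world", "text/plain")}

with httpx.Client() as client:
    # No Content-Type set by hand: httpx serializes the multipart body
    # and emits "multipart/form-data; boundary=<generated>" on its own.
    request = client.build_request("POST", "https://example.com/upload", files=files)
    print(request.headers["content-type"])  # multipart/form-data; boundary=...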
@@ -384,15 +400,24 @@ class Executor:
         # '__multipart_placeholder__' is inserted to force multipart encoding but is not a real file.
         # This prevents logging meaningless placeholder entries.
         if self.files and not all(f[0] == "__multipart_placeholder__" for f in self.files):
-            for key, (filename, content, mime_type) in self.files:
+            for file_entry in self.files:
+                # file_entry should be (key, (filename, content, mime_type)), but handle edge cases
+                if len(file_entry) != 2 or not isinstance(file_entry[1], tuple) or len(file_entry[1]) < 2:
+                    continue  # skip malformed entries
+                key = file_entry[0]
+                content = file_entry[1][1]
                 body_string += f"--{boundary}\r\n"
                 body_string += f'Content-Disposition: form-data; name="{key}"\r\n\r\n'
-                # decode content
-                try:
-                    body_string += content.decode("utf-8")
-                except UnicodeDecodeError:
-                    # fix: decode binary content
-                    pass
+                # decode content safely
+                if isinstance(content, bytes):
+                    try:
+                        body_string += content.decode("utf-8")
+                    except UnicodeDecodeError:
+                        body_string += content.decode("utf-8", errors="replace")
+                elif isinstance(content, str):
+                    body_string += content
+                else:
+                    body_string += f"[Unsupported content type: {type(content).__name__}]"
                 body_string += "\r\n"
             body_string += f"--{boundary}--\r\n"
         elif self.node_data.body:
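
Note: the old branch swallowed UnicodeDecodeError with a bare pass, so binary parts vanished from the logged request body. The new chain keeps every part renderable. The same fallback logic in isolation (render_part is a hypothetical standalone helper, not part of the Executor):

def render_part(content) -> str:
    # Strict UTF-8 first; then a lossy decode where undecodable bytes
    # become U+FFFD; then a readable notice for other payload types.
    if isinstance(content, bytes):
        try:
            return content.decode("utf-8")
        except UnicodeDecodeError:
            return content.decode("utf-8", errors="replace")
    if isinstance(content, str):
        return content
    return f"[Unsupported content type: {type(content).__name__}]"

print(render_part(b"caf\xc3\xa9"))  # café
print(render_part(b"\xff\xfe"))     # two U+FFFD replacement characters
print(render_part(12345))           # [Unsupported content type: int]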


@@ -3,7 +3,7 @@
 import io
 import json
 import logging
 from collections.abc import Generator, Mapping, Sequence
-from typing import TYPE_CHECKING, Any, Optional, cast
+from typing import TYPE_CHECKING, Any, Optional

 from core.app.entities.app_invoke_entities import ModelConfigWithCredentialsEntity
 from core.file import FileType, file_manager
@@ -33,12 +33,10 @@ from core.model_runtime.entities.message_entities import (
     UserPromptMessage,
 )
 from core.model_runtime.entities.model_entities import (
-    AIModelEntity,
     ModelFeature,
     ModelPropertyKey,
     ModelType,
 )
-from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
 from core.model_runtime.utils.encoders import jsonable_encoder
 from core.prompt.entities.advanced_prompt_entities import CompletionModelPromptTemplate, MemoryConfig
 from core.prompt.utils.prompt_message_util import PromptMessageUtil
@@ -1006,21 +1004,6 @@ class LLMNode(BaseNode):
         )
         return saved_file

-    def _fetch_model_schema(self, provider: str) -> AIModelEntity | None:
-        """
-        Fetch model schema
-        """
-        model_name = self._node_data.model.name
-        model_manager = ModelManager()
-        model_instance = model_manager.get_model_instance(
-            tenant_id=self.tenant_id, model_type=ModelType.LLM, provider=provider, model=model_name
-        )
-        model_type_instance = model_instance.model_type_instance
-        model_type_instance = cast(LargeLanguageModel, model_type_instance)
-        model_credentials = model_instance.credentials
-        model_schema = model_type_instance.get_model_schema(model_name, model_credentials)
-        return model_schema
-
     @staticmethod
     def fetch_structured_output_schema(
         *,
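
Note: with _fetch_model_schema deleted, cast, AIModelEntity, and LargeLanguageModel lose their remaining uses in this module, which is presumably why the two import hunks above remove them. Dropping the cast call changes nothing at runtime; typing.cast only informs the type checker:

from typing import cast

x: object = "some value"
s = cast(str, x)  # no conversion, no runtime check: a static-typing hint only
print(s is x)     # True: cast returns its argument unchanged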


@@ -318,6 +318,33 @@ class ToolNode(BaseNode):
                 json.append(message.message.json_object)
             elif message.type == ToolInvokeMessage.MessageType.LINK:
                 assert isinstance(message.message, ToolInvokeMessage.TextMessage)
+
+                if message.meta:
+                    transfer_method = message.meta.get("transfer_method", FileTransferMethod.TOOL_FILE)
+                else:
+                    transfer_method = FileTransferMethod.TOOL_FILE
+
+                tool_file_id = message.message.text.split("/")[-1].split(".")[0]
+                with Session(db.engine) as session:
+                    stmt = select(ToolFile).where(ToolFile.id == tool_file_id)
+                    tool_file = session.scalar(stmt)
+                    if tool_file is None:
+                        raise ToolFileError(f"Tool file {tool_file_id} does not exist")
+
+                mapping = {
+                    "tool_file_id": tool_file_id,
+                    "type": file_factory.get_file_type_by_mime_type(tool_file.mimetype),
+                    "transfer_method": transfer_method,
+                    "url": message.message.text,
+                }
+                file = file_factory.build_from_mapping(
+                    mapping=mapping,
+                    tenant_id=self.tenant_id,
+                )
+                files.append(file)
+
                 stream_text = f"Link: {message.message.text}\n"
                 text += stream_text
                 yield RunStreamChunkEvent(chunk_content=stream_text, from_variable_selector=[node_id, "text"])
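
Note: the new LINK branch derives the tool file id from the last path segment of the link, minus its file extension, then loads the matching ToolFile row so the link can be surfaced as a real File object in files, not just as streamed text. The id parsing in isolation (extract_tool_file_id is a hypothetical helper, and the URL shape is an assumption based on the split logic above):

def extract_tool_file_id(url: str) -> str:
    # ".../tool-files/abc123.png" -> "abc123"
    return url.split("/")[-1].split(".")[0]

print(extract_tool_file_id("https://example.com/files/tools/abc123.png"))  # abc123
print(extract_tool_file_id("https://example.com/files/tools/abc123"))      # abc123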