Mirror of https://github.com/langgenius/dify.git, synced 2026-04-29 15:08:06 +08:00
Merge commit '9c339239' into sandboxed-agent-rebase
Made-with: Cursor

# Conflicts:
#   api/README.md
#   api/controllers/console/app/workflow_draft_variable.py
#   api/core/agent/cot_agent_runner.py
#   api/core/agent/fc_agent_runner.py
#   api/core/app/apps/advanced_chat/app_runner.py
#   api/core/plugin/backwards_invocation/model.py
#   api/core/prompt/advanced_prompt_transform.py
#   api/core/workflow/nodes/base/node.py
#   api/core/workflow/nodes/llm/llm_utils.py
#   api/core/workflow/nodes/llm/node.py
#   api/core/workflow/nodes/parameter_extractor/parameter_extractor_node.py
#   api/core/workflow/nodes/question_classifier/question_classifier_node.py
#   api/core/workflow/runtime/graph_runtime_state.py
#   api/extensions/storage/base_storage.py
#   api/factories/variable_factory.py
#   api/pyproject.toml
#   api/services/variable_truncator.py
#   api/uv.lock
#   web/app/account/oauth/authorize/page.tsx
#   web/app/components/app/configuration/config-var/config-modal/field.tsx
#   web/app/components/base/alert.tsx
#   web/app/components/base/chat/chat/answer/human-input-content/executed-action.tsx
#   web/app/components/base/chat/chat/answer/more.tsx
#   web/app/components/base/chat/chat/answer/operation.tsx
#   web/app/components/base/chat/chat/answer/workflow-process.tsx
#   web/app/components/base/chat/chat/citation/index.tsx
#   web/app/components/base/chat/chat/citation/popup.tsx
#   web/app/components/base/chat/chat/citation/progress-tooltip.tsx
#   web/app/components/base/chat/chat/citation/tooltip.tsx
#   web/app/components/base/chat/chat/question.tsx
#   web/app/components/base/chat/embedded-chatbot/inputs-form/index.tsx
#   web/app/components/base/chat/embedded-chatbot/inputs-form/view-form-dropdown.tsx
#   web/app/components/base/markdown-blocks/form.tsx
#   web/app/components/base/prompt-editor/plugins/hitl-input-block/component-ui.tsx
#   web/app/components/base/tag-management/panel.tsx
#   web/app/components/base/tag-management/trigger.tsx
#   web/app/components/header/account-setting/index.tsx
#   web/app/components/header/account-setting/members-page/transfer-ownership-modal/index.tsx
#   web/app/components/header/account-setting/model-provider-page/provider-added-card/index.tsx
#   web/app/signin/utils/post-login-redirect.ts
#   web/eslint-suppressions.json
#   web/package.json
#   web/pnpm-lock.yaml
api/core/model_runtime/memory/__init__.py (new file, +3 lines)
@@ -0,0 +1,3 @@
+from .prompt_message_memory import DEFAULT_MEMORY_MAX_TOKEN_LIMIT, PromptMessageMemory
+
+__all__ = ["DEFAULT_MEMORY_MAX_TOKEN_LIMIT", "PromptMessageMemory"]
api/core/model_runtime/memory/prompt_message_memory.py (new file, +18 lines)
@@ -0,0 +1,18 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
+from typing import Protocol
+
+from core.model_runtime.entities import PromptMessage
+
+DEFAULT_MEMORY_MAX_TOKEN_LIMIT = 2000
+
+
+class PromptMessageMemory(Protocol):
+    """Port for loading memory as prompt messages."""
+
+    def get_history_prompt_messages(
+        self, max_token_limit: int = DEFAULT_MEMORY_MAX_TOKEN_LIMIT, message_limit: int | None = None
+    ) -> Sequence[PromptMessage]:
+        """Return historical prompt messages constrained by token/message limits."""
+        ...
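For readers skimming the diff, the protocol above is purely structural: any object that exposes a matching get_history_prompt_messages method satisfies PromptMessageMemory without inheriting from it. The following is a minimal, self-contained sketch of that idea, not part of this commit; ToyPromptMessage and ListBackedMemory are hypothetical stand-ins for PromptMessage and a concrete memory backend.

# Illustrative sketch only -- ToyPromptMessage and ListBackedMemory are hypothetical names,
# not part of this commit; they stand in for PromptMessage and a real memory backend.
from collections.abc import Sequence
from dataclasses import dataclass
from typing import Protocol

DEFAULT_MEMORY_MAX_TOKEN_LIMIT = 2000


@dataclass
class ToyPromptMessage:
    role: str
    content: str


class PromptMessageMemory(Protocol):
    def get_history_prompt_messages(
        self, max_token_limit: int = DEFAULT_MEMORY_MAX_TOKEN_LIMIT, message_limit: int | None = None
    ) -> Sequence[ToyPromptMessage]: ...


class ListBackedMemory:
    """Satisfies the protocol structurally: no base class, just a matching method signature."""

    def __init__(self, history: list[ToyPromptMessage]) -> None:
        self._history = history

    def get_history_prompt_messages(
        self, max_token_limit: int = DEFAULT_MEMORY_MAX_TOKEN_LIMIT, message_limit: int | None = None
    ) -> Sequence[ToyPromptMessage]:
        # A real backend would also trim to max_token_limit; only message_limit is honoured here.
        return self._history if message_limit is None else self._history[-message_limit:]


def render_history(memory: PromptMessageMemory) -> list[str]:
    # Callers depend only on the protocol, so any structurally compatible backend can be injected.
    return [m.content for m in memory.get_history_prompt_messages(message_limit=10)]


print(render_history(ListBackedMemory([ToyPromptMessage("user", "hi"), ToyPromptMessage("assistant", "hello")])))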
@@ -83,19 +83,21 @@ def _merge_tool_call_delta(
         tool_call.function.arguments += delta.function.arguments
 
 
-def _build_llm_result_from_first_chunk(
+def _build_llm_result_from_chunks(
     model: str,
     prompt_messages: Sequence[PromptMessage],
     chunks: Iterator[LLMResultChunk],
 ) -> LLMResult:
     """
-    Build a single `LLMResult` from the first returned chunk.
+    Build a single `LLMResult` by accumulating all returned chunks.
 
-    This is used for `stream=False` because the plugin side may still implement the response via a chunked stream.
+    Some models only support streaming output (e.g. Qwen3 open-source edition)
+    and the plugin side may still implement the response via a chunked stream,
+    so all chunks must be consumed and concatenated into a single ``LLMResult``.
 
-    Note:
-        This function always drains the `chunks` iterator after reading the first chunk to ensure any underlying
-        streaming resources are released (e.g., HTTP connections owned by the plugin runtime).
+    The ``usage`` is taken from the last chunk that carries it, which is the
+    typical convention for streaming responses (the final chunk contains the
+    aggregated token counts).
     """
     content = ""
     content_list: list[PromptMessageContentUnionTypes] = []
@@ -104,24 +106,27 @@ def _build_llm_result_from_first_chunk(
     tools_calls: list[AssistantPromptMessage.ToolCall] = []
 
     try:
-        first_chunk = next(chunks, None)
-        if first_chunk is not None:
-            if isinstance(first_chunk.delta.message.content, str):
-                content += first_chunk.delta.message.content
-            elif isinstance(first_chunk.delta.message.content, list):
-                content_list.extend(first_chunk.delta.message.content)
+        for chunk in chunks:
+            if isinstance(chunk.delta.message.content, str):
+                content += chunk.delta.message.content
+            elif isinstance(chunk.delta.message.content, list):
+                content_list.extend(chunk.delta.message.content)
 
-            if first_chunk.delta.message.tool_calls:
-                _increase_tool_call(first_chunk.delta.message.tool_calls, tools_calls)
+            if chunk.delta.message.tool_calls:
+                _increase_tool_call(chunk.delta.message.tool_calls, tools_calls)
 
-            usage = first_chunk.delta.usage or LLMUsage.empty_usage()
-            system_fingerprint = first_chunk.system_fingerprint
+            if chunk.delta.usage:
+                usage = chunk.delta.usage
+            if chunk.system_fingerprint:
+                system_fingerprint = chunk.system_fingerprint
     except Exception:
         logger.exception("Error while consuming non-stream plugin chunk iterator.")
         raise
     finally:
-        try:
-            for _ in chunks:
-                pass
-        except Exception:
-            logger.debug("Failed to drain non-stream plugin chunk iterator.", exc_info=True)
+        # Drain any remaining chunks to release underlying streaming resources (e.g. HTTP connections).
         close = getattr(chunks, "close", None)
         if callable(close):
             close()
 
     return LLMResult(
         model=model,
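Outside the dify entities, the accumulation logic above boils down to a small pattern: concatenate each chunk's content and let the last chunk that reports usage win. The sketch below is illustrative only and not from this commit; ToyChunk and the plain dict usage are hypothetical stand-ins for LLMResultChunk and LLMUsage.

# Minimal sketch of the pattern above, with hypothetical toy types (not the dify entities):
# concatenate text from every chunk and keep the usage reported by the last chunk that has one.
from collections.abc import Iterator
from dataclasses import dataclass, field


@dataclass
class ToyChunk:
    text: str
    usage: dict = field(default_factory=dict)  # the final chunk usually carries aggregated token counts


def accumulate(chunks: Iterator[ToyChunk]) -> tuple[str, dict]:
    content = ""
    usage: dict = {}
    try:
        for chunk in chunks:
            content += chunk.text
            if chunk.usage:  # "last usage wins", mirroring the streaming convention
                usage = chunk.usage
    finally:
        # Generators expose close(); plain iterators may not, hence the getattr guard.
        close = getattr(chunks, "close", None)
        if callable(close):
            close()
    return content, usage


stream = iter([ToyChunk("Hel"), ToyChunk("lo"), ToyChunk("", {"total_tokens": 5})])
print(accumulate(stream))  # ('Hello', {'total_tokens': 5})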
@@ -174,7 +179,7 @@ def _normalize_non_stream_plugin_result(
 ) -> LLMResult:
     if isinstance(result, LLMResult):
         return result
-    return _build_llm_result_from_first_chunk(model=model, prompt_messages=prompt_messages, chunks=result)
+    return _build_llm_result_from_chunks(model=model, prompt_messages=prompt_messages, chunks=result)
 
 
 def _increase_tool_call(
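The one-line change above reflects the same idea in miniature: even with stream=False, a plugin may hand back either a finished result or a chunk iterator, and the caller normalises whichever it gets. A hypothetical, simplified sketch using plain strings instead of LLMResult/LLMResultChunk:

# Hypothetical, simplified illustration of the normalisation step -- not the actual dify API.
from collections.abc import Iterator


def normalize(result: str | Iterator[str]) -> str:
    if isinstance(result, str):   # plugin already returned a complete result
        return result
    return "".join(result)        # otherwise accumulate the chunk stream into one result


print(normalize("done"))               # -> done
print(normalize(iter(["do", "ne"])))   # -> done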