refactor: LLM plugin invoke parsing (#31499)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
盐粒 Yanli
2026-01-26 14:59:57 +08:00
committed by GitHub
parent a51ced0a4f
commit 92011d0a31
4 changed files with 350 additions and 150 deletions


@@ -1,7 +1,7 @@
import logging
import time
import uuid
from collections.abc import Generator, Sequence
from collections.abc import Callable, Generator, Iterator, Sequence
from typing import Union
from pydantic import ConfigDict
@@ -30,6 +30,142 @@ def _gen_tool_call_id() -> str:
return f"chatcmpl-tool-{str(uuid.uuid4().hex)}"
def _run_callbacks(callbacks: Sequence[Callback] | None, *, event: str, invoke: Callable[[Callback], None]) -> None:
if not callbacks:
return
for callback in callbacks:
try:
invoke(callback)
except Exception as e:
if callback.raise_error:
raise
logger.warning("Callback %s %s failed with error %s", callback.__class__.__name__, event, e)
def _get_or_create_tool_call(
existing_tools_calls: list[AssistantPromptMessage.ToolCall],
tool_call_id: str,
) -> AssistantPromptMessage.ToolCall:
"""
Get or create a tool call by ID.
If `tool_call_id` is empty, returns the most recently created tool call.
"""
if not tool_call_id:
if not existing_tools_calls:
raise ValueError("tool_call_id is empty but no existing tool call is available to apply the delta")
return existing_tools_calls[-1]
tool_call = next((tool_call for tool_call in existing_tools_calls if tool_call.id == tool_call_id), None)
if tool_call is None:
tool_call = AssistantPromptMessage.ToolCall(
id=tool_call_id,
type="function",
function=AssistantPromptMessage.ToolCall.ToolCallFunction(name="", arguments=""),
)
existing_tools_calls.append(tool_call)
return tool_call
def _merge_tool_call_delta(
tool_call: AssistantPromptMessage.ToolCall,
delta: AssistantPromptMessage.ToolCall,
) -> None:
if delta.id:
tool_call.id = delta.id
if delta.type:
tool_call.type = delta.type
if delta.function.name:
tool_call.function.name = delta.function.name
if delta.function.arguments:
tool_call.function.arguments += delta.function.arguments
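# --- Illustrative sketch, not part of this commit's diff ---
# How the two helpers above cooperate when a tool call arrives as several
# streamed deltas: the first delta (with an ID) creates the entry, and a later
# delta with an empty ID is appended to the most recent tool call's arguments.
# The IDs and argument fragments are made up for this example.
def _demo_merge_tool_call_deltas() -> AssistantPromptMessage.ToolCall:
    calls: list[AssistantPromptMessage.ToolCall] = []
    deltas = [
        AssistantPromptMessage.ToolCall(
            id="call_1",
            type="function",
            function=AssistantPromptMessage.ToolCall.ToolCallFunction(
                name="get_weather", arguments='{"city": '
            ),
        ),
        AssistantPromptMessage.ToolCall(
            id="",  # empty ID: the delta targets the most recently created call
            type="function",
            function=AssistantPromptMessage.ToolCall.ToolCallFunction(
                name="", arguments='"Berlin"}'
            ),
        ),
    ]
    for delta in deltas:
        tool_call = _get_or_create_tool_call(calls, delta.id)
        _merge_tool_call_delta(tool_call, delta)
    # calls[0].function.arguments == '{"city": "Berlin"}'
    return calls[0]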
def _build_llm_result_from_first_chunk(
model: str,
prompt_messages: Sequence[PromptMessage],
chunks: Iterator[LLMResultChunk],
) -> LLMResult:
"""
Build a single `LLMResult` from the first returned chunk.
This is used for `stream=False`, because the plugin side may still return the response as a chunked stream.
"""
content = ""
content_list: list[PromptMessageContentUnionTypes] = []
usage = LLMUsage.empty_usage()
system_fingerprint: str | None = None
tools_calls: list[AssistantPromptMessage.ToolCall] = []
first_chunk = next(chunks, None)
if first_chunk is not None:
if isinstance(first_chunk.delta.message.content, str):
content += first_chunk.delta.message.content
elif isinstance(first_chunk.delta.message.content, list):
content_list.extend(first_chunk.delta.message.content)
if first_chunk.delta.message.tool_calls:
_increase_tool_call(first_chunk.delta.message.tool_calls, tools_calls)
usage = first_chunk.delta.usage or LLMUsage.empty_usage()
system_fingerprint = first_chunk.system_fingerprint
return LLMResult(
model=model,
prompt_messages=prompt_messages,
message=AssistantPromptMessage(
content=content or content_list,
tool_calls=tools_calls,
),
usage=usage,
system_fingerprint=system_fingerprint,
)
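# --- Illustrative sketch, not part of this commit's diff ---
# For stream=False the plugin daemon may still answer with a chunk stream; the
# helper above consumes only the first chunk and folds it into one LLMResult.
# The import path and field names below follow this module's llm_entities
# definitions; the chunk contents are made up for the example.
def _demo_first_chunk_result() -> LLMResult:
    from core.model_runtime.entities.llm_entities import LLMResultChunkDelta

    def fake_chunks() -> Iterator[LLMResultChunk]:
        yield LLMResultChunk(
            model="example-model",
            prompt_messages=[],
            delta=LLMResultChunkDelta(
                index=0,
                message=AssistantPromptMessage(content="Hello"),
                usage=LLMUsage.empty_usage(),
            ),
        )
        # Never reached: only the first chunk is pulled from the iterator.
        yield LLMResultChunk(
            model="example-model",
            prompt_messages=[],
            delta=LLMResultChunkDelta(index=1, message=AssistantPromptMessage(content=" world")),
        )

    result = _build_llm_result_from_first_chunk(
        model="example-model", prompt_messages=[], chunks=fake_chunks()
    )
    # result.message.content == "Hello"; result.usage is the first chunk's usage
    return result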
def _invoke_llm_via_plugin(
*,
tenant_id: str,
user_id: str,
plugin_id: str,
provider: str,
model: str,
credentials: dict,
model_parameters: dict,
prompt_messages: Sequence[PromptMessage],
tools: list[PromptMessageTool] | None,
stop: Sequence[str] | None,
stream: bool,
) -> Union[LLMResult, Generator[LLMResultChunk, None, None]]:
from core.plugin.impl.model import PluginModelClient
plugin_model_manager = PluginModelClient()
return plugin_model_manager.invoke_llm(
tenant_id=tenant_id,
user_id=user_id,
plugin_id=plugin_id,
provider=provider,
model=model,
credentials=credentials,
model_parameters=model_parameters,
prompt_messages=list(prompt_messages),
tools=tools,
stop=list(stop) if stop else None,
stream=stream,
)
def _normalize_non_stream_plugin_result(
model: str,
prompt_messages: Sequence[PromptMessage],
result: Union[LLMResult, Iterator[LLMResultChunk]],
) -> LLMResult:
if isinstance(result, LLMResult):
return result
return _build_llm_result_from_first_chunk(model=model, prompt_messages=prompt_messages, chunks=result)
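# --- Illustrative sketch, not part of this commit's diff ---
# The normalizer is a thin dispatcher: an already materialized LLMResult passes
# through unchanged, while an iterator of chunks (a plugin that streams even
# though stream=False was requested) is folded by the first-chunk helper above.
def _demo_normalize_passthrough() -> LLMResult:
    direct = _demo_first_chunk_result()  # from the sketch above
    normalized = _normalize_non_stream_plugin_result(
        model="example-model", prompt_messages=[], result=direct
    )
    assert normalized is direct  # LLMResult inputs are returned as-is
    return normalized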
def _increase_tool_call(
new_tool_calls: list[AssistantPromptMessage.ToolCall], existing_tools_calls: list[AssistantPromptMessage.ToolCall]
):
@@ -40,42 +176,13 @@ def _increase_tool_call(
:param existing_tools_calls: List of existing tool calls to be modified IN-PLACE.
"""
def get_tool_call(tool_call_id: str):
"""
Get or create a tool call by ID
:param tool_call_id: tool call ID
:return: existing or new tool call
"""
if not tool_call_id:
return existing_tools_calls[-1]
_tool_call = next((_tool_call for _tool_call in existing_tools_calls if _tool_call.id == tool_call_id), None)
if _tool_call is None:
_tool_call = AssistantPromptMessage.ToolCall(
id=tool_call_id,
type="function",
function=AssistantPromptMessage.ToolCall.ToolCallFunction(name="", arguments=""),
)
existing_tools_calls.append(_tool_call)
return _tool_call
for new_tool_call in new_tool_calls:
# generate ID for tool calls with function name but no ID to track them
if new_tool_call.function.name and not new_tool_call.id:
new_tool_call.id = _gen_tool_call_id()
# get tool call
tool_call = get_tool_call(new_tool_call.id)
# update tool call
if new_tool_call.id:
tool_call.id = new_tool_call.id
if new_tool_call.type:
tool_call.type = new_tool_call.type
if new_tool_call.function.name:
tool_call.function.name = new_tool_call.function.name
if new_tool_call.function.arguments:
tool_call.function.arguments += new_tool_call.function.arguments
tool_call = _get_or_create_tool_call(existing_tools_calls, new_tool_call.id)
_merge_tool_call_delta(tool_call, new_tool_call)
class LargeLanguageModel(AIModel):
@@ -141,10 +248,7 @@ class LargeLanguageModel(AIModel):
result: Union[LLMResult, Generator[LLMResultChunk, None, None]]
try:
from core.plugin.impl.model import PluginModelClient
plugin_model_manager = PluginModelClient()
result = plugin_model_manager.invoke_llm(
result = _invoke_llm_via_plugin(
tenant_id=self.tenant_id,
user_id=user or "unknown",
plugin_id=self.plugin_id,
@@ -154,38 +258,13 @@ class LargeLanguageModel(AIModel):
model_parameters=model_parameters,
prompt_messages=prompt_messages,
tools=tools,
stop=list(stop) if stop else None,
stop=stop,
stream=stream,
)
if not stream:
content = ""
content_list = []
usage = LLMUsage.empty_usage()
system_fingerprint = None
tools_calls: list[AssistantPromptMessage.ToolCall] = []
for chunk in result:
if isinstance(chunk.delta.message.content, str):
content += chunk.delta.message.content
elif isinstance(chunk.delta.message.content, list):
content_list.extend(chunk.delta.message.content)
if chunk.delta.message.tool_calls:
_increase_tool_call(chunk.delta.message.tool_calls, tools_calls)
usage = chunk.delta.usage or LLMUsage.empty_usage()
system_fingerprint = chunk.system_fingerprint
break
result = LLMResult(
model=model,
prompt_messages=prompt_messages,
message=AssistantPromptMessage(
content=content or content_list,
tool_calls=tools_calls,
),
usage=usage,
system_fingerprint=system_fingerprint,
result = _normalize_non_stream_plugin_result(
model=model, prompt_messages=prompt_messages, result=result
)
except Exception as e:
self._trigger_invoke_error_callbacks(
@@ -425,27 +504,21 @@ class LargeLanguageModel(AIModel):
:param user: unique user id
:param callbacks: callbacks
"""
if callbacks:
for callback in callbacks:
try:
callback.on_before_invoke(
llm_instance=self,
model=model,
credentials=credentials,
prompt_messages=prompt_messages,
model_parameters=model_parameters,
tools=tools,
stop=stop,
stream=stream,
user=user,
)
except Exception as e:
if callback.raise_error:
raise e
else:
logger.warning(
"Callback %s on_before_invoke failed with error %s", callback.__class__.__name__, e
)
_run_callbacks(
callbacks,
event="on_before_invoke",
invoke=lambda callback: callback.on_before_invoke(
llm_instance=self,
model=model,
credentials=credentials,
prompt_messages=prompt_messages,
model_parameters=model_parameters,
tools=tools,
stop=stop,
stream=stream,
user=user,
),
)
def _trigger_new_chunk_callbacks(
self,
@@ -473,26 +546,22 @@ class LargeLanguageModel(AIModel):
:param stream: is stream response
:param user: unique user id
"""
if callbacks:
for callback in callbacks:
try:
callback.on_new_chunk(
llm_instance=self,
chunk=chunk,
model=model,
credentials=credentials,
prompt_messages=prompt_messages,
model_parameters=model_parameters,
tools=tools,
stop=stop,
stream=stream,
user=user,
)
except Exception as e:
if callback.raise_error:
raise e
else:
logger.warning("Callback %s on_new_chunk failed with error %s", callback.__class__.__name__, e)
_run_callbacks(
callbacks,
event="on_new_chunk",
invoke=lambda callback: callback.on_new_chunk(
llm_instance=self,
chunk=chunk,
model=model,
credentials=credentials,
prompt_messages=prompt_messages,
model_parameters=model_parameters,
tools=tools,
stop=stop,
stream=stream,
user=user,
),
)
def _trigger_after_invoke_callbacks(
self,
@@ -521,28 +590,22 @@ class LargeLanguageModel(AIModel):
:param user: unique user id
:param callbacks: callbacks
"""
if callbacks:
for callback in callbacks:
try:
callback.on_after_invoke(
llm_instance=self,
result=result,
model=model,
credentials=credentials,
prompt_messages=prompt_messages,
model_parameters=model_parameters,
tools=tools,
stop=stop,
stream=stream,
user=user,
)
except Exception as e:
if callback.raise_error:
raise e
else:
logger.warning(
"Callback %s on_after_invoke failed with error %s", callback.__class__.__name__, e
)
_run_callbacks(
callbacks,
event="on_after_invoke",
invoke=lambda callback: callback.on_after_invoke(
llm_instance=self,
result=result,
model=model,
credentials=credentials,
prompt_messages=prompt_messages,
model_parameters=model_parameters,
tools=tools,
stop=stop,
stream=stream,
user=user,
),
)
def _trigger_invoke_error_callbacks(
self,
@@ -571,25 +634,19 @@ class LargeLanguageModel(AIModel):
:param user: unique user id
:param callbacks: callbacks
"""
if callbacks:
for callback in callbacks:
try:
callback.on_invoke_error(
llm_instance=self,
ex=ex,
model=model,
credentials=credentials,
prompt_messages=prompt_messages,
model_parameters=model_parameters,
tools=tools,
stop=stop,
stream=stream,
user=user,
)
except Exception as e:
if callback.raise_error:
raise e
else:
logger.warning(
"Callback %s on_invoke_error failed with error %s", callback.__class__.__name__, e
)
_run_callbacks(
callbacks,
event="on_invoke_error",
invoke=lambda callback: callback.on_invoke_error(
llm_instance=self,
ex=ex,
model=model,
credentials=credentials,
prompt_messages=prompt_messages,
model_parameters=model_parameters,
tools=tools,
stop=stop,
stream=stream,
user=user,
),
)