revert: add tools for output in agent mode

feat: hide output tools and improve JSON formatting for structured output
feat: hide output tools and improve JSON formatting for structured output
fix: handle prompt template correctly to extract selectors for step run
fix: emit StreamChunkEvent correctly for sandbox agent
chore: improve debug message
fix: incorrect output tool runtime selection
fix: type issues
fix: align parameter list
fix: align parameter list
fix: hide internal builtin providers from tool list
vibe: implement file structured output
vibe: implement file structured output
fix: refix parameter for tool
fix: crash
fix: crash
refactor: remove union types
fix: type check
Merge branch 'feat/structured-output-with-sandbox' into feat/support-agent-sandbox
fix: provide json as text
fix: provide json as text
fix: get AgentResult correctly
fix: provide correct prompts, tools and terminal predicates
fix: provide correct prompts, tools and terminal predicates
fix: circular import
feat: support structured output in sandbox and tool mode
This commit is contained in:
Stream
2026-02-04 21:13:07 +08:00
parent 25065a4f2f
commit e0082dbf18
41 changed files with 1014 additions and 1358 deletions

View File

@ -10,7 +10,6 @@ from collections.abc import Callable, Generator
from typing import TYPE_CHECKING, Any
from core.agent.entities import AgentLog, AgentResult, ExecutionContext
from core.agent.output_tools import ILLEGAL_OUTPUT_TOOL
from core.file import File
from core.model_manager import ModelInstance
from core.model_runtime.entities import (
@ -57,7 +56,8 @@ class AgentPattern(ABC):
@abstractmethod
def run(
self, prompt_messages: list[PromptMessage], model_parameters: dict[str, Any], stop: list[str]
self, prompt_messages: list[PromptMessage], model_parameters: dict[str, Any], stop: list[str] = [],
stream: bool = True,
) -> Generator[LLMResultChunk | AgentLog, None, AgentResult]:
"""Execute the agent strategy."""
pass
@ -462,8 +462,6 @@ class AgentPattern(ABC):
"""Convert tools to prompt message format."""
prompt_tools: list[PromptMessageTool] = []
for tool in self.tools:
if tool.entity.identity.name == ILLEGAL_OUTPUT_TOOL:
continue
prompt_tools.append(tool.to_prompt_message_tool())
return prompt_tools

View File

@ -1,15 +1,10 @@
"""Function Call strategy implementation."""
import json
import uuid
from collections.abc import Generator
from typing import Any, Literal, Protocol, Union, cast
from typing import Any, Union
from core.agent.entities import AgentLog, AgentResult
from core.agent.output_tools import (
ILLEGAL_OUTPUT_TOOL,
TERMINAL_OUTPUT_MESSAGE,
)
from core.file import File
from core.model_runtime.entities import (
AssistantPromptMessage,
@ -30,7 +25,8 @@ class FunctionCallStrategy(AgentPattern):
"""Function Call strategy using model's native tool calling capability."""
def run(
self, prompt_messages: list[PromptMessage], model_parameters: dict[str, Any], stop: list[str]
self, prompt_messages: list[PromptMessage], model_parameters: dict[str, Any], stop: list[str] = [],
stream: bool = True,
) -> Generator[LLMResultChunk | AgentLog, None, AgentResult]:
"""Execute the function call agent strategy."""
# Convert tools to prompt format
@ -43,23 +39,9 @@ class FunctionCallStrategy(AgentPattern):
total_usage: dict[str, LLMUsage | None] = {"usage": None}
messages: list[PromptMessage] = list(prompt_messages) # Create mutable copy
final_text: str = ""
final_tool_args: dict[str, Any] = {"!!!": "!!!"}
finish_reason: str | None = None
output_files: list[File] = [] # Track files produced by tools
class _LLMInvoker(Protocol):
def invoke_llm(
self,
*,
prompt_messages: list[PromptMessage],
model_parameters: dict[str, Any],
tools: list[PromptMessageTool],
stop: list[str],
stream: Literal[False],
user: str | None,
callbacks: list[Any],
) -> LLMResult: ...
while function_call_state and iteration_step <= max_iterations:
function_call_state = False
round_log = self._create_log(
@ -69,7 +51,8 @@ class FunctionCallStrategy(AgentPattern):
data={},
)
yield round_log
# On last iteration, remove tools to force final answer
current_tools: list[PromptMessageTool] = [] if iteration_step == max_iterations else prompt_tools
model_log = self._create_log(
label=f"{self.model_instance.model} Thought",
log_type=AgentLog.LogType.THOUGHT,
@ -86,63 +69,47 @@ class FunctionCallStrategy(AgentPattern):
round_usage: dict[str, LLMUsage | None] = {"usage": None}
# Invoke model
invoker = cast(_LLMInvoker, self.model_instance)
chunks = invoker.invoke_llm(
chunks: Union[Generator[LLMResultChunk, None, None], LLMResult] = self.model_instance.invoke_llm(
prompt_messages=messages,
model_parameters=model_parameters,
tools=prompt_tools,
tools=current_tools,
stop=stop,
stream=False,
stream=stream,
user=self.context.user_id,
callbacks=[],
)
# Process response
tool_calls, response_content, chunk_finish_reason = yield from self._handle_chunks(
chunks, round_usage, model_log, emit_chunks=False
chunks, round_usage, model_log
)
if response_content:
replaced_tool_call = (
str(uuid.uuid4()),
ILLEGAL_OUTPUT_TOOL,
{
"raw": response_content,
},
)
tool_calls.append(replaced_tool_call)
messages.append(self._create_assistant_message("", tool_calls))
messages.append(self._create_assistant_message(response_content, tool_calls))
# Accumulate to total usage
round_usage_value = round_usage.get("usage")
if round_usage_value:
self._accumulate_usage(total_usage, round_usage_value)
# Update final text if no tool calls (this is likely the final answer)
if not tool_calls:
final_text = response_content
# Update finish reason
if chunk_finish_reason:
finish_reason = chunk_finish_reason
assert len(tool_calls) > 0
# Process tool calls
tool_outputs: dict[str, str] = {}
function_call_state = True
# Execute tools
for tool_call_id, tool_name, tool_args in tool_calls:
tool_response, tool_files, _ = yield from self._handle_tool_call(
tool_name, tool_args, tool_call_id, messages, round_log
)
tool_entity = self._find_tool_by_name(tool_name)
if not tool_entity:
raise ValueError(f"Tool {tool_name} not found")
tool_outputs[tool_name] = tool_response
# Track files produced by tools
output_files.extend(tool_files)
if tool_response == TERMINAL_OUTPUT_MESSAGE:
function_call_state = False
final_tool_args = tool_entity.transform_tool_parameters_type(tool_args)
if tool_calls:
function_call_state = True
# Execute tools
for tool_call_id, tool_name, tool_args in tool_calls:
tool_response, tool_files, _ = yield from self._handle_tool_call(
tool_name, tool_args, tool_call_id, messages, round_log
)
tool_outputs[tool_name] = tool_response
# Track files produced by tools
output_files.extend(tool_files)
yield self._finish_log(
round_log,
data={
@ -161,19 +128,8 @@ class FunctionCallStrategy(AgentPattern):
# Return final result
from core.agent.entities import AgentResult
output_payload: str | dict
output_text = final_tool_args.get("text")
output_structured_payload = final_tool_args.get("data")
if isinstance(output_structured_payload, dict):
output_payload = output_structured_payload
elif isinstance(output_text, str):
output_payload = output_text
else:
raise ValueError(f"Final output ({final_tool_args}) is not a string or structured data.")
return AgentResult(
output=output_payload,
text=final_text,
files=output_files,
usage=total_usage.get("usage") or LLMUsage.empty_usage(),
finish_reason=finish_reason,
@ -184,8 +140,6 @@ class FunctionCallStrategy(AgentPattern):
chunks: Union[Generator[LLMResultChunk, None, None], LLMResult],
llm_usage: dict[str, LLMUsage | None],
start_log: AgentLog,
*,
emit_chunks: bool,
) -> Generator[
LLMResultChunk | AgentLog,
None,
@ -217,8 +171,7 @@ class FunctionCallStrategy(AgentPattern):
if chunk.delta.finish_reason:
finish_reason = chunk.delta.finish_reason
if emit_chunks:
yield chunk
yield chunk
else:
# Non-streaming response
result: LLMResult = chunks
@ -233,12 +186,11 @@ class FunctionCallStrategy(AgentPattern):
self._accumulate_usage(llm_usage, result.usage)
# Convert to streaming format
if emit_chunks:
yield LLMResultChunk(
model=result.model,
prompt_messages=result.prompt_messages,
delta=LLMResultChunkDelta(index=0, message=result.message, usage=result.usage),
)
yield LLMResultChunk(
model=result.model,
prompt_messages=result.prompt_messages,
delta=LLMResultChunkDelta(index=0, message=result.message, usage=result.usage),
)
yield self._finish_log(
start_log,
data={
@ -248,14 +200,6 @@ class FunctionCallStrategy(AgentPattern):
)
return tool_calls, response_content, finish_reason
@staticmethod
def _format_output_text(value: Any) -> str:
if value is None:
return ""
if isinstance(value, str):
return value
return json.dumps(value, ensure_ascii=False)
def _create_assistant_message(
self, content: str, tool_calls: list[tuple[str, str, dict[str, Any]]] | None = None
) -> AssistantPromptMessage:

View File

@ -4,17 +4,10 @@ from __future__ import annotations
import json
from collections.abc import Generator
from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING, Any, Union
from core.agent.entities import AgentLog, AgentResult, AgentScratchpadUnit, ExecutionContext
from core.agent.output_parser.cot_output_parser import CotAgentOutputParser
from core.agent.output_tools import (
FINAL_OUTPUT_TOOL,
FINAL_STRUCTURED_OUTPUT_TOOL,
ILLEGAL_OUTPUT_TOOL,
OUTPUT_TEXT_TOOL,
OUTPUT_TOOL_NAME_SET,
)
from core.file import File
from core.model_manager import ModelInstance
from core.model_runtime.entities import (
@ -59,7 +52,8 @@ class ReActStrategy(AgentPattern):
self.instruction = instruction
def run(
self, prompt_messages: list[PromptMessage], model_parameters: dict[str, Any], stop: list[str]
self, prompt_messages: list[PromptMessage], model_parameters: dict[str, Any], stop: list[str] = [],
stream: bool = True,
) -> Generator[LLMResultChunk | AgentLog, None, AgentResult]:
"""Execute the ReAct agent strategy."""
# Initialize tracking
@ -71,19 +65,6 @@ class ReActStrategy(AgentPattern):
output_files: list[File] = [] # Track files produced by tools
final_text: str = ""
finish_reason: str | None = None
tool_instance_names = {tool.entity.identity.name for tool in self.tools}
available_output_tool_names = {
tool_name
for tool_name in tool_instance_names
if tool_name in OUTPUT_TOOL_NAME_SET and tool_name != ILLEGAL_OUTPUT_TOOL
}
if FINAL_STRUCTURED_OUTPUT_TOOL in available_output_tool_names:
terminal_tool_name = FINAL_STRUCTURED_OUTPUT_TOOL
elif FINAL_OUTPUT_TOOL in available_output_tool_names:
terminal_tool_name = FINAL_OUTPUT_TOOL
else:
raise ValueError("No terminal output tool configured")
allow_illegal_output = ILLEGAL_OUTPUT_TOOL in tool_instance_names
# Add "Observation" to stop sequences
if "Observation" not in stop:
@ -100,15 +81,10 @@ class ReActStrategy(AgentPattern):
)
yield round_log
# Build prompt with tool restrictions on last iteration
if iteration_step == max_iterations:
tools_for_prompt = [
tool for tool in self.tools if tool.entity.identity.name in available_output_tool_names
]
else:
tools_for_prompt = [tool for tool in self.tools if tool.entity.identity.name != ILLEGAL_OUTPUT_TOOL]
# Build prompt with/without tools based on iteration
include_tools = iteration_step < max_iterations
current_messages = self._build_prompt_with_react_format(
prompt_messages, agent_scratchpad, tools_for_prompt, self.instruction
prompt_messages, agent_scratchpad, include_tools, self.instruction
)
model_log = self._create_log(
@ -130,18 +106,18 @@ class ReActStrategy(AgentPattern):
messages_to_use = current_messages
# Invoke model
chunks = self.model_instance.invoke_llm(
chunks: Union[Generator[LLMResultChunk, None, None], LLMResult] = self.model_instance.invoke_llm(
prompt_messages=messages_to_use,
model_parameters=model_parameters,
stop=stop,
stream=False,
stream=stream,
user=self.context.user_id or "",
callbacks=[],
)
# Process response
scratchpad, chunk_finish_reason = yield from self._handle_chunks(
chunks, round_usage, model_log, current_messages, emit_chunks=False
chunks, round_usage, model_log, current_messages
)
agent_scratchpad.append(scratchpad)
@ -155,44 +131,28 @@ class ReActStrategy(AgentPattern):
finish_reason = chunk_finish_reason
# Check if we have an action to execute
if scratchpad.action is None:
if not allow_illegal_output:
raise ValueError("Model did not call any tools")
illegal_action = AgentScratchpadUnit.Action(
action_name=ILLEGAL_OUTPUT_TOOL,
action_input={"raw": scratchpad.thought or ""},
)
scratchpad.action = illegal_action
scratchpad.action_str = illegal_action.model_dump_json()
if scratchpad.action and scratchpad.action.action_name.lower() != "final answer":
react_state = True
observation, tool_files = yield from self._handle_tool_call(illegal_action, current_messages, round_log)
scratchpad.observation = observation
output_files.extend(tool_files)
else:
action_name = scratchpad.action.action_name
if action_name == OUTPUT_TEXT_TOOL and isinstance(scratchpad.action.action_input, dict):
pass # output_text_payload = scratchpad.action.action_input.get("text")
elif action_name == FINAL_STRUCTURED_OUTPUT_TOOL and isinstance(scratchpad.action.action_input, dict):
data = scratchpad.action.action_input.get("data")
if isinstance(data, dict):
pass # structured_output_payload = data
elif action_name == FINAL_OUTPUT_TOOL:
if isinstance(scratchpad.action.action_input, dict):
final_text = self._format_output_text(scratchpad.action.action_input.get("text"))
else:
final_text = self._format_output_text(scratchpad.action.action_input)
# Execute tool
observation, tool_files = yield from self._handle_tool_call(
scratchpad.action, current_messages, round_log
)
scratchpad.observation = observation
# Track files produced by tools
output_files.extend(tool_files)
if action_name == terminal_tool_name:
pass # terminal_output_seen = True
react_state = False
else:
react_state = True
# Add observation to scratchpad for display
yield self._create_text_chunk(f"\nObservation: {observation}\n", current_messages)
else:
# Extract final answer
if scratchpad.action and scratchpad.action.action_input:
final_answer = scratchpad.action.action_input
if isinstance(final_answer, dict):
final_answer = json.dumps(final_answer, ensure_ascii=False)
final_text = str(final_answer)
elif scratchpad.thought:
# If no action but we have thought, use thought as final answer
final_text = scratchpad.thought
yield self._finish_log(
round_log,
@ -208,22 +168,17 @@ class ReActStrategy(AgentPattern):
# Return final result
output_payload: str | dict
# TODO
from core.agent.entities import AgentResult
return AgentResult(
output=output_payload,
files=output_files,
usage=total_usage.get("usage"),
finish_reason=finish_reason,
text=final_text, files=output_files, usage=total_usage.get("usage"), finish_reason=finish_reason
)
def _build_prompt_with_react_format(
self,
original_messages: list[PromptMessage],
agent_scratchpad: list[AgentScratchpadUnit],
tools: list[Tool] | None,
include_tools: bool = True,
instruction: str = "",
) -> list[PromptMessage]:
"""Build prompt messages with ReAct format."""
@ -240,13 +195,9 @@ class ReActStrategy(AgentPattern):
# Format tools
tools_str = ""
tool_names = []
if tools:
if include_tools and self.tools:
# Convert tools to prompt message tools format
prompt_tools = [
tool.to_prompt_message_tool()
for tool in tools
if tool.entity.identity.name != ILLEGAL_OUTPUT_TOOL
]
prompt_tools = [tool.to_prompt_message_tool() for tool in self.tools]
tool_names = [tool.name for tool in prompt_tools]
# Format tools as JSON for comprehensive information
@ -258,19 +209,12 @@ class ReActStrategy(AgentPattern):
tools_str = "No tools available"
tool_names_str = ""
final_tool_name = FINAL_OUTPUT_TOOL
if FINAL_STRUCTURED_OUTPUT_TOOL in tool_names:
final_tool_name = FINAL_STRUCTURED_OUTPUT_TOOL
if final_tool_name not in tool_names:
raise ValueError("No terminal output tool available for prompt")
# Replace placeholders in the existing system prompt
updated_content = msg.content
assert isinstance(updated_content, str)
updated_content = updated_content.replace("{{instruction}}", instruction)
updated_content = updated_content.replace("{{tools}}", tools_str)
updated_content = updated_content.replace("{{tool_names}}", tool_names_str)
updated_content = updated_content.replace("{{final_tool_name}}", final_tool_name)
# Create new SystemPromptMessage with updated content
messages[i] = SystemPromptMessage(content=updated_content)
@ -302,12 +246,10 @@ class ReActStrategy(AgentPattern):
def _handle_chunks(
self,
chunks: LLMResult,
chunks: Union[Generator[LLMResultChunk, None, None], LLMResult],
llm_usage: dict[str, Any],
model_log: AgentLog,
current_messages: list[PromptMessage],
*,
emit_chunks: bool,
) -> Generator[
LLMResultChunk | AgentLog,
None,
@ -319,20 +261,25 @@ class ReActStrategy(AgentPattern):
"""
usage_dict: dict[str, Any] = {}
def result_to_chunks() -> Generator[LLMResultChunk, None, None]:
yield LLMResultChunk(
model=chunks.model,
prompt_messages=chunks.prompt_messages,
delta=LLMResultChunkDelta(
index=0,
message=chunks.message,
usage=chunks.usage,
finish_reason=None, # LLMResult doesn't have finish_reason, only streaming chunks do
),
system_fingerprint=chunks.system_fingerprint or "",
)
# Convert non-streaming to streaming format if needed
if isinstance(chunks, LLMResult):
# Create a generator from the LLMResult
def result_to_chunks() -> Generator[LLMResultChunk, None, None]:
yield LLMResultChunk(
model=chunks.model,
prompt_messages=chunks.prompt_messages,
delta=LLMResultChunkDelta(
index=0,
message=chunks.message,
usage=chunks.usage,
finish_reason=None, # LLMResult doesn't have finish_reason, only streaming chunks do
),
system_fingerprint=chunks.system_fingerprint or "",
)
streaming_chunks = result_to_chunks()
streaming_chunks = result_to_chunks()
else:
streaming_chunks = chunks
react_chunks = CotAgentOutputParser.handle_react_stream_output(streaming_chunks, usage_dict)
@ -356,18 +303,14 @@ class ReActStrategy(AgentPattern):
scratchpad.action_str = action_str
scratchpad.action = chunk
if emit_chunks:
yield self._create_text_chunk(json.dumps(chunk.model_dump()), current_messages)
elif isinstance(chunk, str):
yield self._create_text_chunk(json.dumps(chunk.model_dump()), current_messages)
else:
# Text chunk
chunk_text = str(chunk)
scratchpad.agent_response = (scratchpad.agent_response or "") + chunk_text
scratchpad.thought = (scratchpad.thought or "") + chunk_text
if emit_chunks:
yield self._create_text_chunk(chunk_text, current_messages)
else:
raise ValueError(f"Unexpected chunk type: {type(chunk)}")
yield self._create_text_chunk(chunk_text, current_messages)
# Update usage
if usage_dict.get("usage"):
@ -391,14 +334,6 @@ class ReActStrategy(AgentPattern):
return scratchpad, finish_reason
@staticmethod
def _format_output_text(value: Any) -> str:
if value is None:
return ""
if isinstance(value, str):
return value
return json.dumps(value, ensure_ascii=False)
def _handle_tool_call(
self,
action: AgentScratchpadUnit.Action,

View File

@ -2,17 +2,13 @@
from __future__ import annotations
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING
from core.agent.entities import AgentEntity, ExecutionContext
from core.file.models import File
from core.model_manager import ModelInstance
from core.model_runtime.entities.model_entities import ModelFeature
from ...app.entities.app_invoke_entities import InvokeFrom
from ...tools.entities.tool_entities import ToolInvokeFrom
from ..output_tools import build_agent_output_tools
from .base import AgentPattern, ToolInvokeHook
from .function_call import FunctionCallStrategy
from .react import ReActStrategy
@ -29,10 +25,6 @@ class StrategyFactory:
@staticmethod
def create_strategy(
*,
tenant_id: str,
invoke_from: InvokeFrom,
tool_invoke_from: ToolInvokeFrom,
model_features: list[ModelFeature],
model_instance: ModelInstance,
context: ExecutionContext,
@ -43,16 +35,11 @@ class StrategyFactory:
agent_strategy: AgentEntity.Strategy | None = None,
tool_invoke_hook: ToolInvokeHook | None = None,
instruction: str = "",
structured_output_schema: Mapping[str, Any] | None = None,
) -> AgentPattern:
"""
Create an appropriate strategy based on model features.
Args:
tenant_id:
invoke_from:
tool_invoke_from:
structured_output_schema:
model_features: List of model features/capabilities
model_instance: Model instance to use
context: Execution context containing trace/audit information
@ -67,14 +54,6 @@ class StrategyFactory:
Returns:
AgentStrategy instance
"""
output_tools = build_agent_output_tools(
tenant_id=tenant_id,
invoke_from=invoke_from,
tool_invoke_from=tool_invoke_from,
structured_output_schema=structured_output_schema,
)
tools.extend(output_tools)
# If explicit strategy is provided and it's Function Calling, try to use it if supported
if agent_strategy == AgentEntity.Strategy.FUNCTION_CALLING: