From 5b45cf7c82cb4ba714bc90271385f686330d1dc6 Mon Sep 17 00:00:00 2001 From: Yansong Zhang <916125788@qq.com> Date: Wed, 3 Jun 2026 11:10:00 +0800 Subject: [PATCH] fix(dify-agent): rename dify.shell tools to provider-valid names (shell.run -> shell_run) The dify.shell layer registered its four tools as shell.run / shell.wait / shell.input / shell.interrupt. OpenAI and Anthropic function-calling both require tool names matching ^[a-zA-Z0-9_-]+$, so any Agent run that injected the shell layer failed at the first model call with HTTP 400 ("tools[0].function.name ... does not match pattern"). Rename the four tools (and their in-prompt references) to use underscores so the names are valid for the providers we ship. The change is self-consistent: the advertised tool name, the prompt prose, and pydantic-ai's dispatch key all use the underscore form. Updated the shell layer + runner unit tests that pinned the dotted names. Verified live: an Agent App agent autonomously calls shell_run, executes bash in its isolated per-session workspace (~/workspace/), and the real stdout flows back through the agent-backend SSE bridge. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../src/dify_agent/layers/shell/layer.py | 44 +++++++++---------- .../dify_agent/layers/shell/test_layer.py | 26 +++++------ .../local/dify_agent/runtime/test_runner.py | 4 +- 3 files changed, 37 insertions(+), 37 deletions(-) diff --git a/dify-agent/src/dify_agent/layers/shell/layer.py b/dify-agent/src/dify_agent/layers/shell/layer.py index d265075631c..797964d85b2 100644 --- a/dify-agent/src/dify_agent/layers/shell/layer.py +++ b/dify-agent/src/dify_agent/layers/shell/layer.py @@ -1,7 +1,7 @@ """Shellctl-backed Dify shell layer. ``DifyShellLayer`` is a stateful pydantic-ai tool layer that exposes exactly -``shell.run``, ``shell.wait``, ``shell.input``, and ``shell.interrupt``. The +``shell_run``, ``shell_wait``, ``shell_input``, and ``shell_interrupt``. The layer persists only JSON-safe shell session state in ``runtime_state`` and keeps its live shellctl HTTP client on the layer instance only while ``resource_context()`` is active. Agenton enters that resource scope before @@ -58,51 +58,51 @@ _SESSION_ID_ATTEMPT_LIMIT = 256 _SESSION_ID_PATTERN = re.compile(r"^[0-9a-f]{7}$") _SHELL_LAYER_PREFIX_PROMPT = """You have access to a shell layer. It provides four tools: -1. shell.run +1. shell_run Start a new shell job in the current isolated workspace. Use it to execute commands or scripts. -2. shell.wait +2. shell_wait Wait for more output or completion from an existing shell job. - Use it when shell.run returns done=false. + Use it when shell_run returns done=false. -3. shell.input +3. shell_input Send stdin text to a running shell job, then wait for new output. Use it for interactive commands that are waiting for input. -4. shell.interrupt +4. shell_interrupt Interrupt a running shell job. Use it to stop a long-running, stuck, or no-longer-needed command. Common arguments: - script: - The command or script to execute. Used by shell.run. + The command or script to execute. Used by shell_run. - job_id: - The id of a shell job returned by shell.run. - Use it with shell.wait, shell.input, and shell.interrupt. + The id of a shell job returned by shell_run. + Use it with shell_wait, shell_input, and shell_interrupt. Never invent a job_id. - timeout: Maximum time, in seconds, to wait for output or completion for this tool call. - A timeout does not necessarily mean the job has stopped; if done=false, use shell.wait again. + A timeout does not necessarily mean the job has stopped; if done=false, use shell_wait again. - text: - Text to send to the running process stdin. Used by shell.input. + Text to send to the running process stdin. Used by shell_input. Include "\\n" if the process expects Enter. - grace_seconds: - Time to wait after interrupting before forceful cleanup. Used by shell.interrupt. + Time to wait after interrupting before forceful cleanup. Used by shell_interrupt. Usage rules: -- Start with shell.run. -- If shell.run returns done=false, call shell.wait with the returned job_id. -- Use shell.input only when the job is running and waiting for stdin. -- Use shell.interrupt when a job is stuck or should be stopped. +- Start with shell_run. +- If shell_run returns done=false, call shell_wait with the returned job_id. +- Use shell_input only when the job is running and waiting for stdin. +- Use shell_interrupt when a job is stuck or should be stopped. -The script argument of shell.run can be a normal shell script, or a shebang script. +The script argument of shell_run can be a normal shell script, or a shebang script. If the first line is a shebang, the shell layer executes the script directly. Tips: @@ -273,7 +273,7 @@ class DifyShellLayer(PydanticAILayer[NoLayerDeps, object, DifyShellLayerConfig, The mutable serializable state lives in ``runtime_state``; the live client is intentionally kept off-snapshot in ``_shellctl_client``. Tool methods update tracked job ids and output offsets after every successful shellctl response so - later ``shell.wait``/``shell.input`` calls can resume from the last known + later ``shell_wait``/``shell_input`` calls can resume from the last known offset without exposing offsets as model-controlled inputs. """ @@ -320,10 +320,10 @@ class DifyShellLayer(PydanticAILayer[NoLayerDeps, object, DifyShellLayerConfig, @override def tools(self) -> Sequence[PydanticAITool[object]]: return [ - Tool(self._tool_run, name="shell.run"), - Tool(self._tool_wait, name="shell.wait"), - Tool(self._tool_input, name="shell.input"), - Tool(self._tool_interrupt, name="shell.interrupt"), + Tool(self._tool_run, name="shell_run"), + Tool(self._tool_wait, name="shell_wait"), + Tool(self._tool_input, name="shell_input"), + Tool(self._tool_interrupt, name="shell_interrupt"), ] @override diff --git a/dify-agent/tests/local/dify_agent/layers/shell/test_layer.py b/dify-agent/tests/local/dify_agent/layers/shell/test_layer.py index a2ab4e435c7..4eba2195453 100644 --- a/dify-agent/tests/local/dify_agent/layers/shell/test_layer.py +++ b/dify-agent/tests/local/dify_agent/layers/shell/test_layer.py @@ -437,24 +437,24 @@ def test_shell_layer_tools_map_inputs_to_shellctl_calls_and_maintain_offsets() - async with layer.resource_context(): layer.runtime_state = DifyShellRuntimeState(session_id="abc12ff", workspace_cwd="~/workspace/abc12ff") - run_tool_def = await tools["shell.run"].prepare_tool_def(None) # pyright: ignore[reportArgumentType] - wait_tool_def = await tools["shell.wait"].prepare_tool_def(None) # pyright: ignore[reportArgumentType] - input_tool_def = await tools["shell.input"].prepare_tool_def(None) # pyright: ignore[reportArgumentType] - interrupt_tool_def = await tools["shell.interrupt"].prepare_tool_def(None) # pyright: ignore[reportArgumentType] + run_tool_def = await tools["shell_run"].prepare_tool_def(None) # pyright: ignore[reportArgumentType] + wait_tool_def = await tools["shell_wait"].prepare_tool_def(None) # pyright: ignore[reportArgumentType] + input_tool_def = await tools["shell_input"].prepare_tool_def(None) # pyright: ignore[reportArgumentType] + interrupt_tool_def = await tools["shell_interrupt"].prepare_tool_def(None) # pyright: ignore[reportArgumentType] - run_result = await tools["shell.run"].function_schema.call( + run_result = await tools["shell_run"].function_schema.call( {"script": "pwd", "timeout": 2.5}, None, # pyright: ignore[reportArgumentType] ) - wait_result = await tools["shell.wait"].function_schema.call( + wait_result = await tools["shell_wait"].function_schema.call( {"job_id": "user-job", "timeout": 4.0}, None, # pyright: ignore[reportArgumentType] ) - input_result = await tools["shell.input"].function_schema.call( + input_result = await tools["shell_input"].function_schema.call( {"job_id": "user-job", "text": "ls\n", "timeout": 5.0}, None, # pyright: ignore[reportArgumentType] ) - interrupt_result = await tools["shell.interrupt"].function_schema.call( + interrupt_result = await tools["shell_interrupt"].function_schema.call( {"job_id": "user-job", "grace_seconds": 1.5}, None, # pyright: ignore[reportArgumentType] ) @@ -467,7 +467,7 @@ def test_shell_layer_tools_map_inputs_to_shellctl_calls_and_maintain_offsets() - assert "offset" not in wait_tool_def.parameters_json_schema.get("properties", {}) assert "offset" not in input_tool_def.parameters_json_schema.get("properties", {}) assert "offset" not in interrupt_tool_def.parameters_json_schema.get("properties", {}) - assert set(tools) == {"shell.run", "shell.wait", "shell.input", "shell.interrupt"} + assert set(tools) == {"shell_run", "shell_wait", "shell_input", "shell_interrupt"} assert run_result["job_id"] == "user-job" assert run_result["offset"] == 10 assert wait_result["offset"] == 18 @@ -497,15 +497,15 @@ def test_shell_layer_tools_reject_untracked_job_ids_without_shellctl_calls() -> async with layer.resource_context(): layer.runtime_state = DifyShellRuntimeState(session_id="abc12ff", workspace_cwd="~/workspace/abc12ff") - wait_result = await tools["shell.wait"].function_schema.call( + wait_result = await tools["shell_wait"].function_schema.call( {"job_id": "missing-job"}, None, # pyright: ignore[reportArgumentType] ) - input_result = await tools["shell.input"].function_schema.call( + input_result = await tools["shell_input"].function_schema.call( {"job_id": "missing-job", "text": "hello"}, None, # pyright: ignore[reportArgumentType] ) - interrupt_result = await tools["shell.interrupt"].function_schema.call( + interrupt_result = await tools["shell_interrupt"].function_schema.call( {"job_id": "missing-job"}, None, # pyright: ignore[reportArgumentType] ) @@ -531,7 +531,7 @@ def test_shell_layer_hooks_and_tools_fail_clearly_outside_active_resource_contex with pytest.raises(RuntimeError, match="resource_context"): await layer.on_context_suspend() - run_result = await tools["shell.run"].function_schema.call( + run_result = await tools["shell_run"].function_schema.call( {"script": "pwd"}, None, # pyright: ignore[reportArgumentType] ) diff --git a/dify-agent/tests/local/dify_agent/runtime/test_runner.py b/dify-agent/tests/local/dify_agent/runtime/test_runner.py index 4a899f0a790..e72fb7c2e85 100644 --- a/dify-agent/tests/local/dify_agent/runtime/test_runner.py +++ b/dify-agent/tests/local/dify_agent/runtime/test_runner.py @@ -663,7 +663,7 @@ def test_runner_rejects_duplicate_tool_names_between_shell_and_other_layers( async def duplicate_shell_run() -> str: return "tool" - return [Tool(duplicate_shell_run, name="shell.run")] + return [Tool(duplicate_shell_run, name="shell_run")] def fake_create_agent(model: object, *, tools: list[Tool[object]], output_type: object) -> object: del model, tools, output_type @@ -739,7 +739,7 @@ def test_runner_rejects_duplicate_tool_names_between_shell_and_other_layers( async with httpx.AsyncClient() as client: with pytest.raises( AgentRunValidationError, - match="unique tool names across all layers, got duplicates: shell.run", + match="unique tool names across all layers, got duplicates: shell_run", ): await AgentRunRunner( sink=sink,