From 5b45cf7c82cb4ba714bc90271385f686330d1dc6 Mon Sep 17 00:00:00 2001
From: Yansong Zhang <916125788@qq.com>
Date: Wed, 3 Jun 2026 11:10:00 +0800
Subject: [PATCH] fix(dify-agent): rename dify.shell tools to provider-valid
 names (shell.run -> shell_run)

The dify.shell layer registered its four tools as shell.run / shell.wait /
shell.input / shell.interrupt. OpenAI and Anthropic function-calling both
require tool names matching ^[a-zA-Z0-9_-]+$, so any Agent run that injected
the shell layer failed at the first model call with HTTP 400
("tools[0].function.name ... does not match pattern").

Rename the four tools (and their in-prompt references) to use underscores so
the names are valid for the providers we ship. The change is self-consistent:
the advertised tool name, the prompt prose, and pydantic-ai's dispatch key all
use the underscore form. Update the shell layer and runner unit tests that
pinned the dotted names.

Verified live: an Agent App agent autonomously calls shell_run, executes bash
in its isolated per-session workspace (~/workspace/<session_id>), and the real
stdout flows back through the agent-backend SSE bridge.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../src/dify_agent/layers/shell/layer.py      | 44 +++++++++----------
 .../dify_agent/layers/shell/test_layer.py     | 26 +++++------
 .../local/dify_agent/runtime/test_runner.py   |  4 +-
 3 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/dify-agent/src/dify_agent/layers/shell/layer.py b/dify-agent/src/dify_agent/layers/shell/layer.py
index d265075631c..797964d85b2 100644
--- a/dify-agent/src/dify_agent/layers/shell/layer.py
+++ b/dify-agent/src/dify_agent/layers/shell/layer.py
@@ -1,7 +1,7 @@
 """Shellctl-backed Dify shell layer.
 
 ``DifyShellLayer`` is a stateful pydantic-ai tool layer that exposes exactly
-``shell.run``, ``shell.wait``, ``shell.input``, and ``shell.interrupt``. The
+``shell_run``, ``shell_wait``, ``shell_input``, and ``shell_interrupt``. The
 layer persists only JSON-safe shell session state in ``runtime_state`` and keeps
 its live shellctl HTTP client on the layer instance only while
 ``resource_context()`` is active. Agenton enters that resource scope before
@@ -58,51 +58,51 @@ _SESSION_ID_ATTEMPT_LIMIT = 256
 _SESSION_ID_PATTERN = re.compile(r"^[0-9a-f]{7}$")
 _SHELL_LAYER_PREFIX_PROMPT = """You have access to a shell layer. It provides four tools:
 
-1. shell.run
+1. shell_run
    Start a new shell job in the current isolated workspace.
    Use it to execute commands or scripts.
 
-2. shell.wait
+2. shell_wait
    Wait for more output or completion from an existing shell job.
-   Use it when shell.run returns done=false.
+   Use it when shell_run returns done=false.
 
-3. shell.input
+3. shell_input
    Send stdin text to a running shell job, then wait for new output.
    Use it for interactive commands that are waiting for input.
 
-4. shell.interrupt
+4. shell_interrupt
    Interrupt a running shell job.
    Use it to stop a long-running, stuck, or no-longer-needed command.
 
 Common arguments:
 
 - script:
-  The command or script to execute. Used by shell.run.
+  The command or script to execute. Used by shell_run.
 
 - job_id:
-  The id of a shell job returned by shell.run.
-  Use it with shell.wait, shell.input, and shell.interrupt.
+  The id of a shell job returned by shell_run.
+  Use it with shell_wait, shell_input, and shell_interrupt.
   Never invent a job_id.
 
 - timeout:
   Maximum time, in seconds, to wait for output or completion for this tool call.
-  A timeout does not necessarily mean the job has stopped; if done=false, use shell.wait again.
+  A timeout does not necessarily mean the job has stopped; if done=false, use shell_wait again.
 
 - text:
-  Text to send to the running process stdin. Used by shell.input.
+  Text to send to the running process stdin. Used by shell_input.
   Include "\\n" if the process expects Enter.
 
 - grace_seconds:
-  Time to wait after interrupting before forceful cleanup. Used by shell.interrupt.
+  Time to wait after interrupting before forceful cleanup. Used by shell_interrupt.
 
 Usage rules:
 
-- Start with shell.run.
-- If shell.run returns done=false, call shell.wait with the returned job_id.
-- Use shell.input only when the job is running and waiting for stdin.
-- Use shell.interrupt when a job is stuck or should be stopped.
+- Start with shell_run.
+- If shell_run returns done=false, call shell_wait with the returned job_id.
+- Use shell_input only when the job is running and waiting for stdin.
+- Use shell_interrupt when a job is stuck or should be stopped.
 
-The script argument of shell.run can be a normal shell script, or a shebang script.
+The script argument of shell_run can be a normal shell script, or a shebang script.
 If the first line is a shebang, the shell layer executes the script directly.
 
 Tips:
@@ -273,7 +273,7 @@ class DifyShellLayer(PydanticAILayer[NoLayerDeps, object, DifyShellLayerConfig,
     The mutable serializable state lives in ``runtime_state``; the live client is
     intentionally kept off-snapshot in ``_shellctl_client``. Tool methods update
     tracked job ids and output offsets after every successful shellctl response so
-    later ``shell.wait``/``shell.input`` calls can resume from the last known
+    later ``shell_wait``/``shell_input`` calls can resume from the last known
     offset without exposing offsets as model-controlled inputs.
     """
 
@@ -320,10 +320,10 @@ class DifyShellLayer(PydanticAILayer[NoLayerDeps, object, DifyShellLayerConfig,
     @override
     def tools(self) -> Sequence[PydanticAITool[object]]:
         return [
-            Tool(self._tool_run, name="shell.run"),
-            Tool(self._tool_wait, name="shell.wait"),
-            Tool(self._tool_input, name="shell.input"),
-            Tool(self._tool_interrupt, name="shell.interrupt"),
+            Tool(self._tool_run, name="shell_run"),
+            Tool(self._tool_wait, name="shell_wait"),
+            Tool(self._tool_input, name="shell_input"),
+            Tool(self._tool_interrupt, name="shell_interrupt"),
         ]
 
     @override
diff --git a/dify-agent/tests/local/dify_agent/layers/shell/test_layer.py b/dify-agent/tests/local/dify_agent/layers/shell/test_layer.py
index a2ab4e435c7..4eba2195453 100644
--- a/dify-agent/tests/local/dify_agent/layers/shell/test_layer.py
+++ b/dify-agent/tests/local/dify_agent/layers/shell/test_layer.py
@@ -437,24 +437,24 @@ def test_shell_layer_tools_map_inputs_to_shellctl_calls_and_maintain_offsets() -
         async with layer.resource_context():
             layer.runtime_state = DifyShellRuntimeState(session_id="abc12ff", workspace_cwd="~/workspace/abc12ff")
 
-            run_tool_def = await tools["shell.run"].prepare_tool_def(None)  # pyright: ignore[reportArgumentType]
-            wait_tool_def = await tools["shell.wait"].prepare_tool_def(None)  # pyright: ignore[reportArgumentType]
-            input_tool_def = await tools["shell.input"].prepare_tool_def(None)  # pyright: ignore[reportArgumentType]
-            interrupt_tool_def = await tools["shell.interrupt"].prepare_tool_def(None)  # pyright: ignore[reportArgumentType]
+            run_tool_def = await tools["shell_run"].prepare_tool_def(None)  # pyright: ignore[reportArgumentType]
+            wait_tool_def = await tools["shell_wait"].prepare_tool_def(None)  # pyright: ignore[reportArgumentType]
+            input_tool_def = await tools["shell_input"].prepare_tool_def(None)  # pyright: ignore[reportArgumentType]
+            interrupt_tool_def = await tools["shell_interrupt"].prepare_tool_def(None)  # pyright: ignore[reportArgumentType]
 
-            run_result = await tools["shell.run"].function_schema.call(
+            run_result = await tools["shell_run"].function_schema.call(
                 {"script": "pwd", "timeout": 2.5},
                 None,  # pyright: ignore[reportArgumentType]
             )
-            wait_result = await tools["shell.wait"].function_schema.call(
+            wait_result = await tools["shell_wait"].function_schema.call(
                 {"job_id": "user-job", "timeout": 4.0},
                 None,  # pyright: ignore[reportArgumentType]
             )
-            input_result = await tools["shell.input"].function_schema.call(
+            input_result = await tools["shell_input"].function_schema.call(
                 {"job_id": "user-job", "text": "ls\n", "timeout": 5.0},
                 None,  # pyright: ignore[reportArgumentType]
             )
-            interrupt_result = await tools["shell.interrupt"].function_schema.call(
+            interrupt_result = await tools["shell_interrupt"].function_schema.call(
                 {"job_id": "user-job", "grace_seconds": 1.5},
                 None,  # pyright: ignore[reportArgumentType]
             )
@@ -467,7 +467,7 @@ def test_shell_layer_tools_map_inputs_to_shellctl_calls_and_maintain_offsets() -
             assert "offset" not in wait_tool_def.parameters_json_schema.get("properties", {})
             assert "offset" not in input_tool_def.parameters_json_schema.get("properties", {})
             assert "offset" not in interrupt_tool_def.parameters_json_schema.get("properties", {})
-            assert set(tools) == {"shell.run", "shell.wait", "shell.input", "shell.interrupt"}
+            assert set(tools) == {"shell_run", "shell_wait", "shell_input", "shell_interrupt"}
             assert run_result["job_id"] == "user-job"
             assert run_result["offset"] == 10
             assert wait_result["offset"] == 18
@@ -497,15 +497,15 @@ def test_shell_layer_tools_reject_untracked_job_ids_without_shellctl_calls() ->
         async with layer.resource_context():
             layer.runtime_state = DifyShellRuntimeState(session_id="abc12ff", workspace_cwd="~/workspace/abc12ff")
 
-            wait_result = await tools["shell.wait"].function_schema.call(
+            wait_result = await tools["shell_wait"].function_schema.call(
                 {"job_id": "missing-job"},
                 None,  # pyright: ignore[reportArgumentType]
             )
-            input_result = await tools["shell.input"].function_schema.call(
+            input_result = await tools["shell_input"].function_schema.call(
                 {"job_id": "missing-job", "text": "hello"},
                 None,  # pyright: ignore[reportArgumentType]
             )
-            interrupt_result = await tools["shell.interrupt"].function_schema.call(
+            interrupt_result = await tools["shell_interrupt"].function_schema.call(
                 {"job_id": "missing-job"},
                 None,  # pyright: ignore[reportArgumentType]
             )
@@ -531,7 +531,7 @@ def test_shell_layer_hooks_and_tools_fail_clearly_outside_active_resource_contex
         with pytest.raises(RuntimeError, match="resource_context"):
             await layer.on_context_suspend()
 
-        run_result = await tools["shell.run"].function_schema.call(
+        run_result = await tools["shell_run"].function_schema.call(
             {"script": "pwd"},
             None,  # pyright: ignore[reportArgumentType]
         )
diff --git a/dify-agent/tests/local/dify_agent/runtime/test_runner.py b/dify-agent/tests/local/dify_agent/runtime/test_runner.py
index 4a899f0a790..e72fb7c2e85 100644
--- a/dify-agent/tests/local/dify_agent/runtime/test_runner.py
+++ b/dify-agent/tests/local/dify_agent/runtime/test_runner.py
@@ -663,7 +663,7 @@ def test_runner_rejects_duplicate_tool_names_between_shell_and_other_layers(
         async def duplicate_shell_run() -> str:
             return "tool"
 
-        return [Tool(duplicate_shell_run, name="shell.run")]
+        return [Tool(duplicate_shell_run, name="shell_run")]
 
     def fake_create_agent(model: object, *, tools: list[Tool[object]], output_type: object) -> object:
         del model, tools, output_type
@@ -739,7 +739,7 @@ def test_runner_rejects_duplicate_tool_names_between_shell_and_other_layers(
         async with httpx.AsyncClient() as client:
             with pytest.raises(
                 AgentRunValidationError,
-                match="unique tool names across all layers, got duplicates: shell.run",
+                match="unique tool names across all layers, got duplicates: shell_run",
             ):
                 await AgentRunRunner(
                     sink=sink,