Merge remote-tracking branch 'origin/main' into feat/trigger

lyzno1
2025-10-15 20:39:17 +08:00
73 changed files with 635 additions and 670 deletions

View File

@@ -25,7 +25,7 @@ class FirecrawlApp:
}
if params:
json_data.update(params)
response = self._post_request(f"{self.base_url}/v1/scrape", json_data, headers)
response = self._post_request(f"{self.base_url}/v2/scrape", json_data, headers)
if response.status_code == 200:
response_data = response.json()
data = response_data["data"]
@@ -42,7 +42,7 @@ class FirecrawlApp:
json_data = {"url": url}
if params:
json_data.update(params)
response = self._post_request(f"{self.base_url}/v1/crawl", json_data, headers)
response = self._post_request(f"{self.base_url}/v2/crawl", json_data, headers)
if response.status_code == 200:
# The response also carries two other fields: "success" (bool) and "url" (str)
job_id = response.json().get("id")
@@ -51,9 +51,25 @@ class FirecrawlApp:
self._handle_error(response, "start crawl job")
return "" # unreachable
def map(self, url: str, params: dict[str, Any] | None = None) -> dict[str, Any]:
# Documentation: https://docs.firecrawl.dev/api-reference/endpoint/map
headers = self._prepare_headers()
json_data: dict[str, Any] = {"url": url, "integration": "dify"}
if params:
# Pass through provided params, including optional "sitemap": "only" | "include" | "skip"
json_data.update(params)
response = self._post_request(f"{self.base_url}/v2/map", json_data, headers)
if response.status_code == 200:
return cast(dict[str, Any], response.json())
elif response.status_code in {402, 409, 500, 429, 408}:
self._handle_error(response, "start map job")
return {}
else:
raise Exception(f"Failed to start map job. Status code: {response.status_code}")
def check_crawl_status(self, job_id) -> dict[str, Any]:
headers = self._prepare_headers()
response = self._get_request(f"{self.base_url}/v1/crawl/{job_id}", headers)
response = self._get_request(f"{self.base_url}/v2/crawl/{job_id}", headers)
if response.status_code == 200:
crawl_status_response = response.json()
if crawl_status_response.get("status") == "completed":
@@ -135,12 +151,16 @@ class FirecrawlApp:
"lang": "en",
"country": "us",
"timeout": 60000,
"ignoreInvalidURLs": False,
"ignoreInvalidURLs": True,
"scrapeOptions": {},
"sources": [
{"type": "web"},
],
"integration": "dify",
}
if params:
json_data.update(params)
response = self._post_request(f"{self.base_url}/v1/search", json_data, headers)
response = self._post_request(f"{self.base_url}/v2/search", json_data, headers)
if response.status_code == 200:
response_data = response.json()
if not response_data.get("success"):
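
Taken together, the FirecrawlApp hunks bump the scrape, crawl, crawl-status, and search endpoints from v1 to v2, add a map() method targeting /v2/map, and make search tolerate bad URLs by default (ignoreInvalidURLs=True) while restricting results to web sources. As a rough usage sketch only: the constructor arguments, the crawl() method name, and the "sitemap": "only" value below are assumptions inferred from the hunks, not guaranteed by this diff.

# Hypothetical usage of the v2 client above; api_key/base_url arguments and
# the crawl() method name are assumed from context, not shown in this diff.
app = FirecrawlApp(api_key="fc-...", base_url="https://api.firecrawl.dev")

# map() POSTs {"url": ..., "integration": "dify", **params} to /v2/map and
# returns the parsed JSON body on HTTP 200.
links = app.map("https://example.com", params={"sitemap": "only"})

# The crawl endpoint returns a v2 job id, polled via /v2/crawl/{job_id}.
job_id = app.crawl("https://example.com", params={"limit": 10})
status = app.check_crawl_status(job_id)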

View File

@@ -41,6 +41,7 @@ class RedisChannel:
self._redis = redis_client
self._key = channel_key
self._command_ttl = command_ttl
self._pending_key = f"{channel_key}:pending"
def fetch_commands(self) -> list[GraphEngineCommand]:
"""
@@ -49,6 +50,9 @@ class RedisChannel:
Returns:
List of pending commands (drains the Redis list)
"""
if not self._has_pending_commands():
return []
commands: list[GraphEngineCommand] = []
# Use pipeline for atomic operations
@@ -85,6 +89,7 @@ class RedisChannel:
with self._redis.pipeline() as pipe:
pipe.rpush(self._key, command_json)
pipe.expire(self._key, self._command_ttl)
pipe.set(self._pending_key, "1", ex=self._command_ttl)
pipe.execute()
def _deserialize_command(self, data: dict[str, Any]) -> GraphEngineCommand | None:
@@ -112,3 +117,17 @@ class RedisChannel:
except (ValueError, TypeError):
return None
def _has_pending_commands(self) -> bool:
"""
Check and consume the pending marker to avoid unnecessary list reads.
Returns:
True if commands should be fetched from Redis.
"""
with self._redis.pipeline() as pipe:
pipe.get(self._pending_key)
pipe.delete(self._pending_key)
pending_value, _ = pipe.execute()
return pending_value is not None
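
The pattern above keeps a lightweight "pending" marker key next to the command list so that idle polls can skip the drain pipeline entirely. A minimal standalone sketch of the same idea, assuming redis-py and illustrative key names:

import json
import redis

r = redis.Redis()
KEY = "demo:commands"           # the command list
PENDING = f"{KEY}:pending"      # marker set whenever the list is written
TTL = 60

def send(command: dict) -> None:
    with r.pipeline() as pipe:
        pipe.rpush(KEY, json.dumps(command))
        pipe.expire(KEY, TTL)
        pipe.set(PENDING, "1", ex=TTL)  # flag that commands are waiting
        pipe.execute()

def fetch() -> list[dict]:
    # Consume the marker first; the common idle case costs one GET/DELETE
    # round trip instead of draining an empty list.
    with r.pipeline() as pipe:
        pipe.get(PENDING)
        pipe.delete(PENDING)
        pending, _ = pipe.execute()
    if pending is None:
        return []
    with r.pipeline() as pipe:
        pipe.lrange(KEY, 0, -1)
        pipe.delete(KEY)
        items, _ = pipe.execute()
    return [json.loads(i) for i in items]

A push that lands between the marker check and the drain is not lost: send() re-sets the marker, so at worst the next fetch() drains an already-empty list.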

View File

@@ -7,6 +7,7 @@ from collections.abc import Mapping
from functools import singledispatchmethod
from typing import TYPE_CHECKING, final
from core.model_runtime.entities.llm_entities import LLMUsage
from core.workflow.entities import GraphRuntimeState
from core.workflow.enums import ErrorStrategy, NodeExecutionType
from core.workflow.graph import Graph
@@ -125,6 +126,7 @@ class EventHandler:
node_execution = self._graph_execution.get_or_create_node_execution(event.node_id)
is_initial_attempt = node_execution.retry_count == 0
node_execution.mark_started(event.id)
self._graph_runtime_state.increment_node_run_steps()
# Track in response coordinator for stream ordering
self._response_coordinator.track_node_execution(event.node_id, event.id)
@@ -163,6 +165,8 @@ class EventHandler:
node_execution = self._graph_execution.get_or_create_node_execution(event.node_id)
node_execution.mark_taken()
self._accumulate_node_usage(event.node_run_result.llm_usage)
# Store outputs in variable pool
self._store_node_outputs(event.node_id, event.node_run_result.outputs)
@@ -212,6 +216,8 @@ class EventHandler:
node_execution.mark_failed(event.error)
self._graph_execution.record_node_failure()
self._accumulate_node_usage(event.node_run_result.llm_usage)
result = self._error_handler.handle_node_failure(event)
if result:
@@ -235,6 +241,8 @@ class EventHandler:
node_execution = self._graph_execution.get_or_create_node_execution(event.node_id)
node_execution.mark_taken()
self._accumulate_node_usage(event.node_run_result.llm_usage)
# Persist outputs produced by the exception strategy (e.g. default values)
self._store_node_outputs(event.node_id, event.node_run_result.outputs)
@@ -286,6 +294,19 @@ class EventHandler:
self._state_manager.enqueue_node(event.node_id)
self._state_manager.start_execution(event.node_id)
def _accumulate_node_usage(self, usage: LLMUsage) -> None:
"""Accumulate token usage into the shared runtime state."""
if usage.total_tokens <= 0:
return
self._graph_runtime_state.add_tokens(usage.total_tokens)
current_usage = self._graph_runtime_state.llm_usage
if current_usage.total_tokens == 0:
self._graph_runtime_state.llm_usage = usage
else:
self._graph_runtime_state.llm_usage = current_usage.plus(usage)
def _store_node_outputs(self, node_id: str, outputs: Mapping[str, object]) -> None:
"""
Store node outputs in the variable pool.
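
For illustration: _accumulate_node_usage now runs on success, failure, and exception events alike, so tokens spent by a node that ultimately failed still count toward the run total. A condensed sketch of the semantics, assuming LLMUsage.plus() returns a new object with summed fields (which the handler above relies on); the event and state objects here are stand-ins, not the engine's real wiring:

# Condensed view of the accumulation above.
for event in completion_events:  # succeeded, failed, or exception
    usage = event.node_run_result.llm_usage
    if usage.total_tokens <= 0:
        continue  # nothing to record for non-LLM nodes
    state.add_tokens(usage.total_tokens)
    # The first contribution replaces the zero-valued default; later ones merge.
    if state.llm_usage.total_tokens == 0:
        state.llm_usage = usage
    else:
        state.llm_usage = state.llm_usage.plus(usage)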

View File

@@ -8,7 +8,12 @@ import threading
import time
from typing import TYPE_CHECKING, final
from core.workflow.graph_events.base import GraphNodeEventBase
from core.workflow.graph_events import (
GraphNodeEventBase,
NodeRunExceptionEvent,
NodeRunFailedEvent,
NodeRunSucceededEvent,
)
from ..event_management import EventManager
from .execution_coordinator import ExecutionCoordinator
@@ -72,13 +77,16 @@ class Dispatcher:
if self._thread and self._thread.is_alive():
self._thread.join(timeout=10.0)
_COMMAND_TRIGGER_EVENTS = (
NodeRunSucceededEvent,
NodeRunFailedEvent,
NodeRunExceptionEvent,
)
def _dispatcher_loop(self) -> None:
"""Main dispatcher loop."""
try:
while not self._stop_event.is_set():
# Check for commands
self._execution_coordinator.check_commands()
# Check for scaling
self._execution_coordinator.check_scaling()
@@ -87,6 +95,8 @@ class Dispatcher:
event = self._event_queue.get(timeout=0.1)
# Route to the event handler
self._event_handler.dispatch(event)
if self._should_check_commands(event):
self._execution_coordinator.check_commands()
self._event_queue.task_done()
except queue.Empty:
# Check if execution is complete
@@ -102,3 +112,7 @@ class Dispatcher:
# Signal the event emitter that execution is complete
if self._event_emitter:
self._event_emitter.mark_complete()
def _should_check_commands(self, event: GraphNodeEventBase) -> bool:
"""Return True if the event represents a node completion."""
return isinstance(event, self._COMMAND_TRIGGER_EVENTS)
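
In effect, the loop no longer polls for commands on every 0.1 s tick; it checks only after a node reaches a terminal state, which pairs with the RedisChannel pending-marker change to cut idle Redis traffic. A stripped-down sketch of the policy, with placeholder names standing in for the dispatcher's real collaborators:

# Stripped-down sketch of the dispatch policy above, not the real loop.
TERMINAL = (NodeRunSucceededEvent, NodeRunFailedEvent, NodeRunExceptionEvent)

while not stop_event.is_set():
    try:
        event = event_queue.get(timeout=0.1)
    except queue.Empty:
        continue  # completion checks elided in this sketch
    event_handler.dispatch(event)
    if isinstance(event, TERMINAL):
        execution_coordinator.check_commands()  # e.g. an external stop request
    event_queue.task_done()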