mirror of
https://github.com/langgenius/dify.git
synced 2026-04-27 22:18:15 +08:00
Merge remote-tracking branch 'origin/main' into feat/trigger
This commit is contained in:
@ -25,7 +25,7 @@ class FirecrawlApp:
|
||||
}
|
||||
if params:
|
||||
json_data.update(params)
|
||||
response = self._post_request(f"{self.base_url}/v1/scrape", json_data, headers)
|
||||
response = self._post_request(f"{self.base_url}/v2/scrape", json_data, headers)
|
||||
if response.status_code == 200:
|
||||
response_data = response.json()
|
||||
data = response_data["data"]
|
||||
@ -42,7 +42,7 @@ class FirecrawlApp:
|
||||
json_data = {"url": url}
|
||||
if params:
|
||||
json_data.update(params)
|
||||
response = self._post_request(f"{self.base_url}/v1/crawl", json_data, headers)
|
||||
response = self._post_request(f"{self.base_url}/v2/crawl", json_data, headers)
|
||||
if response.status_code == 200:
|
||||
# There's also another two fields in the response: "success" (bool) and "url" (str)
|
||||
job_id = response.json().get("id")
|
||||
@ -51,9 +51,25 @@ class FirecrawlApp:
|
||||
self._handle_error(response, "start crawl job")
|
||||
return "" # unreachable
|
||||
|
||||
def map(self, url: str, params: dict[str, Any] | None = None) -> dict[str, Any]:
|
||||
# Documentation: https://docs.firecrawl.dev/api-reference/endpoint/map
|
||||
headers = self._prepare_headers()
|
||||
json_data: dict[str, Any] = {"url": url, "integration": "dify"}
|
||||
if params:
|
||||
# Pass through provided params, including optional "sitemap": "only" | "include" | "skip"
|
||||
json_data.update(params)
|
||||
response = self._post_request(f"{self.base_url}/v2/map", json_data, headers)
|
||||
if response.status_code == 200:
|
||||
return cast(dict[str, Any], response.json())
|
||||
elif response.status_code in {402, 409, 500, 429, 408}:
|
||||
self._handle_error(response, "start map job")
|
||||
return {}
|
||||
else:
|
||||
raise Exception(f"Failed to start map job. Status code: {response.status_code}")
|
||||
|
||||
def check_crawl_status(self, job_id) -> dict[str, Any]:
|
||||
headers = self._prepare_headers()
|
||||
response = self._get_request(f"{self.base_url}/v1/crawl/{job_id}", headers)
|
||||
response = self._get_request(f"{self.base_url}/v2/crawl/{job_id}", headers)
|
||||
if response.status_code == 200:
|
||||
crawl_status_response = response.json()
|
||||
if crawl_status_response.get("status") == "completed":
|
||||
@ -135,12 +151,16 @@ class FirecrawlApp:
|
||||
"lang": "en",
|
||||
"country": "us",
|
||||
"timeout": 60000,
|
||||
"ignoreInvalidURLs": False,
|
||||
"ignoreInvalidURLs": True,
|
||||
"scrapeOptions": {},
|
||||
"sources": [
|
||||
{"type": "web"},
|
||||
],
|
||||
"integration": "dify",
|
||||
}
|
||||
if params:
|
||||
json_data.update(params)
|
||||
response = self._post_request(f"{self.base_url}/v1/search", json_data, headers)
|
||||
response = self._post_request(f"{self.base_url}/v2/search", json_data, headers)
|
||||
if response.status_code == 200:
|
||||
response_data = response.json()
|
||||
if not response_data.get("success"):
|
||||
|
||||
@ -41,6 +41,7 @@ class RedisChannel:
|
||||
self._redis = redis_client
|
||||
self._key = channel_key
|
||||
self._command_ttl = command_ttl
|
||||
self._pending_key = f"{channel_key}:pending"
|
||||
|
||||
def fetch_commands(self) -> list[GraphEngineCommand]:
|
||||
"""
|
||||
@ -49,6 +50,9 @@ class RedisChannel:
|
||||
Returns:
|
||||
List of pending commands (drains the Redis list)
|
||||
"""
|
||||
if not self._has_pending_commands():
|
||||
return []
|
||||
|
||||
commands: list[GraphEngineCommand] = []
|
||||
|
||||
# Use pipeline for atomic operations
|
||||
@ -85,6 +89,7 @@ class RedisChannel:
|
||||
with self._redis.pipeline() as pipe:
|
||||
pipe.rpush(self._key, command_json)
|
||||
pipe.expire(self._key, self._command_ttl)
|
||||
pipe.set(self._pending_key, "1", ex=self._command_ttl)
|
||||
pipe.execute()
|
||||
|
||||
def _deserialize_command(self, data: dict[str, Any]) -> GraphEngineCommand | None:
|
||||
@ -112,3 +117,17 @@ class RedisChannel:
|
||||
|
||||
except (ValueError, TypeError):
|
||||
return None
|
||||
|
||||
def _has_pending_commands(self) -> bool:
|
||||
"""
|
||||
Check and consume the pending marker to avoid unnecessary list reads.
|
||||
|
||||
Returns:
|
||||
True if commands should be fetched from Redis.
|
||||
"""
|
||||
with self._redis.pipeline() as pipe:
|
||||
pipe.get(self._pending_key)
|
||||
pipe.delete(self._pending_key)
|
||||
pending_value, _ = pipe.execute()
|
||||
|
||||
return pending_value is not None
|
||||
|
||||
@ -7,6 +7,7 @@ from collections.abc import Mapping
|
||||
from functools import singledispatchmethod
|
||||
from typing import TYPE_CHECKING, final
|
||||
|
||||
from core.model_runtime.entities.llm_entities import LLMUsage
|
||||
from core.workflow.entities import GraphRuntimeState
|
||||
from core.workflow.enums import ErrorStrategy, NodeExecutionType
|
||||
from core.workflow.graph import Graph
|
||||
@ -125,6 +126,7 @@ class EventHandler:
|
||||
node_execution = self._graph_execution.get_or_create_node_execution(event.node_id)
|
||||
is_initial_attempt = node_execution.retry_count == 0
|
||||
node_execution.mark_started(event.id)
|
||||
self._graph_runtime_state.increment_node_run_steps()
|
||||
|
||||
# Track in response coordinator for stream ordering
|
||||
self._response_coordinator.track_node_execution(event.node_id, event.id)
|
||||
@ -163,6 +165,8 @@ class EventHandler:
|
||||
node_execution = self._graph_execution.get_or_create_node_execution(event.node_id)
|
||||
node_execution.mark_taken()
|
||||
|
||||
self._accumulate_node_usage(event.node_run_result.llm_usage)
|
||||
|
||||
# Store outputs in variable pool
|
||||
self._store_node_outputs(event.node_id, event.node_run_result.outputs)
|
||||
|
||||
@ -212,6 +216,8 @@ class EventHandler:
|
||||
node_execution.mark_failed(event.error)
|
||||
self._graph_execution.record_node_failure()
|
||||
|
||||
self._accumulate_node_usage(event.node_run_result.llm_usage)
|
||||
|
||||
result = self._error_handler.handle_node_failure(event)
|
||||
|
||||
if result:
|
||||
@ -235,6 +241,8 @@ class EventHandler:
|
||||
node_execution = self._graph_execution.get_or_create_node_execution(event.node_id)
|
||||
node_execution.mark_taken()
|
||||
|
||||
self._accumulate_node_usage(event.node_run_result.llm_usage)
|
||||
|
||||
# Persist outputs produced by the exception strategy (e.g. default values)
|
||||
self._store_node_outputs(event.node_id, event.node_run_result.outputs)
|
||||
|
||||
@ -286,6 +294,19 @@ class EventHandler:
|
||||
self._state_manager.enqueue_node(event.node_id)
|
||||
self._state_manager.start_execution(event.node_id)
|
||||
|
||||
def _accumulate_node_usage(self, usage: LLMUsage) -> None:
    """Fold one node's LLM usage into the shared graph runtime state.

    Usage whose ``total_tokens`` is zero or negative is ignored entirely.
    Otherwise the token count is added through ``add_tokens`` and the
    state's ``llm_usage`` is updated: replaced by ``usage`` while the stored
    total is still zero, or combined with it via ``plus`` afterwards.

    Args:
        usage: Usage reported by a node run result.
    """
    node_tokens = usage.total_tokens
    if node_tokens <= 0:
        return

    runtime_state = self._graph_runtime_state
    runtime_state.add_tokens(node_tokens)

    running_usage = runtime_state.llm_usage
    # The first non-empty usage is stored as-is; later ones are summed onto it.
    runtime_state.llm_usage = usage if running_usage.total_tokens == 0 else running_usage.plus(usage)
|
||||
|
||||
def _store_node_outputs(self, node_id: str, outputs: Mapping[str, object]) -> None:
|
||||
"""
|
||||
Store node outputs in the variable pool.
|
||||
|
||||
@ -8,7 +8,12 @@ import threading
|
||||
import time
|
||||
from typing import TYPE_CHECKING, final
|
||||
|
||||
from core.workflow.graph_events.base import GraphNodeEventBase
|
||||
from core.workflow.graph_events import (
|
||||
GraphNodeEventBase,
|
||||
NodeRunExceptionEvent,
|
||||
NodeRunFailedEvent,
|
||||
NodeRunSucceededEvent,
|
||||
)
|
||||
|
||||
from ..event_management import EventManager
|
||||
from .execution_coordinator import ExecutionCoordinator
|
||||
@ -72,13 +77,16 @@ class Dispatcher:
|
||||
if self._thread and self._thread.is_alive():
|
||||
self._thread.join(timeout=10.0)
|
||||
|
||||
_COMMAND_TRIGGER_EVENTS = (
|
||||
NodeRunSucceededEvent,
|
||||
NodeRunFailedEvent,
|
||||
NodeRunExceptionEvent,
|
||||
)
|
||||
|
||||
def _dispatcher_loop(self) -> None:
|
||||
"""Main dispatcher loop."""
|
||||
try:
|
||||
while not self._stop_event.is_set():
|
||||
# Check for commands
|
||||
self._execution_coordinator.check_commands()
|
||||
|
||||
# Check for scaling
|
||||
self._execution_coordinator.check_scaling()
|
||||
|
||||
@ -87,6 +95,8 @@ class Dispatcher:
|
||||
event = self._event_queue.get(timeout=0.1)
|
||||
# Route to the event handler
|
||||
self._event_handler.dispatch(event)
|
||||
if self._should_check_commands(event):
|
||||
self._execution_coordinator.check_commands()
|
||||
self._event_queue.task_done()
|
||||
except queue.Empty:
|
||||
# Check if execution is complete
|
||||
@ -102,3 +112,7 @@ class Dispatcher:
|
||||
# Signal the event emitter that execution is complete
|
||||
if self._event_emitter:
|
||||
self._event_emitter.mark_complete()
|
||||
|
||||
def _should_check_commands(self, event: GraphNodeEventBase) -> bool:
    """Report whether ``event`` is an instance of a ``_COMMAND_TRIGGER_EVENTS`` type.

    Args:
        event: The event that was just routed to the event handler.

    Returns:
        True when ``event`` matches one of the trigger event classes
        (the node succeeded / failed / exception events), else False.
    """
    trigger_types = self._COMMAND_TRIGGER_EVENTS
    is_trigger = isinstance(event, trigger_types)
    return is_trigger
|
||||
|
||||
Reference in New Issue
Block a user