Merge branch 'feat/summary-index' into deploy/dev

This commit is contained in:
zxhlyh
2026-01-28 09:50:05 +08:00
301 changed files with 40720 additions and 8308 deletions

View File

@ -36,7 +36,7 @@ def init_app(app: DifyApp) -> None:
router.include_router(console_router, prefix="/console/api")
CORS(
app,
resources={r"/console/api/*": {"origins": dify_config.CONSOLE_CORS_ALLOW_ORIGINS}},
resources={r"/console/api/.*": {"origins": dify_config.CONSOLE_CORS_ALLOW_ORIGINS}},
supports_credentials=True,
allow_headers=list(AUTHENTICATED_HEADERS),
methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"],

View File

@ -0,0 +1,20 @@
"""
OpenTelemetry node parsers for workflow nodes.
This module provides parsers that extract node-specific metadata and set
OpenTelemetry span attributes according to semantic conventions.
"""
from extensions.otel.parser.base import DefaultNodeOTelParser, NodeOTelParser, safe_json_dumps
from extensions.otel.parser.llm import LLMNodeOTelParser
from extensions.otel.parser.retrieval import RetrievalNodeOTelParser
from extensions.otel.parser.tool import ToolNodeOTelParser
# Public API of the OTel node-parser package: one parser per instrumented
# node kind, the structural Protocol they satisfy, the generic fallback,
# and the shared JSON-serialization helper.
__all__ = [
    "DefaultNodeOTelParser",
    "LLMNodeOTelParser",
    "NodeOTelParser",
    "RetrievalNodeOTelParser",
    "ToolNodeOTelParser",
    "safe_json_dumps",
]

View File

@ -0,0 +1,117 @@
"""
Base parser interface and utilities for OpenTelemetry node parsers.
"""
import json
from typing import Any, Protocol
from opentelemetry.trace import Span
from opentelemetry.trace.status import Status, StatusCode
from pydantic import BaseModel
from core.file.models import File
from core.variables import Segment
from core.workflow.enums import NodeType
from core.workflow.graph_events import GraphNodeEventBase
from core.workflow.nodes.base.node import Node
from extensions.otel.semconv.gen_ai import ChainAttributes, GenAIAttributes
def safe_json_dumps(obj: Any, ensure_ascii: bool = False) -> str:
    """
    Serialize *obj* to a JSON string, tolerating non-JSON-native types.

    Conversion rules:
    - Segment instances are unwrapped to their underlying ``.value``
    - File instances are rendered via ``to_dict()``
    - Pydantic ``BaseModel`` instances are rendered via ``model_dump(mode="json")``
    - dicts, lists and tuples are converted recursively
    - anything else degrades to its ``str()`` representation

    Args:
        obj: Object to serialize.
        ensure_ascii: Passed through to ``json.dumps``.

    Returns:
        A JSON string; on serialization failure, a JSON error object
        describing the failure instead of raising.
    """

    def _normalize(item: Any) -> Any:
        # Recursively rewrite *item* into JSON-native equivalents.
        if item is None or isinstance(item, (bool, int, float, str)):
            return item
        if isinstance(item, Segment):
            # Unwrap the segment and normalize its raw value.
            return _normalize(item.value)
        if isinstance(item, File):
            return item.to_dict()
        if isinstance(item, BaseModel):
            return _normalize(item.model_dump(mode="json"))
        if isinstance(item, dict):
            return {key: _normalize(val) for key, val in item.items()}
        if isinstance(item, (list, tuple)):
            return [_normalize(element) for element in item]
        # Unknown type: fall back to its string representation.
        return str(item)

    try:
        return json.dumps(_normalize(obj), ensure_ascii=ensure_ascii)
    except (TypeError, ValueError) as exc:
        # Even the normalized structure failed to serialize; report why.
        return json.dumps(
            {"error": f"Failed to serialize: {type(obj).__name__}", "message": str(exc)},
            ensure_ascii=ensure_ascii,
        )
class NodeOTelParser(Protocol):
    """Parser interface for node-specific OpenTelemetry enrichment.

    Implementations inspect a workflow node (and, when available, the event
    carrying its run result) and set attributes and status on *span*.
    A failed execution is reported through *error*.
    """

    def parse(
        self, *, node: Node, span: "Span", error: Exception | None, result_event: GraphNodeEventBase | None = None
    ) -> None: ...
class DefaultNodeOTelParser:
    """Fallback parser applied when no node-type-specific parser is registered."""

    def parse(
        self, *, node: Node, span: "Span", error: Exception | None, result_event: GraphNodeEventBase | None = None
    ) -> None:
        """Populate generic node attributes, chain I/O values, and span status."""
        span.set_attribute("node.id", node.id)
        if node.execution_id:
            span.set_attribute("node.execution_id", node.execution_id)
        if hasattr(node, "node_type") and node.node_type:
            span.set_attribute("node.type", node.node_type.value)
        span.set_attribute(GenAIAttributes.FRAMEWORK, "dify")

        # Derive the span kind from the node type; anything unrecognized is a TASK.
        span_kind = "TASK"
        node_type = getattr(node, "node_type", None)
        if isinstance(node_type, NodeType):
            span_kind = {
                NodeType.LLM: "LLM",
                NodeType.KNOWLEDGE_RETRIEVAL: "RETRIEVER",
                NodeType.TOOL: "TOOL",
            }.get(node_type, "TASK")
        span.set_attribute(GenAIAttributes.SPAN_KIND, span_kind)

        # Surface node inputs/outputs (when present) as chain attributes.
        run_result = result_event.node_run_result if result_event else None
        if run_result:
            if run_result.inputs:
                span.set_attribute(ChainAttributes.INPUT_VALUE, safe_json_dumps(run_result.inputs))
            if run_result.outputs:
                span.set_attribute(ChainAttributes.OUTPUT_VALUE, safe_json_dumps(run_result.outputs))

        # Record failure details; otherwise mark the span successful.
        if error:
            span.record_exception(error)
            span.set_status(Status(StatusCode.ERROR, str(error)))
        else:
            span.set_status(Status(StatusCode.OK))

View File

@ -0,0 +1,155 @@
"""
Parser for LLM nodes that captures LLM-specific metadata.
"""
import logging
from collections.abc import Mapping
from typing import Any
from opentelemetry.trace import Span
from core.workflow.graph_events import GraphNodeEventBase
from core.workflow.nodes.base.node import Node
from extensions.otel.parser.base import DefaultNodeOTelParser, safe_json_dumps
from extensions.otel.semconv.gen_ai import LLMAttributes
logger = logging.getLogger(__name__)
def _format_input_messages(process_data: Mapping[str, Any]) -> str:
    """
    Build the structured gen_ai input-message JSON for an LLM span.

    Only prompt entries that are dicts with a recognized role and non-empty
    text are included.

    Args:
        process_data: Process data containing prompts

    Returns:
        JSON string of formatted input messages
    """
    try:
        if not isinstance(process_data, dict):
            return safe_json_dumps([])
        prompts = process_data.get("prompts", [])
        if not prompts:
            return safe_json_dumps([])
        accepted_roles = {"system", "user", "assistant", "tool"}
        # Keep only well-formed entries: dict, known role, non-empty text.
        messages = [
            {"role": entry["role"], "parts": [{"type": "text", "content": entry["text"]}]}
            for entry in prompts
            if isinstance(entry, dict) and entry.get("role") in accepted_roles and entry.get("text")
        ]
        return safe_json_dumps(messages)
    except Exception as exc:
        logger.warning("Failed to format input messages: %s", exc, exc_info=True)
        return safe_json_dumps([])
def _format_output_messages(outputs: Mapping[str, Any]) -> str:
    """
    Build the structured gen_ai output-message JSON for an LLM span.

    Args:
        outputs: Output data containing text and finish_reason

    Returns:
        JSON string of formatted output messages
    """
    try:
        if not isinstance(outputs, dict):
            return safe_json_dumps([])
        text = outputs.get("text", "")
        if not text:
            # No completion text: emit an empty message list.
            return safe_json_dumps([])
        reason = outputs.get("finish_reason", "")
        if reason not in {"stop", "length", "content_filter", "tool_call", "error"}:
            # Normalize anything unexpected to the conventional default.
            reason = "stop"
        message = {
            "role": "assistant",
            "parts": [{"type": "text", "content": text}],
            "finish_reason": reason,
        }
        return safe_json_dumps([message])
    except Exception as exc:
        logger.warning("Failed to format output messages: %s", exc, exc_info=True)
        return safe_json_dumps([])
class LLMNodeOTelParser:
    """Parser for LLM nodes that captures LLM-specific metadata."""

    def __init__(self) -> None:
        # Generic attributes (ids, span kind, chain I/O, status) are delegated.
        self._delegate = DefaultNodeOTelParser()

    def parse(
        self, *, node: Node, span: "Span", error: Exception | None, result_event: GraphNodeEventBase | None = None
    ) -> None:
        """Enrich *span* with model identity, token usage, prompts and messages."""
        self._delegate.parse(node=node, span=span, error=error, result_event=result_event)
        run_result = result_event.node_run_result if result_event else None
        if not run_result:
            return

        process_data = run_result.process_data or {}
        outputs = run_result.outputs or {}

        # Model identity: only emitted when present.
        for attr_key, attr_value in (
            (LLMAttributes.REQUEST_MODEL, process_data.get("model_name") or ""),
            (LLMAttributes.PROVIDER_NAME, process_data.get("model_provider") or ""),
        ):
            if attr_value:
                span.set_attribute(attr_key, attr_value)

        # Token usage may live in process_data or in outputs.
        usage = process_data.get("usage") or outputs.get("usage") or {}
        if usage:
            span.set_attribute(LLMAttributes.USAGE_INPUT_TOKENS, usage.get("prompt_tokens", 0))
            span.set_attribute(LLMAttributes.USAGE_OUTPUT_TOKENS, usage.get("completion_tokens", 0))
            span.set_attribute(LLMAttributes.USAGE_TOTAL_TOKENS, usage.get("total_tokens", 0))

        # Raw prompt list and completion text.
        prompts = process_data.get("prompts", [])
        if prompts:
            span.set_attribute(LLMAttributes.PROMPT, safe_json_dumps(prompts))
        completion = str(outputs.get("text", ""))
        if completion:
            span.set_attribute(LLMAttributes.COMPLETION, completion)
        finish_reason = outputs.get("finish_reason") or ""
        if finish_reason:
            span.set_attribute(LLMAttributes.RESPONSE_FINISH_REASON, finish_reason)

        # Structured gen_ai message payloads are always set, even when empty.
        span.set_attribute(LLMAttributes.INPUT_MESSAGE, _format_input_messages(process_data))
        span.set_attribute(LLMAttributes.OUTPUT_MESSAGE, _format_output_messages(outputs))

View File

@ -0,0 +1,105 @@
"""
Parser for knowledge retrieval nodes that captures retrieval-specific metadata.
"""
import logging
from collections.abc import Sequence
from typing import Any
from opentelemetry.trace import Span
from core.variables import Segment
from core.workflow.graph_events import GraphNodeEventBase
from core.workflow.nodes.base.node import Node
from extensions.otel.parser.base import DefaultNodeOTelParser, safe_json_dumps
from extensions.otel.semconv.gen_ai import RetrieverAttributes
logger = logging.getLogger(__name__)
def _format_retrieval_documents(retrieval_documents: list[Any]) -> list:
"""
Format retrieval documents for semantic conventions.
Args:
retrieval_documents: List of retrieval document dictionaries
Returns:
List of formatted semantic documents
"""
try:
if not isinstance(retrieval_documents, list):
return []
semantic_documents = []
for doc in retrieval_documents:
if not isinstance(doc, dict):
continue
metadata = doc.get("metadata", {})
content = doc.get("content", "")
title = doc.get("title", "")
score = metadata.get("score", 0.0)
document_id = metadata.get("document_id", "")
semantic_metadata = {}
if title:
semantic_metadata["title"] = title
if metadata.get("source"):
semantic_metadata["source"] = metadata["source"]
elif metadata.get("_source"):
semantic_metadata["source"] = metadata["_source"]
if metadata.get("doc_metadata"):
doc_metadata = metadata["doc_metadata"]
if isinstance(doc_metadata, dict):
semantic_metadata.update(doc_metadata)
semantic_doc = {
"document": {"content": content, "metadata": semantic_metadata, "score": score, "id": document_id}
}
semantic_documents.append(semantic_doc)
return semantic_documents
except Exception as e:
logger.warning("Failed to format retrieval documents: %s", e, exc_info=True)
return []
class RetrievalNodeOTelParser:
    """Parser for knowledge retrieval nodes that captures retrieval-specific metadata."""

    def __init__(self) -> None:
        # Generic attributes (ids, span kind, chain I/O, status) are delegated.
        self._delegate = DefaultNodeOTelParser()

    def parse(
        self, *, node: Node, span: "Span", error: Exception | None, result_event: GraphNodeEventBase | None = None
    ) -> None:
        """Add retrieval query and retrieved-document attributes to *span*."""
        self._delegate.parse(node=node, span=span, error=error, result_event=result_event)
        if not result_event or not result_event.node_run_result:
            return
        node_run_result = result_event.node_run_result
        inputs = node_run_result.inputs or {}
        outputs = node_run_result.outputs or {}

        # Query text that drove the retrieval.
        query = str(inputs.get("query", "")) if inputs else ""
        if query:
            span.set_attribute(RetrieverAttributes.QUERY, query)

        # The "result" output holds the retrieved documents, possibly wrapped
        # in a Segment.
        result_value = outputs.get("result") if outputs else None
        retrieval_documents: list[Any] = []
        if result_value:
            value_to_check = result_value
            if isinstance(result_value, Segment):
                value_to_check = result_value.value
            # Fix: exclude str/bytes, which are also Sequences — previously a
            # plain string result was exploded into a list of characters.
            # (The former `(list, Sequence)` check was redundant: list is a Sequence.)
            if isinstance(value_to_check, Sequence) and not isinstance(value_to_check, (str, bytes)):
                retrieval_documents = list(value_to_check)
        if retrieval_documents:
            semantic_retrieval_documents = _format_retrieval_documents(retrieval_documents)
            span.set_attribute(RetrieverAttributes.DOCUMENT, safe_json_dumps(semantic_retrieval_documents))

View File

@ -0,0 +1,47 @@
"""
Parser for tool nodes that captures tool-specific metadata.
"""
from opentelemetry.trace import Span
from core.workflow.enums import WorkflowNodeExecutionMetadataKey
from core.workflow.graph_events import GraphNodeEventBase
from core.workflow.nodes.base.node import Node
from core.workflow.nodes.tool.entities import ToolNodeData
from extensions.otel.parser.base import DefaultNodeOTelParser, safe_json_dumps
from extensions.otel.semconv.gen_ai import ToolAttributes
class ToolNodeOTelParser:
    """Parser for tool nodes that captures tool-specific metadata."""

    def __init__(self) -> None:
        # Generic attributes (ids, span kind, chain I/O, status) are delegated.
        self._delegate = DefaultNodeOTelParser()

    def parse(
        self, *, node: Node, span: "Span", error: Exception | None, result_event: GraphNodeEventBase | None = None
    ) -> None:
        """Add tool name/type, description, call arguments and result to *span*."""
        self._delegate.parse(node=node, span=span, error=error, result_event=result_event)

        tool_data = getattr(node, "_node_data", None)
        if not isinstance(tool_data, ToolNodeData):
            # Not a tool node (or node data unavailable): generic attrs only.
            return

        span.set_attribute(ToolAttributes.TOOL_NAME, node.title)
        span.set_attribute(ToolAttributes.TOOL_TYPE, tool_data.provider_type.value)

        run_result = result_event.node_run_result if result_event else None
        if not run_result:
            return

        # Tool info from execution metadata (consistent with aliyun_trace).
        metadata = run_result.metadata
        tool_info = metadata.get(WorkflowNodeExecutionMetadataKey.TOOL_INFO, {}) if metadata else {}
        if tool_info:
            span.set_attribute(ToolAttributes.TOOL_DESCRIPTION, safe_json_dumps(tool_info))
        if run_result.inputs:
            span.set_attribute(ToolAttributes.TOOL_CALL_ARGUMENTS, safe_json_dumps(run_result.inputs))
        if run_result.outputs:
            span.set_attribute(ToolAttributes.TOOL_CALL_RESULT, safe_json_dumps(run_result.outputs))

View File

@ -1,6 +1,13 @@
"""Semantic convention shortcuts for Dify-specific spans."""
from .dify import DifySpanAttributes
from .gen_ai import GenAIAttributes
from .gen_ai import ChainAttributes, GenAIAttributes, LLMAttributes, RetrieverAttributes, ToolAttributes
__all__ = ["DifySpanAttributes", "GenAIAttributes"]
__all__ = [
"ChainAttributes",
"DifySpanAttributes",
"GenAIAttributes",
"LLMAttributes",
"RetrieverAttributes",
"ToolAttributes",
]

View File

@ -62,3 +62,37 @@ class ToolAttributes:
TOOL_CALL_RESULT = "gen_ai.tool.call.result"
"""Tool invocation result."""
class LLMAttributes:
    """LLM operation attribute keys.

    NOTE(review): key names use the ``gen_ai.*`` namespace and appear to follow
    the OpenTelemetry GenAI semantic conventions — confirm against the semconv
    version this project targets.
    """

    REQUEST_MODEL = "gen_ai.request.model"
    """Model identifier requested for the LLM call."""
    PROVIDER_NAME = "gen_ai.provider.name"
    """Name of the model provider."""
    USAGE_INPUT_TOKENS = "gen_ai.usage.input_tokens"
    """Number of input (prompt) tokens consumed."""
    USAGE_OUTPUT_TOKENS = "gen_ai.usage.output_tokens"
    """Number of output (completion) tokens produced."""
    USAGE_TOTAL_TOKENS = "gen_ai.usage.total_tokens"
    """Total number of tokens (input + output)."""
    PROMPT = "gen_ai.prompt"
    """Raw prompt payload, JSON-serialized."""
    COMPLETION = "gen_ai.completion"
    """Completion text returned by the model."""
    RESPONSE_FINISH_REASON = "gen_ai.response.finish_reason"
    """Reason the model stopped generating (e.g. stop, length)."""
    INPUT_MESSAGE = "gen_ai.input.messages"
    """Input messages in structured (role/parts) JSON format."""
    OUTPUT_MESSAGE = "gen_ai.output.messages"
    """Output messages in structured (role/parts) JSON format."""