refactor(telemetry): introduce TelemetryFacade to centralize event emission

Migrate from direct TraceQueueManager.add_trace_task calls to TelemetryFacade.emit
with the TelemetryEvent abstraction. This reduces how invasively telemetry code
intrudes into the CE codebase by consolidating telemetry logic in core/telemetry/
behind a single guard in ops_trace_manager.py.
This commit is contained in:
GareArc
2026-02-05 04:23:56 -08:00
parent 2d60be311d
commit ed222945aa
16 changed files with 639 additions and 330 deletions

View File

@ -62,7 +62,8 @@ from core.app.task_pipeline.message_cycle_manager import MessageCycleManager
from core.base.tts import AppGeneratorTTSPublisher, AudioTrunk
from core.model_runtime.entities.llm_entities import LLMUsage
from core.model_runtime.utils.encoders import jsonable_encoder
from core.ops.ops_trace_manager import TraceQueueManager, TraceTask, TraceTaskName
from core.ops.ops_trace_manager import TraceQueueManager
from core.telemetry import TelemetryContext, TelemetryEvent, TelemetryFacade
from core.workflow.enums import WorkflowExecutionStatus
from core.workflow.nodes import NodeType
from core.workflow.repositories.draft_variable_repository import DraftVariableSaverFactory
@ -831,12 +832,19 @@ class AdvancedChatAppGenerateTaskPipeline(GraphRuntimeStateSupport):
session.add_all(message_files)
if trace_manager:
trace_manager.add_trace_task(
TraceTask(
TraceTaskName.MESSAGE_TRACE,
conversation_id=str(message.conversation_id),
message_id=str(message.id),
)
TelemetryFacade.emit(
TelemetryEvent(
name="message",
context=TelemetryContext(
tenant_id=self._application_generate_entity.app_config.tenant_id,
app_id=self._application_generate_entity.app_config.app_id,
),
payload={
"conversation_id": str(message.conversation_id),
"message_id": str(message.id),
},
),
trace_manager=trace_manager,
)
def _seed_graph_runtime_state_from_queue_manager(self) -> None:

View File

@ -52,10 +52,10 @@ from core.model_runtime.entities.message_entities import (
TextPromptMessageContent,
)
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
from core.ops.entities.trace_entity import TraceTaskName
from core.ops.ops_trace_manager import TraceQueueManager, TraceTask
from core.ops.ops_trace_manager import TraceQueueManager
from core.prompt.utils.prompt_message_util import PromptMessageUtil
from core.prompt.utils.prompt_template_parser import PromptTemplateParser
from core.telemetry import TelemetryContext, TelemetryEvent, TelemetryFacade
from events.message_event import message_was_created
from extensions.ext_database import db
from libs.datetime_utils import naive_utc_now
@ -409,10 +409,19 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline):
message.message_metadata = self._task_state.metadata.model_dump_json()
if trace_manager:
trace_manager.add_trace_task(
TraceTask(
TraceTaskName.MESSAGE_TRACE, conversation_id=self._conversation_id, message_id=self._message_id
)
TelemetryFacade.emit(
TelemetryEvent(
name="message",
context=TelemetryContext(
tenant_id=self._application_generate_entity.app_config.tenant_id,
app_id=self._application_generate_entity.app_config.app_id,
),
payload={
"conversation_id": self._conversation_id,
"message_id": self._message_id,
},
),
trace_manager=trace_manager,
)
message_was_created.send(

View File

@ -15,8 +15,7 @@ from datetime import datetime
from typing import Any, Union
from core.app.entities.app_invoke_entities import AdvancedChatAppGenerateEntity, WorkflowAppGenerateEntity
from core.ops.entities.trace_entity import TraceTaskName
from core.ops.ops_trace_manager import TraceQueueManager, TraceTask
from core.ops.ops_trace_manager import TraceQueueManager
from core.workflow.constants import SYSTEM_VARIABLE_NODE_ID
from core.workflow.entities import WorkflowExecution, WorkflowNodeExecution
from core.workflow.enums import (
@ -396,25 +395,31 @@ class WorkflowPersistenceLayer(GraphEngineLayer):
external_trace_id = self._application_generate_entity.extras.get("external_trace_id")
parent_trace_context = self._application_generate_entity.extras.get("parent_trace_context")
trace_task = TraceTask(
TraceTaskName.WORKFLOW_TRACE,
workflow_execution=execution,
conversation_id=conversation_id,
user_id=self._trace_manager.user_id,
external_trace_id=external_trace_id,
parent_trace_context=parent_trace_context,
from core.telemetry import TelemetryContext, TelemetryEvent, TelemetryFacade
TelemetryFacade.emit(
TelemetryEvent(
name="workflow",
context=TelemetryContext(
tenant_id=self._application_generate_entity.app_config.tenant_id,
user_id=self._trace_manager.user_id,
app_id=self._application_generate_entity.app_config.app_id,
),
payload={
"workflow_execution": execution,
"conversation_id": conversation_id,
"user_id": self._trace_manager.user_id,
"external_trace_id": external_trace_id,
"parent_trace_context": parent_trace_context,
},
),
trace_manager=self._trace_manager,
)
self._trace_manager.add_trace_task(trace_task)
def _enqueue_node_trace_task(self, domain_execution: WorkflowNodeExecution) -> None:
if not self._trace_manager:
return
from enterprise.telemetry.exporter import is_enterprise_telemetry_enabled
if not is_enterprise_telemetry_enabled():
return
execution = self._get_workflow_execution()
meta = domain_execution.metadata or {}
@ -494,11 +499,20 @@ class WorkflowPersistenceLayer(GraphEngineLayer):
if parent_trace_context:
node_data["parent_trace_context"] = parent_trace_context
trace_task = TraceTask(
TraceTaskName.NODE_EXECUTION_TRACE,
node_execution_data=node_data,
from core.telemetry import TelemetryContext, TelemetryEvent, TelemetryFacade
TelemetryFacade.emit(
TelemetryEvent(
name="node_execution",
context=TelemetryContext(
tenant_id=node_data.get("tenant_id"),
user_id=node_data.get("user_id"),
app_id=node_data.get("app_id"),
),
payload={"node_execution_data": node_data},
),
trace_manager=self._trace_manager,
)
self._trace_manager.add_trace_task(trace_task)
def _system_variables(self) -> Mapping[str, Any]:
runtime_state = self.graph_runtime_state

View File

@ -4,8 +4,8 @@ from typing import Any, TextIO, Union
from pydantic import BaseModel
from configs import dify_config
from core.ops.entities.trace_entity import TraceTaskName
from core.ops.ops_trace_manager import TraceQueueManager, TraceTask
from core.ops.ops_trace_manager import TraceQueueManager
from core.telemetry import TelemetryContext, TelemetryEvent, TelemetryFacade
from core.tools.entities.tool_entities import ToolInvokeMessage
_TEXT_COLOR_MAPPING = {
@ -71,15 +71,19 @@ class DifyAgentCallbackHandler(BaseModel):
print_text("\n")
if trace_manager:
trace_manager.add_trace_task(
TraceTask(
TraceTaskName.TOOL_TRACE,
message_id=message_id,
tool_name=tool_name,
tool_inputs=tool_inputs,
tool_outputs=tool_outputs,
timer=timer,
)
TelemetryFacade.emit(
TelemetryEvent(
name="tool",
context=TelemetryContext(app_id=trace_manager.app_id, user_id=trace_manager.user_id),
payload={
"message_id": message_id,
"tool_name": tool_name,
"tool_inputs": tool_inputs,
"tool_outputs": tool_outputs,
"timer": timer,
},
),
trace_manager=trace_manager,
)
def on_tool_error(self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any):

View File

@ -25,10 +25,9 @@ from core.model_runtime.entities.llm_entities import LLMResult
from core.model_runtime.entities.message_entities import PromptMessage, SystemPromptMessage, UserPromptMessage
from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.errors.invoke import InvokeAuthorizationError, InvokeError
from core.ops.entities.trace_entity import TraceTaskName
from core.ops.ops_trace_manager import TraceQueueManager, TraceTask
from core.ops.utils import measure_time
from core.prompt.utils.prompt_template_parser import PromptTemplateParser
from core.telemetry import TelemetryContext, TelemetryEvent, TelemetryFacade
from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionMetadataKey
from extensions.ext_database import db
from extensions.ext_storage import storage
@ -94,15 +93,17 @@ class LLMGenerator:
name = name[:75] + "..."
# get tracing instance
trace_manager = TraceQueueManager(app_id=app_id)
trace_manager.add_trace_task(
TraceTask(
TraceTaskName.GENERATE_NAME_TRACE,
conversation_id=conversation_id,
generate_conversation_name=name,
inputs=prompt,
timer=timer,
tenant_id=tenant_id,
TelemetryFacade.emit(
TelemetryEvent(
name="generate_name",
context=TelemetryContext(tenant_id=tenant_id, app_id=app_id),
payload={
"conversation_id": conversation_id,
"generate_conversation_name": name,
"inputs": prompt,
"timer": timer,
"tenant_id": tenant_id,
},
)
)
@ -787,25 +788,29 @@ class LLMGenerator:
total_price = None
currency = None
trace_manager = TraceQueueManager(app_id=app_id)
trace_manager.add_trace_task(
TraceTask(
TraceTaskName.PROMPT_GENERATION_TRACE,
tenant_id=tenant_id,
user_id=user_id,
app_id=app_id,
operation_type=operation_type,
instruction=instruction,
generated_output=generated_output,
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
total_tokens=total_tokens,
model_provider=model_provider,
model_name=model_name,
latency=latency,
total_price=total_price,
currency=currency,
timer=timer,
error=error,
from core.telemetry import TelemetryContext, TelemetryEvent, TelemetryFacade
TelemetryFacade.emit(
TelemetryEvent(
name="prompt_generation",
context=TelemetryContext(tenant_id=tenant_id, user_id=user_id, app_id=app_id),
payload={
"tenant_id": tenant_id,
"user_id": user_id,
"app_id": app_id,
"operation_type": operation_type,
"instruction": instruction,
"generated_output": generated_output,
"prompt_tokens": prompt_tokens,
"completion_tokens": completion_tokens,
"total_tokens": total_tokens,
"model_provider": model_provider,
"model_name": model_name,
"latency": latency,
"total_price": total_price,
"currency": currency,
"timer": timer,
"error": error,
},
)
)

View File

@ -5,9 +5,9 @@ from typing import Any
from core.app.app_config.entities import AppConfig
from core.moderation.base import ModerationAction, ModerationError
from core.moderation.factory import ModerationFactory
from core.ops.entities.trace_entity import TraceTaskName
from core.ops.ops_trace_manager import TraceQueueManager, TraceTask
from core.ops.ops_trace_manager import TraceQueueManager
from core.ops.utils import measure_time
from core.telemetry import TelemetryContext, TelemetryEvent, TelemetryFacade
logger = logging.getLogger(__name__)
@ -49,14 +49,18 @@ class InputModeration:
moderation_result = moderation_factory.moderation_for_inputs(inputs, query)
if trace_manager:
trace_manager.add_trace_task(
TraceTask(
TraceTaskName.MODERATION_TRACE,
message_id=message_id,
moderation_result=moderation_result,
inputs=inputs,
timer=timer,
)
TelemetryFacade.emit(
TelemetryEvent(
name="moderation",
context=TelemetryContext(tenant_id=tenant_id, app_id=app_id),
payload={
"message_id": message_id,
"moderation_result": moderation_result,
"inputs": inputs,
"timer": timer,
},
),
trace_manager=trace_manager,
)
if not moderation_result.flagged:

View File

@ -9,8 +9,8 @@ from pydantic import BaseModel, ConfigDict, field_serializer, field_validator
class BaseTraceInfo(BaseModel):
message_id: str | None = None
message_data: Any | None = None
inputs: Union[str, dict[str, Any], list] | None = None
outputs: Union[str, dict[str, Any], list] | None = None
inputs: Union[str, dict[str, Any], list[Any]] | None = None
outputs: Union[str, dict[str, Any], list[Any]] | None = None
start_time: datetime | None = None
end_time: datetime | None = None
metadata: dict[str, Any]
@ -18,7 +18,7 @@ class BaseTraceInfo(BaseModel):
@field_validator("inputs", "outputs")
@classmethod
def ensure_type(cls, v):
def ensure_type(cls, v: str | dict[str, Any] | list[Any] | None) -> str | dict[str, Any] | list[Any] | None:
if v is None:
return None
if isinstance(v, str | dict | list):
@ -63,7 +63,7 @@ class MessageTraceInfo(BaseTraceInfo):
answer_tokens: int
total_tokens: int
error: str | None = None
file_list: Union[str, dict[str, Any], list] | None = None
file_list: Union[str, dict[str, Any], list[Any]] | None = None
message_file_data: Any | None = None
conversation_mode: str
gen_ai_server_time_to_first_token: float | None = None
@ -110,7 +110,7 @@ class ToolTraceInfo(BaseTraceInfo):
tool_config: dict[str, Any]
time_cost: Union[int, float]
tool_parameters: dict[str, Any]
file_url: Union[str, None, list] = None
file_url: Union[str, None, list[str]] = None
class GenerateNameTraceInfo(BaseTraceInfo):

View File

@ -1272,9 +1272,9 @@ class TraceQueueManager:
self.trace_instance = OpsTraceManager.get_ops_trace_instance(app_id)
self.flask_app = current_app._get_current_object() # type: ignore
from enterprise.telemetry.exporter import is_enterprise_telemetry_enabled
from core.telemetry import is_telemetry_enabled
self._enterprise_telemetry_enabled = is_enterprise_telemetry_enabled()
self._enterprise_telemetry_enabled = is_telemetry_enabled()
if trace_manager_timer is None:
self.start_timer()

View File

@ -27,8 +27,7 @@ from core.model_runtime.entities.llm_entities import LLMResult, LLMUsage
from core.model_runtime.entities.message_entities import PromptMessage, PromptMessageRole, PromptMessageTool
from core.model_runtime.entities.model_entities import ModelFeature, ModelType
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
from core.ops.entities.trace_entity import TraceTaskName
from core.ops.ops_trace_manager import TraceQueueManager, TraceTask
from core.ops.ops_trace_manager import TraceQueueManager
from core.ops.utils import measure_time
from core.prompt.advanced_prompt_transform import AdvancedPromptTransform
from core.prompt.entities.advanced_prompt_entities import ChatModelMessage, CompletionModelPromptTemplate
@ -56,6 +55,7 @@ from core.rag.retrieval.template_prompts import (
METADATA_FILTER_USER_PROMPT_2,
METADATA_FILTER_USER_PROMPT_3,
)
from core.telemetry import TelemetryContext, TelemetryEvent, TelemetryFacade
from core.tools.signature import sign_upload_file
from core.tools.utils.dataset_retriever.dataset_retriever_base_tool import DatasetRetrieverBaseTool
from extensions.ext_database import db
@ -728,10 +728,21 @@ class DatasetRetrieval:
self.application_generate_entity.trace_manager if self.application_generate_entity else None
)
if trace_manager:
trace_manager.add_trace_task(
TraceTask(
TraceTaskName.DATASET_RETRIEVAL_TRACE, message_id=message_id, documents=documents, timer=timer
)
app_config = self.application_generate_entity.app_config if self.application_generate_entity else None
TelemetryFacade.emit(
TelemetryEvent(
name="dataset_retrieval",
context=TelemetryContext(
tenant_id=app_config.tenant_id if app_config else None,
app_id=app_config.app_id if app_config else None,
),
payload={
"message_id": message_id,
"documents": documents,
"timer": timer,
},
),
trace_manager=trace_manager,
)
def _on_query(

View File

@ -0,0 +1,4 @@
from core.telemetry.events import TelemetryContext, TelemetryEvent
from core.telemetry.facade import TelemetryFacade, emit, is_telemetry_enabled
__all__ = ["TelemetryContext", "TelemetryEvent", "TelemetryFacade", "emit", "is_telemetry_enabled"]

View File

@ -0,0 +1,18 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Any
@dataclass(frozen=True)
class TelemetryContext:
    """Immutable tenant/user/app scope attached to a telemetry event.

    All fields are optional: callers populate whichever identifiers are
    known at the emission site.
    """

    # Workspace (tenant) the event originated from, when known.
    tenant_id: str | None = None
    # User that triggered the event, when known.
    user_id: str | None = None
    # Application the event relates to, when known.
    app_id: str | None = None
@dataclass(frozen=True)
class TelemetryEvent:
    """A single telemetry event to be emitted via the telemetry facade.

    The facade maps ``name`` to a concrete trace task type and forwards
    ``payload`` as that task's keyword arguments; events whose ``name`` is
    not recognized are dropped.
    """

    # Logical event name, e.g. "message", "workflow", "tool".
    name: str
    # Tenant/user/app scope the event belongs to.
    context: TelemetryContext
    # Event-specific keyword arguments forwarded to the trace task.
    payload: dict[str, Any]

View File

@ -0,0 +1,55 @@
from __future__ import annotations
from typing import TYPE_CHECKING
from core.telemetry.events import TelemetryEvent
if TYPE_CHECKING:
from core.ops.ops_trace_manager import TraceQueueManager
class TelemetryFacade:
    """Central entry point for emitting telemetry events.

    Bridges the lightweight :class:`TelemetryEvent` abstraction to the legacy
    ``TraceTask`` queue, so feature code no longer needs to import
    ``core.ops.ops_trace_manager`` directly.
    """

    # Event-name -> TraceTaskName mapping, built lazily on first use and
    # cached on the class so it is not reconstructed on every emit() call.
    _task_name_map = None

    @classmethod
    def _trace_task_names(cls):
        """Build (once) and return the event-name -> TraceTaskName mapping."""
        if cls._task_name_map is None:
            # Imported lazily to avoid a circular import with core.ops.
            from core.ops.ops_trace_manager import TraceTaskName

            cls._task_name_map = {
                "draft_node_execution": TraceTaskName.DRAFT_NODE_EXECUTION_TRACE,
                "dataset_retrieval": TraceTaskName.DATASET_RETRIEVAL_TRACE,
                "generate_name": TraceTaskName.GENERATE_NAME_TRACE,
                "message": TraceTaskName.MESSAGE_TRACE,
                "moderation": TraceTaskName.MODERATION_TRACE,
                "node_execution": TraceTaskName.NODE_EXECUTION_TRACE,
                "prompt_generation": TraceTaskName.PROMPT_GENERATION_TRACE,
                "suggested_question": TraceTaskName.SUGGESTED_QUESTION_TRACE,
                "tool": TraceTaskName.TOOL_TRACE,
                "workflow": TraceTaskName.WORKFLOW_TRACE,
            }
        return cls._task_name_map

    @staticmethod
    def emit(event: TelemetryEvent, trace_manager: TraceQueueManager | None = None) -> None:
        """Emit *event* by enqueueing the corresponding trace task.

        Args:
            event: The event to emit. ``event.name`` selects the trace task
                type; ``event.payload`` supplies its keyword arguments.
            trace_manager: Queue to enqueue on. When ``None``, a new
                ``TraceQueueManager`` is created from ``event.context``.

        Events with an unrecognized ``name`` are silently ignored.
        """
        # Imported lazily to avoid a circular import with core.ops.
        from core.ops.ops_trace_manager import TraceQueueManager, TraceTask

        trace_task_name = TelemetryFacade._trace_task_names().get(event.name)
        if trace_task_name is None:
            return
        trace_queue_manager = trace_manager or TraceQueueManager(
            app_id=event.context.app_id,
            user_id=event.context.user_id,
        )
        trace_queue_manager.add_trace_task(
            TraceTask(
                trace_task_name,
                **event.payload,
            )
        )
def is_telemetry_enabled() -> bool:
    """Report whether enterprise telemetry is available and switched on.

    Returns ``False`` when the enterprise package cannot be imported for any
    reason (e.g. the community edition, where it is not installed).
    """
    try:
        from enterprise.telemetry.exporter import (
            is_enterprise_telemetry_enabled as _enterprise_check,
        )
    except Exception:
        # Enterprise package absent or unloadable: telemetry is off.
        return False
    return _enterprise_check()
def emit(event: TelemetryEvent, trace_manager: TraceQueueManager | None = None) -> None:
    """Module-level convenience wrapper delegating to :meth:`TelemetryFacade.emit`."""
    TelemetryFacade.emit(event, trace_manager=trace_manager)