Merge remote-tracking branch 'upstream/main' into feat/rag-2

2026-05-05 18:08:07 +08:00 · 2025-09-16 14:59:35 +08:00
parent 1e9fce50a1 bdd85b36a4
commit dd34002db2
791 changed files with 24372 additions and 7085 deletions
--- a/api/core/ops/langfuse_trace/entities/langfuse_trace_entity.py
+++ b/api/core/ops/langfuse_trace/entities/langfuse_trace_entity.py
@@ -1,7 +1,7 @@
 from collections.abc import Mapping
 from datetime import datetime
 from enum import StrEnum
-from typing import Any, Optional, Union
+from typing import Any, Union

 from pydantic import BaseModel, ConfigDict, Field, field_validator
 from pydantic_core.core_schema import ValidationInfo
@@ -52,50 +52,50 @@ class LangfuseTrace(BaseModel):
    Langfuse trace model
    """

-    id: Optional[str] = Field(
+    id: str | None = Field(
        default=None,
        description="The id of the trace can be set, defaults to a random id. Used to link traces to external systems "
        "or when creating a distributed trace. Traces are upserted on id.",
    )
-    name: Optional[str] = Field(
+    name: str | None = Field(
        default=None,
        description="Identifier of the trace. Useful for sorting/filtering in the UI.",
    )
-    input: Optional[Union[str, dict[str, Any], list, None]] = Field(
+    input: Union[str, dict[str, Any], list, None] | None = Field(
        default=None, description="The input of the trace. Can be any JSON object."
    )
-    output: Optional[Union[str, dict[str, Any], list, None]] = Field(
+    output: Union[str, dict[str, Any], list, None] | None = Field(
        default=None, description="The output of the trace. Can be any JSON object."
    )
-    metadata: Optional[dict[str, Any]] = Field(
+    metadata: dict[str, Any] | None = Field(
        default=None,
        description="Additional metadata of the trace. Can be any JSON object. Metadata is merged when being updated "
        "via the API.",
    )
-    user_id: Optional[str] = Field(
+    user_id: str | None = Field(
        default=None,
        description="The id of the user that triggered the execution. Used to provide user-level analytics.",
    )
-    session_id: Optional[str] = Field(
+    session_id: str | None = Field(
        default=None,
        description="Used to group multiple traces into a session in Langfuse. Use your own session/thread identifier.",
    )
-    version: Optional[str] = Field(
+    version: str | None = Field(
        default=None,
        description="The version of the trace type. Used to understand how changes to the trace type affect metrics. "
        "Useful in debugging.",
    )
-    release: Optional[str] = Field(
+    release: str | None = Field(
        default=None,
        description="The release identifier of the current deployment. Used to understand how changes of different "
        "deployments affect metrics. Useful in debugging.",
    )
-    tags: Optional[list[str]] = Field(
+    tags: list[str] | None = Field(
        default=None,
        description="Tags are used to categorize or label traces. Traces can be filtered by tags in the UI and GET "
        "API. Tags can also be changed in the UI. Tags are merged and never deleted via the API.",
    )
-    public: Optional[bool] = Field(
+    public: bool | None = Field(
        default=None,
        description="You can make a trace public to share it via a public link. This allows others to view the trace "
        "without needing to log in or be members of your Langfuse project.",
@@ -113,61 +113,61 @@ class LangfuseSpan(BaseModel):
    Langfuse span model
    """

-    id: Optional[str] = Field(
+    id: str | None = Field(
        default=None,
        description="The id of the span can be set, otherwise a random id is generated. Spans are upserted on id.",
    )
-    session_id: Optional[str] = Field(
+    session_id: str | None = Field(
        default=None,
        description="Used to group multiple spans into a session in Langfuse. Use your own session/thread identifier.",
    )
-    trace_id: Optional[str] = Field(
+    trace_id: str | None = Field(
        default=None,
        description="The id of the trace the span belongs to. Used to link spans to traces.",
    )
-    user_id: Optional[str] = Field(
+    user_id: str | None = Field(
        default=None,
        description="The id of the user that triggered the execution. Used to provide user-level analytics.",
    )
-    start_time: Optional[datetime | str] = Field(
+    start_time: datetime | str | None = Field(
        default_factory=datetime.now,
        description="The time at which the span started, defaults to the current time.",
    )
-    end_time: Optional[datetime | str] = Field(
+    end_time: datetime | str | None = Field(
        default=None,
        description="The time at which the span ended. Automatically set by span.end().",
    )
-    name: Optional[str] = Field(
+    name: str | None = Field(
        default=None,
        description="Identifier of the span. Useful for sorting/filtering in the UI.",
    )
-    metadata: Optional[dict[str, Any]] = Field(
+    metadata: dict[str, Any] | None = Field(
        default=None,
        description="Additional metadata of the span. Can be any JSON object. Metadata is merged when being updated "
        "via the API.",
    )
-    level: Optional[str] = Field(
+    level: str | None = Field(
        default=None,
        description="The level of the span. Can be DEBUG, DEFAULT, WARNING or ERROR. Used for sorting/filtering of "
        "traces with elevated error levels and for highlighting in the UI.",
    )
-    status_message: Optional[str] = Field(
+    status_message: str | None = Field(
        default=None,
        description="The status message of the span. Additional field for context of the event. E.g. the error "
        "message of an error event.",
    )
-    input: Optional[Union[str, Mapping[str, Any], list, None]] = Field(
+    input: Union[str, Mapping[str, Any], list, None] | None = Field(
        default=None, description="The input of the span. Can be any JSON object."
    )
-    output: Optional[Union[str, Mapping[str, Any], list, None]] = Field(
+    output: Union[str, Mapping[str, Any], list, None] | None = Field(
        default=None, description="The output of the span. Can be any JSON object."
    )
-    version: Optional[str] = Field(
+    version: str | None = Field(
        default=None,
        description="The version of the span type. Used to understand how changes to the span type affect metrics. "
        "Useful in debugging.",
    )
-    parent_observation_id: Optional[str] = Field(
+    parent_observation_id: str | None = Field(
        default=None,
        description="The id of the observation the span belongs to. Used to link spans to observations.",
    )
@@ -188,15 +188,15 @@ class UnitEnum(StrEnum):


 class GenerationUsage(BaseModel):
-    promptTokens: Optional[int] = None
-    completionTokens: Optional[int] = None
-    total: Optional[int] = None
-    input: Optional[int] = None
-    output: Optional[int] = None
-    unit: Optional[UnitEnum] = None
-    inputCost: Optional[float] = None
-    outputCost: Optional[float] = None
-    totalCost: Optional[float] = None
+    promptTokens: int | None = None
+    completionTokens: int | None = None
+    total: int | None = None
+    input: int | None = None
+    output: int | None = None
+    unit: UnitEnum | None = None
+    inputCost: float | None = None
+    outputCost: float | None = None
+    totalCost: float | None = None

    @field_validator("input", "output")
    @classmethod
@@ -206,69 +206,69 @@ class GenerationUsage(BaseModel):


 class LangfuseGeneration(BaseModel):
-    id: Optional[str] = Field(
+    id: str | None = Field(
        default=None,
        description="The id of the generation can be set, defaults to random id.",
    )
-    trace_id: Optional[str] = Field(
+    trace_id: str | None = Field(
        default=None,
        description="The id of the trace the generation belongs to. Used to link generations to traces.",
    )
-    parent_observation_id: Optional[str] = Field(
+    parent_observation_id: str | None = Field(
        default=None,
        description="The id of the observation the generation belongs to. Used to link generations to observations.",
    )
-    name: Optional[str] = Field(
+    name: str | None = Field(
        default=None,
        description="Identifier of the generation. Useful for sorting/filtering in the UI.",
    )
-    start_time: Optional[datetime | str] = Field(
+    start_time: datetime | str | None = Field(
        default_factory=datetime.now,
        description="The time at which the generation started, defaults to the current time.",
    )
-    completion_start_time: Optional[datetime | str] = Field(
+    completion_start_time: datetime | str | None = Field(
        default=None,
        description="The time at which the completion started (streaming). Set it to get latency analytics broken "
        "down into time until completion started and completion duration.",
    )
-    end_time: Optional[datetime | str] = Field(
+    end_time: datetime | str | None = Field(
        default=None,
        description="The time at which the generation ended. Automatically set by generation.end().",
    )
-    model: Optional[str] = Field(default=None, description="The name of the model used for the generation.")
-    model_parameters: Optional[dict[str, Any]] = Field(
+    model: str | None = Field(default=None, description="The name of the model used for the generation.")
+    model_parameters: dict[str, Any] | None = Field(
        default=None,
        description="The parameters of the model used for the generation; can be any key-value pairs.",
    )
-    input: Optional[Any] = Field(
+    input: Any | None = Field(
        default=None,
        description="The prompt used for the generation. Can be any string or JSON object.",
    )
-    output: Optional[Any] = Field(
+    output: Any | None = Field(
        default=None,
        description="The completion generated by the model. Can be any string or JSON object.",
    )
-    usage: Optional[GenerationUsage] = Field(
+    usage: GenerationUsage | None = Field(
        default=None,
        description="The usage object supports the OpenAi structure with tokens and a more generic version with "
        "detailed costs and units.",
    )
-    metadata: Optional[dict[str, Any]] = Field(
+    metadata: dict[str, Any] | None = Field(
        default=None,
        description="Additional metadata of the generation. Can be any JSON object. Metadata is merged when being "
        "updated via the API.",
    )
-    level: Optional[LevelEnum] = Field(
+    level: LevelEnum | None = Field(
        default=None,
        description="The level of the generation. Can be DEBUG, DEFAULT, WARNING or ERROR. Used for sorting/filtering "
        "of traces with elevated error levels and for highlighting in the UI.",
    )
-    status_message: Optional[str] = Field(
+    status_message: str | None = Field(
        default=None,
        description="The status message of the generation. Additional field for context of the event. E.g. the error "
        "message of an error event.",
    )
-    version: Optional[str] = Field(
+    version: str | None = Field(
        default=None,
        description="The version of the generation type. Used to understand how changes to the span type affect "
        "metrics. Useful in debugging.",
--- a/api/core/ops/langfuse_trace/langfuse_trace.py
+++ b/api/core/ops/langfuse_trace/langfuse_trace.py
@@ -1,7 +1,6 @@
 import logging
 import os
 from datetime import datetime, timedelta
-from typing import Optional

 from langfuse import Langfuse  # type: ignore
 from sqlalchemy.orm import sessionmaker
@@ -145,13 +144,13 @@ class LangFuseDataTrace(BaseTraceInstance):
            if node_type == NodeType.LLM:
                inputs = node_execution.process_data.get("prompts", {}) if node_execution.process_data else {}
            else:
-                inputs = node_execution.inputs if node_execution.inputs else {}
-            outputs = node_execution.outputs if node_execution.outputs else {}
+                inputs = node_execution.inputs or {}
+            outputs = node_execution.outputs or {}
            created_at = node_execution.created_at or datetime.now()
            elapsed_time = node_execution.elapsed_time
            finished_at = created_at + timedelta(seconds=elapsed_time)

-            execution_metadata = node_execution.metadata if node_execution.metadata else {}
+            execution_metadata = node_execution.metadata or {}
            metadata = {str(k): v for k, v in execution_metadata.items()}
            metadata.update(
                {
@@ -164,7 +163,7 @@ class LangFuseDataTrace(BaseTraceInstance):
                    "status": status,
                }
            )
-            process_data = node_execution.process_data if node_execution.process_data else {}
+            process_data = node_execution.process_data or {}
            model_provider = process_data.get("model_provider", None)
            model_name = process_data.get("model_name", None)
            if model_provider is not None and model_name is not None:
@@ -242,7 +241,7 @@ class LangFuseDataTrace(BaseTraceInstance):

        user_id = message_data.from_account_id
        if message_data.from_end_user_id:
-            end_user_data: Optional[EndUser] = (
+            end_user_data: EndUser | None = (
                db.session.query(EndUser).where(EndUser.id == message_data.from_end_user_id).first()
            )
            if end_user_data is not None:
@@ -399,7 +398,7 @@ class LangFuseDataTrace(BaseTraceInstance):
        )
        self.add_span(langfuse_span_data=name_generation_span_data)

-    def add_trace(self, langfuse_trace_data: Optional[LangfuseTrace] = None):
+    def add_trace(self, langfuse_trace_data: LangfuseTrace | None = None):
        format_trace_data = filter_none_values(langfuse_trace_data.model_dump()) if langfuse_trace_data else {}
        try:
            self.langfuse_client.trace(**format_trace_data)
@@ -407,7 +406,7 @@ class LangFuseDataTrace(BaseTraceInstance):
        except Exception as e:
            raise ValueError(f"LangFuse Failed to create trace: {str(e)}")

-    def add_span(self, langfuse_span_data: Optional[LangfuseSpan] = None):
+    def add_span(self, langfuse_span_data: LangfuseSpan | None = None):
        format_span_data = filter_none_values(langfuse_span_data.model_dump()) if langfuse_span_data else {}
        try:
            self.langfuse_client.span(**format_span_data)
@@ -415,12 +414,12 @@ class LangFuseDataTrace(BaseTraceInstance):
        except Exception as e:
            raise ValueError(f"LangFuse Failed to create span: {str(e)}")

-    def update_span(self, span, langfuse_span_data: Optional[LangfuseSpan] = None):
+    def update_span(self, span, langfuse_span_data: LangfuseSpan | None = None):
        format_span_data = filter_none_values(langfuse_span_data.model_dump()) if langfuse_span_data else {}

        span.end(**format_span_data)

-    def add_generation(self, langfuse_generation_data: Optional[LangfuseGeneration] = None):
+    def add_generation(self, langfuse_generation_data: LangfuseGeneration | None = None):
        format_generation_data = (
            filter_none_values(langfuse_generation_data.model_dump()) if langfuse_generation_data else {}
        )
@@ -430,7 +429,7 @@ class LangFuseDataTrace(BaseTraceInstance):
        except Exception as e:
            raise ValueError(f"LangFuse Failed to create generation: {str(e)}")

-    def update_generation(self, generation, langfuse_generation_data: Optional[LangfuseGeneration] = None):
+    def update_generation(self, generation, langfuse_generation_data: LangfuseGeneration | None = None):
        format_generation_data = (
            filter_none_values(langfuse_generation_data.model_dump()) if langfuse_generation_data else {}
        )