Fix: send input and output token usage to Langfuse (#13294)

### What problem does this PR solve?

Closes #9837

The Langfuse integration currently only sends the output text to `langfuse_generation.update()` without including token usage information. This means Langfuse cannot track input/output token consumption for cost analysis and monitoring.

### Solution

Add the `usage_details` parameter to the `langfuse_generation.update()` call with:

- `input`: approximate input token count from `message_fit_in()` (`used_token_count`)
- `output`: approximate output token count from `num_tokens_from_string(answer)` (`tk_num`)
- `total`: sum of input and output

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

---------

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
2026-05-26 10:47:21 +08:00 · 2026-05-14 13:11:37 +08:00
parent cc21dc7f00
commit d46bbd30f7
1 changed file with 8 additions and 1 deletion
--- a/api/db/services/dialog_service.py
+++ b/api/db/services/dialog_service.py
@@ -809,7 +809,14 @@ async def async_chat(dialog, messages, stream=True, **kwargs):
        if langfuse_tracer and "langfuse_generation" in locals():
            langfuse_output = "\n" + re.sub(r"^.*?(### Query:.*)", r"\1", prompt, flags=re.DOTALL)
            langfuse_output = {"time_elapsed:": re.sub(r"\n", "  \n", langfuse_output), "created_at": time.time()}
-            langfuse_generation.update(output=langfuse_output)
+            langfuse_generation.update(
+                output=langfuse_output,
+                usage_details={
+                    "input": used_token_count,
+                    "output": tk_num,
+                    "total": used_token_count + tk_num,
+                },
+            )
            langfuse_generation.end()

        return {"answer": think + answer, "reference": refs, "prompt": re.sub(r"\n", "  \n", prompt), "created_at": time.time()}