From d46bbd30f7f6f03de6ec3c8ba301b246576abfd6 Mon Sep 17 00:00:00 2001
From: Br1an <932039080@qq.com>
Date: Thu, 14 May 2026 13:11:37 +0800
Subject: [PATCH] Fix: send input and output token usage to Langfuse (#13294)

### What problem does this PR solve?

Closes #9837

The Langfuse integration currently only sends the output text to
`langfuse_generation.update()` without including token usage
information. This means Langfuse cannot track input/output token
consumption for cost analysis and monitoring.

### Solution

Add the `usage_details` parameter to `langfuse_generation.update()` with:
- `input`: approximate input token count from `message_fit_in()`
(`used_token_count`)
- `output`: approximate output token count from
`num_tokens_from_string(answer)` (`tk_num`)
- `total`: sum of `input` and `output`

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

---------

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
---
 api/db/services/dialog_service.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/api/db/services/dialog_service.py b/api/db/services/dialog_service.py
index 6f981efb5..07dcd14b5 100644
--- a/api/db/services/dialog_service.py
+++ b/api/db/services/dialog_service.py
@@ -809,7 +809,14 @@ async def async_chat(dialog, messages, stream=True, **kwargs):
         if langfuse_tracer and "langfuse_generation" in locals():
             langfuse_output = "\n" + re.sub(r"^.*?(### Query:.*)", r"\1", prompt, flags=re.DOTALL)
             langfuse_output = {"time_elapsed:": re.sub(r"\n", "  \n", langfuse_output), "created_at": time.time()}
-            langfuse_generation.update(output=langfuse_output)
+            langfuse_generation.update(
+                output=langfuse_output,
+                usage_details={
+                    "input": used_token_count,
+                    "output": tk_num,
+                    "total": used_token_count + tk_num,
+                },
+            )
             langfuse_generation.end()
 
         return {"answer": think + answer, "reference": refs, "prompt": re.sub(r"\n", "  \n", prompt), "created_at": time.time()}