[Bugfix][PD] set max_completion_tokens=1 if req has this value (#21841)

Signed-off-by: Abirdcfly <fp544037857@gmail.com>
This commit is contained in:
Abirdcfly
2025-08-01 21:08:45 +08:00
committed by GitHub
parent 0a6d305e0f
commit 5c54d9759d
2 changed files with 4 additions and 0 deletions

View File

@ -293,6 +293,8 @@ class Proxy:
# add params to request
kv_prepare_request = request.copy()
kv_prepare_request["max_tokens"] = 1
if "max_completion_tokens" in kv_prepare_request:
kv_prepare_request["max_completion_tokens"] = 1
# prefill stage
prefill_instance = self.schedule(self.prefill_cycler)

View File

@ -128,6 +128,8 @@ async def handle_request():
prefill_request = original_request_data.copy()
# set max_tokens = 1 so that this request only performs the prefill step
prefill_request["max_tokens"] = 1
if "max_completion_tokens" in prefill_request:
prefill_request["max_completion_tokens"] = 1
global count
global prefill_instances