[Bugfix][PD] Set max_completion_tokens=1 in the prefill request when the request includes it (#21841)
Signed-off-by: Abirdcfly <fp544037857@gmail.com>
This commit is contained in:
@ -293,6 +293,8 @@ class Proxy:
|
||||
# add params to request
|
||||
kv_prepare_request = request.copy()
|
||||
kv_prepare_request["max_tokens"] = 1
|
||||
if "max_completion_tokens" in kv_prepare_request:
|
||||
kv_prepare_request["max_completion_tokens"] = 1
|
||||
|
||||
# prefill stage
|
||||
prefill_instance = self.schedule(self.prefill_cycler)
|
||||
|
||||
@ -128,6 +128,8 @@ async def handle_request():
|
||||
prefill_request = original_request_data.copy()
|
||||
# set max_tokens to 1 so that this request only performs prefill
|
||||
prefill_request["max_tokens"] = 1
|
||||
if "max_completion_tokens" in prefill_request:
|
||||
prefill_request["max_completion_tokens"] = 1
|
||||
|
||||
global count
|
||||
global prefill_instances
|
||||
|
||||
Reference in New Issue
Block a user