From 61fbfe52742bb34ce8a04d3fb734582268bdcd10 Mon Sep 17 00:00:00 2001
From: Chauncey
Date: Tue, 28 Oct 2025 10:18:08 +0800
Subject: [PATCH] [Bugfix] fixed inconsistent finish_reason handling between
 V0 and V1 engines (#27555)

Signed-off-by: chaunceyjiang
---
 vllm/v1/core/sched/utils.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/vllm/v1/core/sched/utils.py b/vllm/v1/core/sched/utils.py
index 8af8a7d278..82166dc978 100644
--- a/vllm/v1/core/sched/utils.py
+++ b/vllm/v1/core/sched/utils.py
@@ -42,13 +42,6 @@ def remove_all(lst: list, items_to_remove: set) -> list:
 def check_stop(
     request: Request, max_model_len: int, pooler_output: torch.Tensor | None = None
 ) -> bool:
-    if (
-        request.num_tokens >= max_model_len
-        or request.num_output_tokens >= request.max_tokens
-    ):
-        request.status = RequestStatus.FINISHED_LENGTH_CAPPED
-        return True
-
     if request.pooling_params:
         if pooler_output is not None:
             request.status = RequestStatus.FINISHED_STOPPED
@@ -70,4 +63,10 @@
         request.status = RequestStatus.FINISHED_STOPPED
         request.stop_reason = last_token_id
         return True
+    if (
+        request.num_tokens >= max_model_len
+        or request.num_output_tokens >= request.max_tokens
+    ):
+        request.status = RequestStatus.FINISHED_LENGTH_CAPPED
+        return True
     return False
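
For context, the sketch below illustrates the behavioral fix under simplified
assumptions: the Request and RequestStatus definitions are hypothetical
stand-ins modeled only on the fields this diff touches (not the actual vLLM
classes), and the stop-token check approximates the elided middle of
check_stop. It shows why moving the length-cap check after the stop checks
matters: a request that emits a stop token on its last allowed step now
finishes with finish_reason="stop", as in the V0 engine, instead of "length".

# Minimal sketch of the ordering change in check_stop(). The Request and
# RequestStatus stand-ins are simplified stubs for illustration -- only the
# fields visible in the diff are modeled, and the stop-token check is an
# assumed stand-in for the elided middle of the real function.
from dataclasses import dataclass, field
from enum import Enum, auto


class RequestStatus(Enum):
    RUNNING = auto()
    FINISHED_STOPPED = auto()        # surfaced as finish_reason="stop"
    FINISHED_LENGTH_CAPPED = auto()  # surfaced as finish_reason="length"


@dataclass
class Request:
    prompt_token_ids: list[int]
    output_token_ids: list[int]
    max_tokens: int
    stop_token_ids: set[int] = field(default_factory=set)
    status: RequestStatus = RequestStatus.RUNNING
    stop_reason: int | None = None

    @property
    def num_tokens(self) -> int:
        return len(self.prompt_token_ids) + len(self.output_token_ids)

    @property
    def num_output_tokens(self) -> int:
        return len(self.output_token_ids)


def check_stop(request: Request, max_model_len: int) -> bool:
    # Post-patch ordering: stop-token matching runs BEFORE the length-cap
    # check, so a stop token emitted on the final allowed step wins.
    last_token_id = request.output_token_ids[-1]
    if last_token_id in request.stop_token_ids:
        request.status = RequestStatus.FINISHED_STOPPED
        request.stop_reason = last_token_id
        return True
    if (
        request.num_tokens >= max_model_len
        or request.num_output_tokens >= request.max_tokens
    ):
        request.status = RequestStatus.FINISHED_LENGTH_CAPPED
        return True
    return False


# A request whose fourth (and last allowed) token is a stop token. With the
# pre-patch ordering the length-cap branch ran first and reported
# FINISHED_LENGTH_CAPPED; after the patch the stop match is honored.
req = Request(
    prompt_token_ids=[1, 2, 3],
    output_token_ids=[10, 11, 12, 99],
    max_tokens=4,
    stop_token_ids={99},
)
assert check_stop(req, max_model_len=2048)
assert req.status is RequestStatus.FINISHED_STOPPED
assert req.stop_reason == 99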