[Frontend] Add --log-error-stack to print stack trace for error response (#22960)

Signed-off-by: Chen Zhang <zhangch99@outlook.com>
Author: Chen Zhang
Date: 2025-08-26 21:58:59 -07:00
Committed by: GitHub
parent 644d57d531
commit 3210264421
13 changed files with 51 additions and 8 deletions

View File

@@ -1749,6 +1749,7 @@ async def init_app_state(
enable_prompt_tokens_details=args.enable_prompt_tokens_details,
enable_force_include_usage=args.enable_force_include_usage,
enable_log_outputs=args.enable_log_outputs,
log_error_stack=args.log_error_stack,
) if "generate" in supported_tasks else None
state.openai_serving_chat = OpenAIServingChat(
engine_client,
@@ -1767,6 +1768,7 @@ async def init_app_state(
enable_prompt_tokens_details=args.enable_prompt_tokens_details,
enable_force_include_usage=args.enable_force_include_usage,
enable_log_outputs=args.enable_log_outputs,
log_error_stack=args.log_error_stack,
) if "generate" in supported_tasks else None
state.openai_serving_completion = OpenAIServingCompletion(
engine_client,
@@ -1776,6 +1778,7 @@ async def init_app_state(
return_tokens_as_token_ids=args.return_tokens_as_token_ids,
enable_prompt_tokens_details=args.enable_prompt_tokens_details,
enable_force_include_usage=args.enable_force_include_usage,
log_error_stack=args.log_error_stack,
) if "generate" in supported_tasks else None
state.openai_serving_pooling = OpenAIServingPooling(
engine_client,
@@ -1784,6 +1787,7 @@ async def init_app_state(
request_logger=request_logger,
chat_template=resolved_chat_template,
chat_template_content_format=args.chat_template_content_format,
log_error_stack=args.log_error_stack,
) if "encode" in supported_tasks else None
state.openai_serving_embedding = OpenAIServingEmbedding(
engine_client,
@@ -1792,12 +1796,14 @@ async def init_app_state(
request_logger=request_logger,
chat_template=resolved_chat_template,
chat_template_content_format=args.chat_template_content_format,
log_error_stack=args.log_error_stack,
) if "embed" in supported_tasks else None
state.openai_serving_classification = ServingClassification(
engine_client,
model_config,
state.openai_serving_models,
request_logger=request_logger,
log_error_stack=args.log_error_stack,
) if "classify" in supported_tasks else None
enable_serving_reranking = ("classify" in supported_tasks and getattr(
@@ -1807,6 +1813,7 @@ async def init_app_state(
model_config,
state.openai_serving_models,
request_logger=request_logger,
log_error_stack=args.log_error_stack,
) if ("embed" in supported_tasks or enable_serving_reranking) else None
state.openai_serving_tokenization = OpenAIServingTokenization(
@@ -1816,18 +1823,21 @@ async def init_app_state(
request_logger=request_logger,
chat_template=resolved_chat_template,
chat_template_content_format=args.chat_template_content_format,
log_error_stack=args.log_error_stack,
)
state.openai_serving_transcription = OpenAIServingTranscription(
engine_client,
model_config,
state.openai_serving_models,
request_logger=request_logger,
log_error_stack=args.log_error_stack,
) if "transcription" in supported_tasks else None
state.openai_serving_translation = OpenAIServingTranslation(
engine_client,
model_config,
state.openai_serving_models,
request_logger=request_logger,
log_error_stack=args.log_error_stack,
) if "transcription" in supported_tasks else None
state.enable_server_load_tracking = args.enable_server_load_tracking
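
Every hunk above follows the same pattern: init_app_state reads the new flag once and forwards args.log_error_stack into the constructor of each serving handler. A minimal standalone sketch of that pattern, using simplified stand-in names rather than the actual vLLM classes:

class ServingBase:
    def __init__(self, *, log_error_stack: bool = False) -> None:
        # The shared base class stores the flag; every handler inherits it
        # and consults it when building an error response.
        self.log_error_stack = log_error_stack

class ServingChat(ServingBase):
    def __init__(self, *, log_error_stack: bool = False) -> None:
        super().__init__(log_error_stack=log_error_stack)

# Wiring during app-state initialization, mirroring the diff above:
# serving_chat = ServingChat(log_error_stack=args.log_error_stack)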

View File

@@ -180,6 +180,8 @@ schema. Example: `[{"type": "text", "text": "Hello world!"}]`"""
h11_max_header_count: int = H11_MAX_HEADER_COUNT_DEFAULT
"""Maximum number of HTTP headers allowed in a request for h11 parser.
Helps mitigate header abuse. Default: 256."""
log_error_stack: bool = envs.VLLM_SERVER_DEV_MODE
"""If set to True, log the stack trace of error responses"""
@staticmethod
def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
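
The default comes from envs.VLLM_SERVER_DEV_MODE, so error-stack logging is enabled automatically when the server runs in dev mode and stays off otherwise unless --log-error-stack is passed explicitly. A rough sketch of that default pattern, assuming the environment variable is read as a simple boolean (the exact parsing in vllm.envs may differ, and FrontendArgs here is a simplified stand-in, not necessarily the real class):

import os
from dataclasses import dataclass

def _dev_mode() -> bool:
    # Assumed parsing; vllm.envs may interpret the variable differently.
    return os.environ.get("VLLM_SERVER_DEV_MODE", "0") not in ("0", "", "false")

@dataclass
class FrontendArgs:
    # Default is evaluated at import time, as in the field added above;
    # the CLI flag can still override it per invocation.
    log_error_stack: bool = _dev_mode()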

View File

@@ -76,13 +76,15 @@ class OpenAIServingChat(OpenAIServing):
enable_prompt_tokens_details: bool = False,
enable_force_include_usage: bool = False,
enable_log_outputs: bool = False,
log_error_stack: bool = False,
) -> None:
super().__init__(engine_client=engine_client,
model_config=model_config,
models=models,
request_logger=request_logger,
return_tokens_as_token_ids=return_tokens_as_token_ids,
enable_force_include_usage=enable_force_include_usage)
enable_force_include_usage=enable_force_include_usage,
log_error_stack=log_error_stack)
self.response_role = response_role
self.chat_template = chat_template

View File

@@ -129,12 +129,14 @@ class ServingClassification(ClassificationMixin):
models: OpenAIServingModels,
*,
request_logger: Optional[RequestLogger],
log_error_stack: bool = False,
) -> None:
super().__init__(
engine_client=engine_client,
model_config=model_config,
models=models,
request_logger=request_logger,
log_error_stack=log_error_stack,
)
async def create_classify(

View File

@@ -59,6 +59,7 @@ class OpenAIServingCompletion(OpenAIServing):
return_tokens_as_token_ids: bool = False,
enable_prompt_tokens_details: bool = False,
enable_force_include_usage: bool = False,
log_error_stack: bool = False,
):
super().__init__(
engine_client=engine_client,
@@ -67,6 +68,7 @@ class OpenAIServingCompletion(OpenAIServing):
request_logger=request_logger,
return_tokens_as_token_ids=return_tokens_as_token_ids,
enable_force_include_usage=enable_force_include_usage,
log_error_stack=log_error_stack,
)
self.enable_prompt_tokens_details = enable_prompt_tokens_details
self.default_sampling_params = (

View File

@@ -593,11 +593,13 @@ class OpenAIServingEmbedding(EmbeddingMixin):
request_logger: Optional[RequestLogger],
chat_template: Optional[str],
chat_template_content_format: ChatTemplateContentFormatOption,
log_error_stack: bool = False,
) -> None:
super().__init__(engine_client=engine_client,
model_config=model_config,
models=models,
request_logger=request_logger)
request_logger=request_logger,
log_error_stack=log_error_stack)
self.chat_template = chat_template
self.chat_template_content_format: Final = chat_template_content_format

View File

@@ -5,6 +5,7 @@ import io
import json
import sys
import time
import traceback
from collections.abc import AsyncGenerator, Iterable, Mapping, Sequence
from concurrent.futures import ThreadPoolExecutor
from http import HTTPStatus
@@ -205,6 +206,7 @@ class OpenAIServing:
request_logger: Optional[RequestLogger],
return_tokens_as_token_ids: bool = False,
enable_force_include_usage: bool = False,
log_error_stack: bool = False,
):
super().__init__()
@@ -222,6 +224,7 @@ class OpenAIServing:
self._async_tokenizer_pool: dict[AnyTokenizer,
AsyncMicrobatchTokenizer] = {}
self.log_error_stack = log_error_stack
def _get_async_tokenizer(self, tokenizer) -> AsyncMicrobatchTokenizer:
"""
@@ -412,6 +415,12 @@ class OpenAIServing:
message: str,
err_type: str = "BadRequestError",
status_code: HTTPStatus = HTTPStatus.BAD_REQUEST) -> ErrorResponse:
if self.log_error_stack:
exc_type, _, _ = sys.exc_info()
if exc_type is not None:
traceback.print_exc()
else:
traceback.print_stack()
return ErrorResponse(error=ErrorInfo(
message=message, type=err_type, code=status_code.value))
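
This hunk is the core of the change: when log_error_stack is enabled, the error-response helper in OpenAIServing dumps a traceback (to stderr, via the traceback module) before building the ErrorResponse. If an exception is currently being handled, traceback.print_exc() prints that exception's traceback; otherwise traceback.print_stack() prints the current call stack, so the origin of the error is still visible when no exception was raised. A small standalone illustration of the same decision, not vLLM code:

import sys
import traceback

def report_error(message: str) -> None:
    # Inside an except block (even one in a caller's frame), sys.exc_info()
    # returns the active exception, so print_exc() shows where it was raised.
    # Otherwise it returns (None, None, None) and we fall back to printing
    # the current call stack.
    exc_type, _, _ = sys.exc_info()
    if exc_type is not None:
        traceback.print_exc()
    else:
        traceback.print_stack()
    print(f"error response: {message}")

try:
    raise ValueError("bad request")
except ValueError:
    report_error("bad request")        # prints the ValueError traceback

report_error("validation failed")      # prints the current call stack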

View File

@@ -58,11 +58,13 @@ class OpenAIServingPooling(OpenAIServing):
request_logger: Optional[RequestLogger],
chat_template: Optional[str],
chat_template_content_format: ChatTemplateContentFormatOption,
log_error_stack: bool = False,
) -> None:
super().__init__(engine_client=engine_client,
model_config=model_config,
models=models,
request_logger=request_logger)
request_logger=request_logger,
log_error_stack=log_error_stack)
self.chat_template = chat_template
self.chat_template_content_format: Final = chat_template_content_format

View File

@@ -88,6 +88,7 @@ class OpenAIServingResponses(OpenAIServing):
enable_prompt_tokens_details: bool = False,
enable_force_include_usage: bool = False,
enable_log_outputs: bool = False,
log_error_stack: bool = False,
) -> None:
super().__init__(
engine_client=engine_client,
@@ -96,6 +97,7 @@ class OpenAIServingResponses(OpenAIServing):
request_logger=request_logger,
return_tokens_as_token_ids=return_tokens_as_token_ids,
enable_force_include_usage=enable_force_include_usage,
log_error_stack=log_error_stack,
)
self.chat_template = chat_template

View File

@@ -47,11 +47,13 @@ class ServingScores(OpenAIServing):
models: OpenAIServingModels,
*,
request_logger: Optional[RequestLogger],
log_error_stack: bool = False,
) -> None:
super().__init__(engine_client=engine_client,
model_config=model_config,
models=models,
request_logger=request_logger)
request_logger=request_logger,
log_error_stack=log_error_stack)
async def _embedding_score(
self,

View File

@@ -39,11 +39,13 @@ class OpenAIServingTokenization(OpenAIServing):
request_logger: Optional[RequestLogger],
chat_template: Optional[str],
chat_template_content_format: ChatTemplateContentFormatOption,
log_error_stack: bool = False,
) -> None:
super().__init__(engine_client=engine_client,
model_config=model_config,
models=models,
request_logger=request_logger)
request_logger=request_logger,
log_error_stack=log_error_stack)
self.chat_template = chat_template
self.chat_template_content_format: Final = chat_template_content_format

View File

@@ -32,13 +32,15 @@ class OpenAIServingTranscription(OpenAISpeechToText):
*,
request_logger: Optional[RequestLogger],
return_tokens_as_token_ids: bool = False,
log_error_stack: bool = False,
):
super().__init__(engine_client=engine_client,
model_config=model_config,
models=models,
request_logger=request_logger,
return_tokens_as_token_ids=return_tokens_as_token_ids,
task_type="transcribe")
task_type="transcribe",
log_error_stack=log_error_stack)
async def create_transcription(
self, audio_data: bytes, request: TranscriptionRequest,
@@ -88,13 +90,15 @@ class OpenAIServingTranslation(OpenAISpeechToText):
*,
request_logger: Optional[RequestLogger],
return_tokens_as_token_ids: bool = False,
log_error_stack: bool = False,
):
super().__init__(engine_client=engine_client,
model_config=model_config,
models=models,
request_logger=request_logger,
return_tokens_as_token_ids=return_tokens_as_token_ids,
task_type="translate")
task_type="translate",
log_error_stack=log_error_stack)
async def create_translation(
self, audio_data: bytes, request: TranslationRequest,

View File

@@ -53,12 +53,14 @@ class OpenAISpeechToText(OpenAIServing):
request_logger: Optional[RequestLogger],
return_tokens_as_token_ids: bool = False,
task_type: Literal["transcribe", "translate"] = "transcribe",
log_error_stack: bool = False,
):
super().__init__(engine_client=engine_client,
model_config=model_config,
models=models,
request_logger=request_logger,
return_tokens_as_token_ids=return_tokens_as_token_ids)
return_tokens_as_token_ids=return_tokens_as_token_ids,
log_error_stack=log_error_stack)
self.default_sampling_params = (
self.model_config.get_diff_sampling_param())