From f56d2996ca89989f9d80cde60650684c53e4caae Mon Sep 17 00:00:00 2001
From: lkchen
Date: Fri, 11 Jul 2025 23:04:45 -0700
Subject: [PATCH] [Misc] Respect `no_use_tqdm_on_load` flag while capturing CUDA graph (#20834)

Signed-off-by: Linkun
---
 vllm/v1/worker/gpu_model_runner.py | 6 ++++--
 vllm/worker/model_runner.py        | 1 +
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index f3279fa5fa..44de1469d1 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -2270,8 +2270,10 @@ class GPUModelRunner(LoRAModelRunnerMixin):
             # Only rank 0 should print progress bar during capture
             compilation_cases = reversed(self.cudagraph_batch_sizes)
             if is_global_first_rank():
-                compilation_cases = tqdm(list(compilation_cases),
-                                         desc="Capturing CUDA graph shapes")
+                compilation_cases = tqdm(
+                    list(compilation_cases),
+                    disable=not self.load_config.use_tqdm_on_load,
+                    desc="Capturing CUDA graph shapes")
             for num_tokens in compilation_cases:
                 # We skip EPLB here since we don't want to record dummy metrics
                 for _ in range(
diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py
index 9d936f3dbf..4fe70a0abf 100644
--- a/vllm/worker/model_runner.py
+++ b/vllm/worker/model_runner.py
@@ -1587,6 +1587,7 @@ class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]):
                 if get_tensor_model_parallel_rank() == 0:
                     compilation_cases = tqdm(
                         list(compilation_cases),
+                        disable=not self.load_config.use_tqdm_on_load,
                         desc="Capturing CUDA graph shapes")
                 for batch_size, use_inputs_embeds in compilation_cases:
                     attn_metadata = (