From 6d76bd034a1ea28daefcf2f646e96e7dc003db11 Mon Sep 17 00:00:00 2001
From: Sage Moore
Date: Tue, 12 Aug 2025 22:54:47 -0400
Subject: [PATCH] revert kv connector fix

Signed-off-by: Sage Moore
---
NOTE(review): this patch commits unresolved merge-conflict markers
(<<<<<<< HEAD / ======= / >>>>>>> db77e4a92) into gpu_model_runner.py,
which will make the module a syntax error at import time. The conflict
must be resolved before this commit is merged. (This note sits in the
region git-am ignores, so the patch still applies unchanged.)

 vllm/v1/worker/gpu_model_runner.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index 1d5fa4b9f7..a821d4e8c2 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -2031,6 +2031,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
 
         # Run the model.
         # Use persistent buffers for CUDA graphs.
+<<<<<<< HEAD
         # when DBO is enabled, `num_tokens_after_padding`
         # represents the per-ubatch DP token count.
         dp_tokens_for_forward = num_tokens_after_padding
@@ -2044,6 +2045,9 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
                 num_tokens_across_dp=dp_tokens_for_forward,
                 skip_cuda_graphs=skip_cuda_graphs):
             self.maybe_setup_kv_connector(scheduler_output)
+=======
+            self.maybe_setup_kv_connector(scheduler_output)
+>>>>>>> db77e4a92 (revert kv connector fix)
         model_output = self._run_model(
             attn_metadata=attn_metadata,
             num_scheduled_tokens=num_input_tokens,