From f65da69c72596337a5587245fa24c173b77efd33 Mon Sep 17 00:00:00 2001
From: Woosuk Kwon
Date: Thu, 23 Oct 2025 00:19:05 +0000
Subject: [PATCH] mem

---
 vllm/v1/worker/gpu_model_runner.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index 9e394dbb59..f178b6f130 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -509,6 +509,14 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
             pin_memory=self.pin_memory,
         )
 
+        # device_id = self.device.index
+
+        # def cb(_device, _alloc, _device_alloc, _device_free):
+        #     torch.cuda.memory._dump_snapshot(f"/tmp/vllm_oom_{device_id}.pickle")
+
+        # torch.cuda.memory._record_memory_history(max_entries=100_000)
+        # torch._C._cuda_attach_out_of_memory_observer(cb)
+
     def reset_mm_cache(self) -> None:
         if self.mm_budget:
             self.mm_budget.reset_cache()
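
The commented-out block added here is an on-demand OOM debugging hook:
torch.cuda.memory._record_memory_history keeps a bounded trace of CUDA
allocator events, and the out-of-memory observer dumps that trace to a pickle
the moment an allocation fails on this worker. Below is a minimal standalone
sketch of the same idea, assuming PyTorch's private interfaces behave as in
recent releases; the helper name install_oom_snapshot_hook is hypothetical,
and all three underscore-prefixed calls are internal PyTorch APIs that may
change between versions.

    import torch

    def install_oom_snapshot_hook(device: torch.device) -> None:
        """Dump a CUDA allocator snapshot to /tmp when this device OOMs."""
        device_id = device.index

        def cb(_device, _alloc, _device_alloc, _device_free):
            # Invoked by the caching allocator when a CUDA allocation
            # fails; writes the recorded history for offline inspection.
            torch.cuda.memory._dump_snapshot(f"/tmp/vllm_oom_{device_id}.pickle")

        # Retain up to 100k alloc/free events (with stack traces) so the
        # snapshot stays bounded in size.
        torch.cuda.memory._record_memory_history(max_entries=100_000)
        torch._C._cuda_attach_out_of_memory_observer(cb)

The resulting vllm_oom_<device>.pickle can be opened in the interactive
viewer at https://pytorch.org/memory_viz. Recording history adds
per-allocation overhead, which is presumably why the patch leaves the hook
commented out for ad-hoc debugging rather than enabling it by default.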