[BugFix] Fix GC bug for LLM class (#2882)

This commit is contained in:
Woosuk Kwon
2024-02-14 22:17:44 -08:00
committed by GitHub
parent 31348dff03
commit d7afab6d3a
2 changed files with 169 additions and 157 deletions

View File

@ -4,6 +4,10 @@ It should include tests that are reported by users and making sure they
will never happen again.
"""
import gc
import torch
from vllm import LLM, SamplingParams
@ -35,6 +39,20 @@ def test_max_tokens_none():
assert len(prompts) == len(outputs)
def test_gc():
    """Regression test: deleting an LLM instance must release its GPU memory.

    Guards against reference cycles in the LLM class that previously kept
    the model weights and KV cache alive after `del`.
    """
    engine = LLM("facebook/opt-125m", enforce_eager=True)
    del engine

    # Break any reference cycles, then return cached CUDA blocks to the driver.
    gc.collect()
    torch.cuda.empty_cache()

    # Only PyTorch's internal bookkeeping should remain allocated --
    # well under 50 MB (typically around 10 MB).
    assert torch.cuda.memory_allocated() < 50 * 1024 * 1024
# Allow running this test module directly (outside a pytest invocation).
if __name__ == "__main__":
    import pytest
    pytest.main([__file__])