[V0 Deprecation] Remove LLMEngine (#25033)

Signed-off-by: Woosuk Kwon <woosuk@thinkingmachines.ai>
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
Woosuk Kwon
2025-09-20 17:56:30 -07:00
committed by GitHub
parent 367a480bd3
commit 52c2a8d4ad
29 changed files with 65 additions and 2763 deletions

View File

@ -54,8 +54,7 @@ def test_attention_fusion_v0(example_prompts, monkeypatch, model: str,
# Use global backends
global backend, backend_unfused
use_v1 = False # can be made a param once V1 support added
monkeypatch.setenv("VLLM_USE_V1", str(int(use_v1)))
monkeypatch.setenv("VLLM_USE_V1", "1")
monkeypatch.setenv("VLLM_USE_TRITON_FLASH_ATTN", str(int(use_triton_fa)))
# Prompt 4 seems too open-ended, differs between fused and unfused