Remove V0 attention backends (#25351)
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
@@ -5,7 +5,6 @@ from urllib.request import urlopen
 
 from vllm import LLM, SamplingParams
 
-os.environ["VLLM_ATTENTION_BACKEND"] = "DUAL_CHUNK_FLASH_ATTN"
 os.environ["VLLM_ALLOW_LONG_MAX_MODEL_LEN"] = "1"
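After this change, the example script no longer pins the removed V0 backend via VLLM_ATTENTION_BACKEND and relies on the default attention backend selection. A minimal offline-inference sketch of what the surrounding example presumably does; the model name, prompt, and sampling values are placeholders, not taken from this diff:

import os

from vllm import LLM, SamplingParams

# Still set by the example to allow a max_model_len beyond the model's default.
os.environ["VLLM_ALLOW_LONG_MAX_MODEL_LEN"] = "1"

# Hypothetical model and prompt; the real values are outside this hunk.
llm = LLM(model="Qwen/Qwen2.5-7B-Instruct")
sampling_params = SamplingParams(temperature=0.7, max_tokens=128)

outputs = llm.generate(["Summarize the following document: ..."], sampling_params)
for output in outputs:
    print(output.outputs[0].text)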