[ci] Use env var to control whether to use S3 bucket in CI (#13634)
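
For context, a minimal sketch of the kind of gating this commit enables. The env var name VLLM_CI_USE_S3 and the maybe_model_weights_s3 helper below are illustrative assumptions, not code from this diff; the idea is that tests keep plain Hugging Face model IDs, and the CI conftest prefixes the S3 mirror only when the variable opts in.

# Hypothetical sketch of an env-var gate in conftest.py; the variable
# name and helper are assumptions for illustration, not from this commit.
import os

MODEL_WEIGHTS_S3_BUCKET = "s3://vllm-ci-model-weights"


def maybe_model_weights_s3(model_id: str) -> str:
    """Return the S3 path for a model only when CI opts in via env var."""
    if os.environ.get("VLLM_CI_USE_S3", "0") == "1":
        return f"{MODEL_WEIGHTS_S3_BUCKET}/{model_id}"
    return model_id

With a gate like this in one place, the test files below can drop both their MODEL_WEIGHTS_S3_BUCKET imports and the hard-coded bucket prefixes in the parametrize lists (along with the RUNAI_STREAMER load format that only the S3 path required), which is what the hunks in this commit do.
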
@@ -2,16 +2,12 @@
 
 import pytest
 
-from vllm.config import LoadFormat
 from vllm.engine.arg_utils import EngineArgs
 from vllm.engine.llm_engine import LLMEngine
 from vllm.sampling_params import SamplingParams
 
-from ..conftest import MODEL_WEIGHTS_S3_BUCKET
-
 
-@pytest.mark.parametrize("model",
-                         [f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
+@pytest.mark.parametrize("model", ["distilbert/distilgpt2"])
 @pytest.mark.parametrize("block_size", [16])
 def test_computed_prefix_blocks(model: str, block_size: int):
     # This test checks if we are able to run the engine to completion
@@ -28,7 +24,6 @@ def test_computed_prefix_blocks(model: str, block_size: int):
         "decoration.")
 
     engine_args = EngineArgs(model=model,
-                             load_format=LoadFormat.RUNAI_STREAMER,
                              block_size=block_size,
                              enable_prefix_caching=True)
 

@@ -2,15 +2,11 @@
 
 import pytest
 
-from vllm.config import LoadFormat
 from vllm.entrypoints.llm import LLM
 from vllm.sampling_params import SamplingParams
 
-from ..conftest import MODEL_WEIGHTS_S3_BUCKET
-
 
-@pytest.mark.parametrize("model",
-                         [f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
+@pytest.mark.parametrize("model", ["distilbert/distilgpt2"])
 def test_computed_prefix_blocks(model: str):
     # This test checks if the engine generates completions both with and
     # without optional detokenization, that detokenization includes text
@@ -21,7 +17,7 @@ def test_computed_prefix_blocks(model: str):
         "paper clips? Is there an easy to follow video tutorial available "
         "online for free?")
 
-    llm = LLM(model=model, load_format=LoadFormat.RUNAI_STREAMER)
+    llm = LLM(model=model)
     sampling_params = SamplingParams(max_tokens=10,
                                      temperature=0.0,
                                      detokenize=False)

@@ -6,17 +6,12 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
 import pytest
 
-from vllm.config import LoadFormat
 from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
 from vllm.engine.async_llm_engine import AsyncLLMEngine
 from vllm.engine.llm_engine import LLMEngine
 from vllm.executor.uniproc_executor import UniProcExecutor
 from vllm.sampling_params import SamplingParams
 
-from ..conftest import MODEL_WEIGHTS_S3_BUCKET
-
-RUNAI_STREAMER_LOAD_FORMAT = LoadFormat.RUNAI_STREAMER
-
 
 class Mock:
     ...
@@ -38,12 +33,10 @@ class CustomUniExecutor(UniProcExecutor):
 CustomUniExecutorAsync = CustomUniExecutor
 
 
-@pytest.mark.parametrize("model",
-                         [f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
+@pytest.mark.parametrize("model", ["distilbert/distilgpt2"])
 def test_custom_executor_type_checking(model):
     with pytest.raises(ValueError):
         engine_args = EngineArgs(model=model,
-                                 load_format=RUNAI_STREAMER_LOAD_FORMAT,
                                  distributed_executor_backend=Mock)
         LLMEngine.from_engine_args(engine_args)
     with pytest.raises(ValueError):
@@ -52,8 +45,7 @@ def test_custom_executor_type_checking(model):
         AsyncLLMEngine.from_engine_args(engine_args)
 
 
-@pytest.mark.parametrize("model",
-                         [f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
+@pytest.mark.parametrize("model", ["distilbert/distilgpt2"])
 def test_custom_executor(model, tmp_path):
     cwd = os.path.abspath(".")
     os.chdir(tmp_path)
@@ -62,7 +54,6 @@ def test_custom_executor(model, tmp_path):
 
         engine_args = EngineArgs(
             model=model,
-            load_format=RUNAI_STREAMER_LOAD_FORMAT,
             distributed_executor_backend=CustomUniExecutor,
             enforce_eager=True,  # reduce test time
         )
@@ -77,8 +68,7 @@ def test_custom_executor(model, tmp_path):
         os.chdir(cwd)
 
 
-@pytest.mark.parametrize("model",
-                         [f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
+@pytest.mark.parametrize("model", ["distilbert/distilgpt2"])
 def test_custom_executor_async(model, tmp_path):
     cwd = os.path.abspath(".")
     os.chdir(tmp_path)
@@ -87,7 +77,6 @@ def test_custom_executor_async(model, tmp_path):
 
         engine_args = AsyncEngineArgs(
             model=model,
-            load_format=RUNAI_STREAMER_LOAD_FORMAT,
             distributed_executor_backend=CustomUniExecutorAsync,
             enforce_eager=True,  # reduce test time
         )
@@ -106,8 +95,7 @@ def test_custom_executor_async(model, tmp_path):
         os.chdir(cwd)
 
 
-@pytest.mark.parametrize("model",
-                         [f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
+@pytest.mark.parametrize("model", ["distilbert/distilgpt2"])
 def test_respect_ray(model):
     # even for TP=1 and PP=1,
     # if users specify ray, we should use ray.
@@ -116,7 +104,6 @@ def test_respect_ray(model):
     engine_args = EngineArgs(
         model=model,
         distributed_executor_backend="ray",
-        load_format=RUNAI_STREAMER_LOAD_FORMAT,
         enforce_eager=True,  # reduce test time
     )
     engine = LLMEngine.from_engine_args(engine_args)

@@ -2,22 +2,19 @@
 
 import pytest
 
-from vllm.config import LoadFormat
 from vllm.entrypoints.llm import LLM
 from vllm.sampling_params import SamplingParams
 
-from ..conftest import MODEL_WEIGHTS_S3_BUCKET
-
 
-@pytest.mark.parametrize("model",
-                         [f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
+@pytest.mark.parametrize("model", ["distilbert/distilgpt2"])
 def test_skip_tokenizer_initialization(model: str):
     # This test checks if the flag skip_tokenizer_init skips the initialization
     # of tokenizer and detokenizer. The generated output is expected to contain
     # token ids.
-    llm = LLM(model=model,
-              skip_tokenizer_init=True,
-              load_format=LoadFormat.RUNAI_STREAMER)
+    llm = LLM(
+        model=model,
+        skip_tokenizer_init=True,
+    )
     sampling_params = SamplingParams(prompt_logprobs=True, detokenize=True)
 
     with pytest.raises(ValueError, match="cannot pass text prompts when"):