[ci] Use env var to control whether to use S3 bucket in CI (#13634)

2025-02-22 19:19:45 -08:00
parent 322d2a27d6
commit 2c5e637b57
30 changed files with 222 additions and 231 deletions
--- a/tests/engine/test_computed_prefix_blocks.py
+++ b/tests/engine/test_computed_prefix_blocks.py
@@ -2,16 +2,12 @@

 import pytest

-from vllm.config import LoadFormat
 from vllm.engine.arg_utils import EngineArgs
 from vllm.engine.llm_engine import LLMEngine
 from vllm.sampling_params import SamplingParams

-from ..conftest import MODEL_WEIGHTS_S3_BUCKET

-
-@pytest.mark.parametrize("model",
-                         [f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
+@pytest.mark.parametrize("model", ["distilbert/distilgpt2"])
 @pytest.mark.parametrize("block_size", [16])
 def test_computed_prefix_blocks(model: str, block_size: int):
    # This test checks if we are able to run the engine to completion
@@ -28,7 +24,6 @@ def test_computed_prefix_blocks(model: str, block_size: int):
        "decoration.")

    engine_args = EngineArgs(model=model,
-                             load_format=LoadFormat.RUNAI_STREAMER,
                             block_size=block_size,
                             enable_prefix_caching=True)

--- a/tests/engine/test_detokenization.py
+++ b/tests/engine/test_detokenization.py
@@ -2,15 +2,11 @@

 import pytest

-from vllm.config import LoadFormat
 from vllm.entrypoints.llm import LLM
 from vllm.sampling_params import SamplingParams

-from ..conftest import MODEL_WEIGHTS_S3_BUCKET

-
-@pytest.mark.parametrize("model",
-                         [f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
+@pytest.mark.parametrize("model", ["distilbert/distilgpt2"])
 def test_computed_prefix_blocks(model: str):
    # This test checks if the engine generates completions both with and
    # without optional detokenization, that detokenization includes text
@@ -21,7 +17,7 @@ def test_computed_prefix_blocks(model: str):
        "paper clips? Is there an easy to follow video tutorial available "
        "online for free?")

-    llm = LLM(model=model, load_format=LoadFormat.RUNAI_STREAMER)
+    llm = LLM(model=model)
    sampling_params = SamplingParams(max_tokens=10,
                                     temperature=0.0,
                                     detokenize=False)
--- a/tests/engine/test_executor.py
+++ b/tests/engine/test_executor.py
@@ -6,17 +6,12 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union

 import pytest

-from vllm.config import LoadFormat
 from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
 from vllm.engine.async_llm_engine import AsyncLLMEngine
 from vllm.engine.llm_engine import LLMEngine
 from vllm.executor.uniproc_executor import UniProcExecutor
 from vllm.sampling_params import SamplingParams

-from ..conftest import MODEL_WEIGHTS_S3_BUCKET
-
-RUNAI_STREAMER_LOAD_FORMAT = LoadFormat.RUNAI_STREAMER
-

 class Mock:
    ...
@@ -38,12 +33,10 @@ class CustomUniExecutor(UniProcExecutor):
 CustomUniExecutorAsync = CustomUniExecutor


-@pytest.mark.parametrize("model",
-                         [f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
+@pytest.mark.parametrize("model", ["distilbert/distilgpt2"])
 def test_custom_executor_type_checking(model):
    with pytest.raises(ValueError):
        engine_args = EngineArgs(model=model,
-                                 load_format=RUNAI_STREAMER_LOAD_FORMAT,
                                 distributed_executor_backend=Mock)
        LLMEngine.from_engine_args(engine_args)
    with pytest.raises(ValueError):
@@ -52,8 +45,7 @@ def test_custom_executor_type_checking(model):
        AsyncLLMEngine.from_engine_args(engine_args)


-@pytest.mark.parametrize("model",
-                         [f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
+@pytest.mark.parametrize("model", ["distilbert/distilgpt2"])
 def test_custom_executor(model, tmp_path):
    cwd = os.path.abspath(".")
    os.chdir(tmp_path)
@@ -62,7 +54,6 @@ def test_custom_executor(model, tmp_path):

        engine_args = EngineArgs(
            model=model,
-            load_format=RUNAI_STREAMER_LOAD_FORMAT,
            distributed_executor_backend=CustomUniExecutor,
            enforce_eager=True,  # reduce test time
        )
@@ -77,8 +68,7 @@ def test_custom_executor(model, tmp_path):
        os.chdir(cwd)


-@pytest.mark.parametrize("model",
-                         [f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
+@pytest.mark.parametrize("model", ["distilbert/distilgpt2"])
 def test_custom_executor_async(model, tmp_path):
    cwd = os.path.abspath(".")
    os.chdir(tmp_path)
@@ -87,7 +77,6 @@ def test_custom_executor_async(model, tmp_path):

        engine_args = AsyncEngineArgs(
            model=model,
-            load_format=RUNAI_STREAMER_LOAD_FORMAT,
            distributed_executor_backend=CustomUniExecutorAsync,
            enforce_eager=True,  # reduce test time
        )
@@ -106,8 +95,7 @@ def test_custom_executor_async(model, tmp_path):
        os.chdir(cwd)


-@pytest.mark.parametrize("model",
-                         [f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
+@pytest.mark.parametrize("model", ["distilbert/distilgpt2"])
 def test_respect_ray(model):
    # even for TP=1 and PP=1,
    # if users specify ray, we should use ray.
@@ -116,7 +104,6 @@ def test_respect_ray(model):
    engine_args = EngineArgs(
        model=model,
        distributed_executor_backend="ray",
-        load_format=RUNAI_STREAMER_LOAD_FORMAT,
        enforce_eager=True,  # reduce test time
    )
    engine = LLMEngine.from_engine_args(engine_args)
--- a/tests/engine/test_skip_tokenizer_init.py
+++ b/tests/engine/test_skip_tokenizer_init.py
@@ -2,22 +2,19 @@

 import pytest

-from vllm.config import LoadFormat
 from vllm.entrypoints.llm import LLM
 from vllm.sampling_params import SamplingParams

-from ..conftest import MODEL_WEIGHTS_S3_BUCKET

-
-@pytest.mark.parametrize("model",
-                         [f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
+@pytest.mark.parametrize("model", ["distilbert/distilgpt2"])
 def test_skip_tokenizer_initialization(model: str):
    # This test checks if the flag skip_tokenizer_init skips the initialization
    # of tokenizer and detokenizer. The generated output is expected to contain
    # token ids.
-    llm = LLM(model=model,
-              skip_tokenizer_init=True,
-              load_format=LoadFormat.RUNAI_STREAMER)
+    llm = LLM(
+        model=model,
+        skip_tokenizer_init=True,
+    )
    sampling_params = SamplingParams(prompt_logprobs=True, detokenize=True)

    with pytest.raises(ValueError, match="cannot pass text prompts when"):