[ci] Use env var to control whether to use S3 bucket in CI (#13634)

This commit is contained in:
Kevin H. Luu
2025-02-22 19:19:45 -08:00
committed by GitHub
parent 322d2a27d6
commit 2c5e637b57
30 changed files with 222 additions and 231 deletions

View File

@@ -10,8 +10,8 @@ import pytest
from tests.mq_llm_engine.utils import RemoteMQLLMEngine, generate
from vllm.engine.arg_utils import AsyncEngineArgs
MODEL = "s3://vllm-ci-model-weights/gemma-1.1-2b-it"
ENGINE_ARGS = AsyncEngineArgs(model=MODEL, load_format="runai_streamer")
MODEL = "google/gemma-1.1-2b-it"
ENGINE_ARGS = AsyncEngineArgs(model=MODEL)
RAISED_ERROR = KeyError
RAISED_VALUE = "foo"
EXPECTED_TOKENS = 250

View File

@@ -21,10 +21,8 @@ from vllm.lora.request import LoRARequest
from vllm.usage.usage_lib import UsageContext
from vllm.utils import FlexibleArgumentParser
MODEL = "s3://vllm-ci-model-weights/gemma-1.1-2b-it"
ENGINE_ARGS = AsyncEngineArgs(model=MODEL,
load_format="runai_streamer",
enforce_eager=True)
MODEL = "google/gemma-1.1-2b-it"
ENGINE_ARGS = AsyncEngineArgs(model=MODEL, enforce_eager=True)
RAISED_ERROR = KeyError
RAISED_VALUE = "foo"

View File

@@ -10,14 +10,12 @@ import pytest
from tests.mq_llm_engine.utils import RemoteMQLLMEngine, generate
from vllm.engine.arg_utils import AsyncEngineArgs
MODEL = "s3://vllm-ci-model-weights/gemma-1.1-2b-it"
MODEL = "google/gemma-1.1-2b-it"
NUM_EXPECTED_TOKENS = 10
NUM_REQUESTS = 10000
# Scenarios to test for num generated token.
ENGINE_ARGS = AsyncEngineArgs(model=MODEL,
load_format="runai_streamer",
disable_log_requests=True)
ENGINE_ARGS = AsyncEngineArgs(model=MODEL, disable_log_requests=True)
@pytest.fixture(scope="function")