From d346ec695ef5dc74cde338a6bc3857e91c311ab2 Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Sat, 27 Sep 2025 12:45:20 +0800 Subject: [PATCH] [CI/Build] Consolidate model loader tests and requirements (#25765) Signed-off-by: DarkLight1337 --- .buildkite/test-pipeline.yaml | 19 ++----- .github/mergify.yml | 2 +- docker/Dockerfile | 2 +- requirements/nightly_torch_test.txt | 3 +- requirements/rocm.txt | 5 +- requirements/test.in | 3 +- requirements/test.txt | 10 ++-- setup.py | 5 +- tests/model_executor/conftest.py | 52 ------------------- .../fastsafetensors_loader/__init__.py | 0 .../test_fastsafetensors_loader.py | 0 .../test_weight_utils.py | 0 .../runai_model_streamer}/__init__.py | 0 .../test_runai_model_streamer_loader.py | 0 .../runai_model_streamer}/test_runai_utils.py | 0 .../test_weight_utils.py | 0 .../tensorizer_loader/__init__.py | 0 .../tensorizer_loader/conftest.py | 0 .../tensorizer_loader/test_tensorizer.py | 2 +- .../model_loader/weight_utils.py | 35 +++++++++++-- 20 files changed, 48 insertions(+), 90 deletions(-) delete mode 100644 tests/model_executor/conftest.py rename tests/{ => model_executor/model_loader}/fastsafetensors_loader/__init__.py (100%) rename tests/{ => model_executor/model_loader}/fastsafetensors_loader/test_fastsafetensors_loader.py (100%) rename tests/{ => model_executor/model_loader}/fastsafetensors_loader/test_weight_utils.py (100%) rename tests/{runai_model_streamer_test => model_executor/model_loader/runai_model_streamer}/__init__.py (100%) rename tests/{runai_model_streamer_test => model_executor/model_loader/runai_model_streamer}/test_runai_model_streamer_loader.py (100%) rename tests/{runai_model_streamer_test => model_executor/model_loader/runai_model_streamer}/test_runai_utils.py (100%) rename tests/{runai_model_streamer_test => model_executor/model_loader/runai_model_streamer}/test_weight_utils.py (100%) rename tests/{ => model_executor/model_loader}/tensorizer_loader/__init__.py (100%) rename tests/{ => model_executor/model_loader}/tensorizer_loader/conftest.py (100%) rename tests/{ => model_executor/model_loader}/tensorizer_loader/test_tensorizer.py (99%) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index c178fd372b..82a3b2fc19 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -465,29 +465,18 @@ steps: commands: - pytest -v -s kernels/mamba -- label: Tensorizer Test # 14min - timeout_in_minutes: 25 - mirror_hardwares: [amdexperimental] - source_file_dependencies: - - vllm/model_executor/model_loader - - tests/tensorizer_loader - - tests/entrypoints/openai/test_tensorizer_entrypoint.py - commands: - - apt-get update && apt-get install -y curl libsodium23 - - export VLLM_WORKER_MULTIPROC_METHOD=spawn - - pytest -v -s tensorizer_loader - - pytest -v -s entrypoints/openai/test_tensorizer_entrypoint.py - -- label: Model Executor Test # 7min - timeout_in_minutes: 20 +- label: Model Executor Test # ??? + timeout_in_minutes: 60 mirror_hardwares: [amdexperimental] source_file_dependencies: - vllm/model_executor - tests/model_executor + - tests/entrypoints/openai/test_tensorizer_entrypoint.py commands: - apt-get update && apt-get install -y curl libsodium23 - export VLLM_WORKER_MULTIPROC_METHOD=spawn - pytest -v -s model_executor + - pytest -v -s entrypoints/openai/test_tensorizer_entrypoint.py - label: Benchmarks # 11min timeout_in_minutes: 20 diff --git a/.github/mergify.yml b/.github/mergify.yml index 75ee3e3c55..923f708ea1 100644 --- a/.github/mergify.yml +++ b/.github/mergify.yml @@ -274,7 +274,7 @@ pull_request_rules: - files~=^vllm/model_executor/model_loader/tensorizer.py - files~=^vllm/model_executor/model_loader/tensorizer_loader.py - files~=^tests/entrypoints/openai/test_tensorizer_entrypoint.py - - files~=^tests/tensorizer_loader/ + - files~=^tests/model_executor/model_loader/tensorizer_loader/ actions: assign: users: diff --git a/docker/Dockerfile b/docker/Dockerfile index fad62be798..c2b855be44 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -546,7 +546,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ else \ BITSANDBYTES_VERSION="0.46.1"; \ fi; \ - uv pip install --system accelerate hf_transfer modelscope "bitsandbytes>=${BITSANDBYTES_VERSION}" 'timm>=1.0.17' boto3 runai-model-streamer runai-model-streamer[s3] + uv pip install --system accelerate hf_transfer modelscope "bitsandbytes>=${BITSANDBYTES_VERSION}" 'timm>=1.0.17' 'runai-model-streamer[s3]>=0.14.0' ENV VLLM_USAGE_SOURCE production-docker-image diff --git a/requirements/nightly_torch_test.txt b/requirements/nightly_torch_test.txt index a529bf4504..790a18f28b 100644 --- a/requirements/nightly_torch_test.txt +++ b/requirements/nightly_torch_test.txt @@ -43,7 +43,6 @@ tritonclient==2.51.0 numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding numba == 0.61.2; python_version > '3.9' numpy -runai-model-streamer==0.11.0 -runai-model-streamer-s3==0.11.0 +runai-model-streamer[s3]==0.14.0 fastsafetensors>=0.1.10 pydantic>=2.10 # 2.9 leads to error on python 3.10 diff --git a/requirements/rocm.txt b/requirements/rocm.txt index c129dd345c..c4aabe2a73 100644 --- a/requirements/rocm.txt +++ b/requirements/rocm.txt @@ -5,8 +5,6 @@ numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Req numba == 0.61.2; python_version > '3.9' # Dependencies for AMD GPUs -boto3 -botocore datasets ray[cgraph]>=2.48.0 # Ray Compiled Graph, required for pipeline parallelism in V1. peft @@ -15,7 +13,6 @@ tensorizer==2.10.1 packaging>=24.2 setuptools>=77.0.3,<80.0.0 setuptools-scm>=8 -runai-model-streamer==0.11.0 -runai-model-streamer-s3==0.11.0 +runai-model-streamer[s3]==0.14.0 conch-triton-kernels==1.2.1 timm>=1.0.17 \ No newline at end of file diff --git a/requirements/test.in b/requirements/test.in index 451bd73879..c9496c61a7 100644 --- a/requirements/test.in +++ b/requirements/test.in @@ -51,8 +51,7 @@ tritonclient==2.51.0 numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding numba == 0.61.2; python_version > '3.9' numpy -runai-model-streamer==0.11.0 -runai-model-streamer-s3==0.11.0 +runai-model-streamer[s3]==0.14.0 fastsafetensors>=0.1.10 pydantic>=2.10 # 2.9 leads to error on python 3.10 decord==0.6.0 diff --git a/requirements/test.txt b/requirements/test.txt index 3519aa524f..912e04b260 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -72,7 +72,9 @@ blobfile==3.0.0 bm25s==0.2.13 # via mteb boto3==1.35.57 - # via tensorizer + # via + # runai-model-streamer-s3 + # tensorizer botocore==1.35.57 # via # boto3 @@ -925,10 +927,10 @@ rsa==4.9.1 # via google-auth rtree==1.4.0 # via torchgeo -runai-model-streamer==0.11.0 - # via -r requirements/test.in -runai-model-streamer-s3==0.11.0 +runai-model-streamer==0.14.0 # via -r requirements/test.in +runai-model-streamer-s3==0.14.0 + # via runai-model-streamer s3transfer==0.10.3 # via boto3 sacrebleu==2.4.3 diff --git a/setup.py b/setup.py index e4c40d22b9..a8fec8a028 100644 --- a/setup.py +++ b/setup.py @@ -654,10 +654,7 @@ setup( "bench": ["pandas", "datasets"], "tensorizer": ["tensorizer==2.10.1"], "fastsafetensors": ["fastsafetensors >= 0.1.10"], - "runai": [ - "runai-model-streamer >= 0.14.0", "runai-model-streamer-gcs", - "google-cloud-storage", "runai-model-streamer-s3", "boto3" - ], + "runai": ["runai-model-streamer[s3,gcs] >= 0.14.0"], "audio": ["librosa", "soundfile", "mistral_common[audio]"], # Required for audio processing "video": [], # Kept for backwards compatibility diff --git a/tests/model_executor/conftest.py b/tests/model_executor/conftest.py deleted file mode 100644 index c6d89d849e..0000000000 --- a/tests/model_executor/conftest.py +++ /dev/null @@ -1,52 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import pytest - - -@pytest.fixture -def sample_regex(): - return (r"((25[0-5]|(2[0-4]|1\d|[1-9]|)\d)\.){3}" - r"(25[0-5]|(2[0-4]|1\d|[1-9]|)\d)") - - -@pytest.fixture -def sample_json_schema(): - return { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "age": { - "type": "integer" - }, - "skills": { - "type": "array", - "items": { - "type": "string", - "maxLength": 10 - }, - "minItems": 3 - }, - "work_history": { - "type": "array", - "items": { - "type": "object", - "properties": { - "company": { - "type": "string" - }, - "duration": { - "type": "number" - }, - "position": { - "type": "string" - } - }, - "required": ["company", "position"] - } - } - }, - "required": ["name", "age", "skills", "work_history"] - } diff --git a/tests/fastsafetensors_loader/__init__.py b/tests/model_executor/model_loader/fastsafetensors_loader/__init__.py similarity index 100% rename from tests/fastsafetensors_loader/__init__.py rename to tests/model_executor/model_loader/fastsafetensors_loader/__init__.py diff --git a/tests/fastsafetensors_loader/test_fastsafetensors_loader.py b/tests/model_executor/model_loader/fastsafetensors_loader/test_fastsafetensors_loader.py similarity index 100% rename from tests/fastsafetensors_loader/test_fastsafetensors_loader.py rename to tests/model_executor/model_loader/fastsafetensors_loader/test_fastsafetensors_loader.py diff --git a/tests/fastsafetensors_loader/test_weight_utils.py b/tests/model_executor/model_loader/fastsafetensors_loader/test_weight_utils.py similarity index 100% rename from tests/fastsafetensors_loader/test_weight_utils.py rename to tests/model_executor/model_loader/fastsafetensors_loader/test_weight_utils.py diff --git a/tests/runai_model_streamer_test/__init__.py b/tests/model_executor/model_loader/runai_model_streamer/__init__.py similarity index 100% rename from tests/runai_model_streamer_test/__init__.py rename to tests/model_executor/model_loader/runai_model_streamer/__init__.py diff --git a/tests/runai_model_streamer_test/test_runai_model_streamer_loader.py b/tests/model_executor/model_loader/runai_model_streamer/test_runai_model_streamer_loader.py similarity index 100% rename from tests/runai_model_streamer_test/test_runai_model_streamer_loader.py rename to tests/model_executor/model_loader/runai_model_streamer/test_runai_model_streamer_loader.py diff --git a/tests/runai_model_streamer_test/test_runai_utils.py b/tests/model_executor/model_loader/runai_model_streamer/test_runai_utils.py similarity index 100% rename from tests/runai_model_streamer_test/test_runai_utils.py rename to tests/model_executor/model_loader/runai_model_streamer/test_runai_utils.py diff --git a/tests/runai_model_streamer_test/test_weight_utils.py b/tests/model_executor/model_loader/runai_model_streamer/test_weight_utils.py similarity index 100% rename from tests/runai_model_streamer_test/test_weight_utils.py rename to tests/model_executor/model_loader/runai_model_streamer/test_weight_utils.py diff --git a/tests/tensorizer_loader/__init__.py b/tests/model_executor/model_loader/tensorizer_loader/__init__.py similarity index 100% rename from tests/tensorizer_loader/__init__.py rename to tests/model_executor/model_loader/tensorizer_loader/__init__.py diff --git a/tests/tensorizer_loader/conftest.py b/tests/model_executor/model_loader/tensorizer_loader/conftest.py similarity index 100% rename from tests/tensorizer_loader/conftest.py rename to tests/model_executor/model_loader/tensorizer_loader/conftest.py diff --git a/tests/tensorizer_loader/test_tensorizer.py b/tests/model_executor/model_loader/tensorizer_loader/test_tensorizer.py similarity index 99% rename from tests/tensorizer_loader/test_tensorizer.py rename to tests/model_executor/model_loader/tensorizer_loader/test_tensorizer.py index e00d7c2f80..f50f046967 100644 --- a/tests/tensorizer_loader/test_tensorizer.py +++ b/tests/model_executor/model_loader/tensorizer_loader/test_tensorizer.py @@ -14,6 +14,7 @@ import pytest import torch import vllm.model_executor.model_loader.tensorizer +from tests.utils import VLLM_PATH, RemoteOpenAIServer from vllm import LLM, SamplingParams from vllm.engine.arg_utils import EngineArgs # yapf: disable @@ -27,7 +28,6 @@ from vllm.model_executor.model_loader.tensorizer_loader import ( # yapf: enable from vllm.utils import PlaceholderModule -from ..utils import VLLM_PATH, RemoteOpenAIServer from .conftest import DummyExecutor, assert_from_collective_rpc try: diff --git a/vllm/model_executor/model_loader/weight_utils.py b/vllm/model_executor/model_loader/weight_utils.py index cad32fee1d..f52d9dd2f5 100644 --- a/vllm/model_executor/model_loader/weight_utils.py +++ b/vllm/model_executor/model_loader/weight_utils.py @@ -639,6 +639,19 @@ def runai_safetensors_weights_iterator( yield from tensor_iter +def _init_loader( + pg: torch.distributed.ProcessGroup, + device: torch.device, + f_list: list[str], + *, + nogds: bool = False, +): + loader = SafeTensorsFileLoader(pg, device, nogds=nogds) + rank_file_map = {i: [f] for i, f in enumerate(f_list)} + loader.add_filenames(rank_file_map) + return loader + + def fastsafetensors_weights_iterator( hf_weights_files: list[str], use_tqdm_on_load: bool, @@ -656,17 +669,31 @@ def fastsafetensors_weights_iterator( for i in range(0, len(hf_weights_files), pg.size()) ] + nogds = False + for f_list in tqdm( weight_files_sub_lists, desc="Loading safetensors using Fastsafetensor loader", disable=not enable_tqdm(use_tqdm_on_load), bar_format=_BAR_FORMAT, ): - loader = SafeTensorsFileLoader(pg, device) - rank_file_map = {i: [f] for i, f in enumerate(f_list)} - loader.add_filenames(rank_file_map) + loader = _init_loader(pg, device, f_list, nogds=nogds) try: - fb = loader.copy_files_to_device() + try: + fb = loader.copy_files_to_device() + except RuntimeError as e: + if "gds" not in str(e): + raise + + loader.close() + nogds = True + logger.warning_once( + "GDS not enabled, setting `nogds=True`.\n" + "For more information, see: https://github.com/foundation-model-stack/fastsafetensors?tab=readme-ov-file#basic-api-usages" + ) + loader = _init_loader(pg, device, f_list, nogds=nogds) + fb = loader.copy_files_to_device() + try: keys = list(fb.key_to_rank_lidx.keys()) for k in keys: