Update Optional[x] -> x | None and Union[x, y] to x | y (#26633)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor
2025-10-12 17:51:31 +01:00
committed by GitHub
parent 9bb38130cb
commit 8fcaaf6a16
944 changed files with 9490 additions and 10121 deletions

View File

@ -1,6 +1,5 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from typing import Optional
import pytest
import torch
@ -138,7 +137,7 @@ def test_models(
example_prompts, max_tokens, num_logprobs
)
prompt_embeds: Optional[list[torch.Tensor]] = [] if use_prompt_embeds else None
prompt_embeds: list[torch.Tensor] | None = [] if use_prompt_embeds else None
prompt_token_ids = []
for prompt in example_prompts:

View File

@ -1,7 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from typing import Callable
from collections.abc import Callable
import pytest

View File

@ -1,7 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# Adapted from https://huggingface.co/docs/transformers/perplexity
from typing import Optional, cast
from typing import cast
import pytest
import torch
@ -85,7 +85,7 @@ def wikitext_ppl_test(
n_tokens = 0
for output in outputs:
output = cast(TokensTextLogprobsPromptLogprobs, output)
token_datas = cast(list[Optional[dict[int, Logprob]]], output[3])
token_datas = cast(list[dict[int, Logprob] | None], output[3])
assert token_datas[0] is None
token_log_probs = []

View File

@ -1,7 +1,6 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from collections.abc import Sequence
from typing import Optional
import pytest
@ -13,7 +12,7 @@ def run_embedding_correctness_test(
hf_model: "HfRunner",
inputs: list[str],
vllm_outputs: Sequence[list[float]],
dimensions: Optional[int] = None,
dimensions: int | None = None,
):
hf_outputs = hf_model.encode(inputs)
if dimensions:

View File

@ -1,6 +1,5 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from typing import Optional
import pytest
@ -66,7 +65,7 @@ def test_models(
pooling_type="MEAN", normalize=False
)
max_model_len: Optional[int] = 512
max_model_len: int | None = 512
if model in [
"sentence-transformers/all-MiniLM-L12-v2",
"sentence-transformers/stsb-roberta-base-v2",

View File

@ -1,7 +1,5 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from __future__ import annotations
import numpy as np
import openai
import pytest

View File

@ -3,7 +3,6 @@
import tempfile
from collections.abc import Sequence
from typing import Optional
import mteb
import numpy as np
@ -51,7 +50,7 @@ class VllmMtebEncoder(mteb.Encoder):
def predict(
self,
sentences: list[tuple[str, str, Optional[str]]], # query, corpus, prompt
sentences: list[tuple[str, str, str | None]], # query, corpus, prompt
*args,
**kwargs,
) -> np.ndarray:
@ -100,7 +99,7 @@ class ScoreClientMtebEncoder(mteb.Encoder):
def predict(
self,
sentences: list[tuple[str, str, Optional[str]]], # query, corpus, prompt
sentences: list[tuple[str, str, str | None]], # query, corpus, prompt
*args,
**kwargs,
) -> np.ndarray:
@ -294,7 +293,7 @@ def mteb_test_rerank_models_hf(
original_predict = hf_model.predict
def _predict(
sentences: list[tuple[str, str, Optional[str]]], # query, corpus, prompt
sentences: list[tuple[str, str, str | None]], # query, corpus, prompt
*args,
**kwargs,
):

View File

@ -1,6 +1,6 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from typing import Any, Optional
from typing import Any
import numpy as np
import pytest
@ -111,7 +111,7 @@ class GemmaMtebEncoder(VllmMtebEncoder):
def predict(
self,
sentences: list[tuple[str, str, Optional[str]]], # query, corpus, prompt
sentences: list[tuple[str, str, str | None]], # query, corpus, prompt
*args,
**kwargs,
) -> np.ndarray:

View File

@ -2,7 +2,6 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from collections.abc import Sequence
from typing import Optional
import pytest
from transformers import AutoModelForSpeechSeq2Seq
@ -18,8 +17,8 @@ HF_AUDIO_PROMPT = "<|start_of_role|>system<|end_of_role|>Knowledge Cutoff Date:
def vllm_to_hf_output(
vllm_output: tuple[list[int], str, Optional[SampleLogprobs]],
) -> tuple[list[int], str, Optional[SampleLogprobs]]:
vllm_output: tuple[list[int], str, SampleLogprobs | None],
) -> tuple[list[int], str, SampleLogprobs | None]:
"""Sanitize hf output to be comparable with vllm output."""
output_ids, output_str, out_logprobs = vllm_output
@ -46,7 +45,7 @@ def run_test(
max_tokens: int,
num_logprobs: int,
tensor_parallel_size: int,
distributed_executor_backend: Optional[str] = None,
distributed_executor_backend: str | None = None,
):
"""Inference result should be the same between hf and vllm.

View File

@ -3,7 +3,6 @@
import os
from collections.abc import Sequence
from typing import Optional
import librosa
import pytest
@ -57,7 +56,7 @@ if current_platform.is_rocm():
def run_test(
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
inputs: Sequence[tuple[list[str], PromptImageInput, Optional[PromptAudioInput]]],
inputs: Sequence[tuple[list[str], PromptImageInput, PromptAudioInput | None]],
model: str,
*,
max_model_len: int,
@ -66,7 +65,7 @@ def run_test(
num_logprobs: int,
mm_limit: int,
tensor_parallel_size: int,
distributed_executor_backend: Optional[str] = None,
distributed_executor_backend: str | None = None,
):
"""Inference result should be the same between hf and vllm.

View File

@ -3,7 +3,6 @@
import os
from collections.abc import Sequence
from typing import Optional
import librosa
import pytest
@ -48,7 +47,7 @@ models = [model_path]
def vllm_to_hf_output(
vllm_output: tuple[list[int], str, Optional[SampleLogprobs]], model: str
vllm_output: tuple[list[int], str, SampleLogprobs | None], model: str
):
"""Sanitize vllm output to be comparable with hf output."""
_, output_str, out_logprobs = vllm_output
@ -79,7 +78,7 @@ if current_platform.is_rocm():
def run_test(
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
inputs: Sequence[tuple[list[str], PromptImageInput, Optional[PromptAudioInput]]],
inputs: Sequence[tuple[list[str], PromptImageInput, PromptAudioInput | None]],
model: str,
*,
max_model_len: int,
@ -88,7 +87,7 @@ def run_test(
num_logprobs: int,
mm_limit: int,
tensor_parallel_size: int,
distributed_executor_backend: Optional[str] = None,
distributed_executor_backend: str | None = None,
):
"""Inference result should be the same between hf and vllm.

View File

@ -2,7 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import json
from dataclasses import asdict
from typing import TYPE_CHECKING, Any, Optional
from typing import TYPE_CHECKING, Any
import pytest
from mistral_common.multimodal import download_image
@ -117,7 +117,7 @@ FIXTURE_LOGPROBS_CHAT = {
MISTRAL_SMALL_3_1_ID: FIXTURES_PATH / "mistral_small_3_chat.json",
}
OutputsLogprobs = list[tuple[list[int], str, Optional[SampleLogprobs]]]
OutputsLogprobs = list[tuple[list[int], str, SampleLogprobs | None]]
# For the test author to store golden output in JSON

View File

@ -1,7 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from typing import Any, Optional, TypedDict, Union
from typing import Any, TypedDict
import numpy.typing as npt
import pytest
@ -83,7 +83,7 @@ class Qwen2VLPromptVideoEmbeddingInput(TypedDict):
def batch_make_image_embeddings(
image_batches: list[Union[Image.Image, list[Image.Image]]],
image_batches: list[Image.Image | list[Image.Image]],
processor,
llm: VllmRunner,
) -> list[Qwen2VLPromptImageEmbeddingInput]:
@ -272,7 +272,7 @@ def run_embedding_input_test(
num_logprobs: int,
mm_limit: int,
tensor_parallel_size: int,
distributed_executor_backend: Optional[str] = None,
distributed_executor_backend: str | None = None,
):
"""Inference result should be the same between
original image/video input and image/video embeddings input.

View File

@ -1,6 +1,5 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from typing import Optional
import pytest
@ -92,7 +91,7 @@ def run_test(
model: str,
*,
tensor_parallel_size: int,
distributed_executor_backend: Optional[str] = None,
distributed_executor_backend: str | None = None,
) -> None:
prompt_list = PROMPTS * 10
expected_list = EXPECTED[model] * 10

View File

@ -2,9 +2,8 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Helpers for building inputs that can be leveraged for different test types."""
from collections.abc import Iterable
from collections.abc import Callable, Iterable
from pathlib import PosixPath
from typing import Callable, Optional, Union
import torch
@ -47,9 +46,9 @@ def replace_test_placeholder(
def get_model_prompts(
base_prompts: Iterable[str],
img_idx_to_prompt: Optional[Callable[[int], str]],
video_idx_to_prompt: Optional[Callable[[int], str]],
audio_idx_to_prompt: Optional[Callable[[int], str]],
img_idx_to_prompt: Callable[[int], str] | None,
video_idx_to_prompt: Callable[[int], str] | None,
audio_idx_to_prompt: Callable[[int], str] | None,
prompt_formatter: Callable[[str], str],
) -> list[str]:
"""Given a model-agnostic base prompt and test configuration for a model(s)
@ -93,7 +92,7 @@ def build_single_image_inputs_from_test_info(
test_info: VLMTestInfo,
image_assets: ImageTestAssets,
size_wrapper: ImageSizeWrapper,
tmp_path: Optional[PosixPath] = None,
tmp_path: PosixPath | None = None,
) -> list[PromptWithMultiModalInput]:
if test_info.prompt_formatter is None:
raise ValueError("Prompt formatter must be set to build single image inputs")
@ -147,7 +146,7 @@ def build_multi_image_inputs_from_test_info(
test_info: VLMTestInfo,
image_assets: ImageTestAssets,
size_wrapper: ImageSizeWrapper,
tmp_path: Optional[PosixPath] = None,
tmp_path: PosixPath | None = None,
) -> list[PromptWithMultiModalInput]:
if test_info.prompt_formatter is None:
raise ValueError("Prompt formatter must be set to build multi image inputs")
@ -266,9 +265,7 @@ def build_video_inputs_from_test_info(
]
def apply_image_size_scaling(
image, size: Union[float, tuple[int, int]], size_type: SizeType
):
def apply_image_size_scaling(image, size: float | tuple[int, int], size_type: SizeType):
"""Applies a size scaler to one image; this can be an image size factor,
which scales the image while maintaining the aspect ratio"""
# Special case for embeddings; if it's a tensor, it's only valid if we

View File

@ -2,7 +2,8 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Core test implementation to be shared across modalities."""
from typing import Any, Callable, Optional
from collections.abc import Callable
from typing import Any
import torch
from transformers.models.auto.auto_factory import _BaseAutoModelClass
@ -27,21 +28,21 @@ def run_test(
enforce_eager: bool,
max_model_len: int,
max_num_seqs: int,
hf_output_post_proc: Optional[Callable[[RunnerOutput, str], Any]],
vllm_output_post_proc: Optional[Callable[[RunnerOutput, str], Any]],
hf_output_post_proc: Callable[[RunnerOutput, str], Any] | None,
vllm_output_post_proc: Callable[[RunnerOutput, str], Any] | None,
auto_cls: type[_BaseAutoModelClass],
use_tokenizer_eos: bool,
comparator: Callable[..., None],
get_stop_token_ids: Optional[Callable[[AnyTokenizer], list[int]]],
stop_str: Optional[list[str]],
get_stop_token_ids: Callable[[AnyTokenizer], list[int]] | None,
stop_str: list[str] | None,
limit_mm_per_prompt: dict[str, int],
vllm_runner_kwargs: Optional[dict[str, Any]],
hf_model_kwargs: Optional[dict[str, Any]],
patch_hf_runner: Optional[Callable[[HfRunner], HfRunner]],
vllm_runner_kwargs: dict[str, Any] | None,
hf_model_kwargs: dict[str, Any] | None,
patch_hf_runner: Callable[[HfRunner], HfRunner] | None,
runner: RunnerOption = "auto",
distributed_executor_backend: Optional[str] = None,
distributed_executor_backend: str | None = None,
tensor_parallel_size: int = 1,
vllm_embeddings: Optional[torch.Tensor] = None,
vllm_embeddings: torch.Tensor | None = None,
):
"""Modality agnostic test executor for comparing HF/vLLM outputs."""
# In the case of embeddings, vLLM takes separate input tensors

View File

@ -2,7 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Custom input builders for edge-cases in different models."""
from typing import Callable
from collections.abc import Callable
from vllm.assets.image import ImageAsset
from vllm.multimodal.image import rescale_image_size

View File

@ -7,7 +7,6 @@ typically specific to a small subset of models.
import types
from pathlib import PosixPath
from typing import Optional, Union
import numpy as np
import numpy.typing as npt
@ -58,7 +57,7 @@ def fuyu_vllm_to_hf_output(vllm_output: RunnerOutput, model: str) -> RunnerOutpu
def qwen_vllm_to_hf_output(
vllm_output: RunnerOutput, model: str
) -> tuple[list[int], str, Optional[SampleLogprobs]]:
) -> tuple[list[int], str, SampleLogprobs | None]:
"""Sanitize vllm output [qwen models] to be comparable with hf output."""
output_ids, output_str, out_logprobs = vllm_output
@ -69,7 +68,7 @@ def qwen_vllm_to_hf_output(
def qwen2_vllm_to_hf_output(
vllm_output: RunnerOutput, model: str
) -> tuple[list[int], str, Optional[SampleLogprobs]]:
) -> tuple[list[int], str, SampleLogprobs | None]:
"""Sanitize vllm output [qwen2 models] to be comparable with hf output."""
output_ids, output_str, out_logprobs = vllm_output
@ -80,7 +79,7 @@ def qwen2_vllm_to_hf_output(
def kimiv_vl_vllm_to_hf_output(
vllm_output: RunnerOutput, model: str
) -> tuple[list[int], str, Optional[SampleLogprobs]]:
) -> tuple[list[int], str, SampleLogprobs | None]:
"""Sanitize vllm output [kimi_vl models] to be comparable with hf output."""
output_ids, output_str, out_logprobs = vllm_output
@ -99,7 +98,7 @@ def llava_image_vllm_to_hf_output(
def llava_video_vllm_to_hf_output(
vllm_output: RunnerOutput, model: str
) -> tuple[list[int], str, Optional[SampleLogprobs]]:
) -> tuple[list[int], str, SampleLogprobs | None]:
config = AutoConfig.from_pretrained(model)
mm_token_id = config.video_token_index
return _llava_vllm_to_hf_output(vllm_output, model, mm_token_id)
@ -263,7 +262,7 @@ def get_llava_embeddings(image_assets: ImageTestAssets):
####### Prompt path encoders for models that need models on disk
def qwen_prompt_path_encoder(
tmp_path: PosixPath, prompt: str, assets: Union[list[ImageAsset], ImageTestAssets]
tmp_path: PosixPath, prompt: str, assets: list[ImageAsset] | ImageTestAssets
) -> str:
"""Given a temporary dir path, export one or more image assets into the
tempdir & replace its contents with the local path to the string so that
@ -440,7 +439,7 @@ def h2ovl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
self.max_num = self.config.max_dynamic_patch
self.image_size = self.vision_config.image_size
def __call__(self, text: str, images: Union[Image, list[Image]], **kwargs):
def __call__(self, text: str, images: Image | list[Image], **kwargs):
from vllm.model_executor.models.h2ovl import (
IMG_CONTEXT,
IMG_END,
@ -499,7 +498,7 @@ def skyworkr1v_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
self.max_num = self.config.max_dynamic_patch
self.image_size = self.vision_config.image_size
def __call__(self, text: str, images: Union[Image, list[Image]], **kwargs):
def __call__(self, text: str, images: Image | list[Image], **kwargs):
from vllm.model_executor.models.skyworkr1v import (
IMG_CONTEXT,
IMG_END,
@ -560,8 +559,8 @@ def internvl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
def __call__(
self,
text: str,
images: Union[Image, list[Image]] = None,
videos: Union[npt.NDArray, list[npt.NDArray]] = None,
images: Image | list[Image] = None,
videos: npt.NDArray | list[npt.NDArray] = None,
**kwargs,
):
from vllm.model_executor.models.internvl import (
@ -650,7 +649,7 @@ def _internvl_generate(
self,
pixel_values: torch.FloatTensor,
input_ids: torch.FloatTensor,
attention_mask: Optional[torch.LongTensor] = None,
attention_mask: torch.LongTensor | None = None,
**generate_kwargs,
) -> torch.LongTensor:
"""Generate method for InternVL2 model without fixed use_cache."""

View File

@ -2,10 +2,10 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Types for writing multimodal model tests."""
from collections.abc import Iterable
from collections.abc import Callable, Iterable
from enum import Enum
from pathlib import PosixPath
from typing import Any, Callable, NamedTuple, Optional, Union
from typing import Any, NamedTuple
import torch
from pytest import MarkDecorator
@ -52,16 +52,16 @@ VIDEO_BASE_PROMPT = f"{TEST_VIDEO_PLACEHOLDER}Why is this video funny?"
IMAGE_SIZE_FACTORS = [(), (1.0,), (1.0, 1.0, 1.0), (0.25, 0.5, 1.0)]
EMBEDDING_SIZE_FACTORS = [(), (1.0,), (1.0, 1.0, 1.0)]
RunnerOutput = tuple[list[int], str, Optional[SampleLogprobs]]
RunnerOutput = tuple[list[int], str, SampleLogprobs | None]
class PromptWithMultiModalInput(NamedTuple):
"""Holds the multimodal input for a single test case."""
prompts: list[str]
image_data: Optional[PromptImageInput] = None
video_data: Optional[PromptVideoInput] = None
audio_data: Optional[PromptAudioInput] = None
image_data: PromptImageInput | None = None
video_data: PromptVideoInput | None = None
audio_data: PromptAudioInput | None = None
class VLMTestType(Enum):
@ -87,17 +87,17 @@ class ImageSizeWrapper(NamedTuple):
type: SizeType
# A size factor is a wrapper of 0+ floats,
# while a fixed size contains an iterable of integer pairs
data: Union[Iterable[float], Iterable[tuple[int, int]]]
data: Iterable[float] | Iterable[tuple[int, int]]
class VLMTestInfo(NamedTuple):
"""Holds the configuration for 1+ tests for one model architecture."""
models: list[str]
test_type: Union[VLMTestType, Iterable[VLMTestType]]
test_type: VLMTestType | Iterable[VLMTestType]
# Should be None only if this is a CUSTOM_INPUTS test
prompt_formatter: Optional[Callable[[str], str]] = None
prompt_formatter: Callable[[str], str] | None = None
img_idx_to_prompt: Callable[[int], str] = lambda idx: "<image>\n"
video_idx_to_prompt: Callable[[int], str] = lambda idx: "<video>\n"
audio_idx_to_prompt: Callable[[int], str] = lambda idx: "<audio>\n"
@ -111,9 +111,9 @@ class VLMTestInfo(NamedTuple):
# Function for converting ImageAssets to image embeddings;
# We need to define this explicitly for embedding tests
convert_assets_to_embeddings: Optional[
Callable[[ImageTestAssets], list[torch.Tensor]]
] = None
convert_assets_to_embeddings: (
Callable[[ImageTestAssets], list[torch.Tensor]] | None
) = None
# Exposed options for vLLM runner; we change these in several tests,
# but the defaults are derived from VllmRunner & the engine defaults
@ -123,25 +123,25 @@ class VLMTestInfo(NamedTuple):
max_num_seqs: int = 256
runner: RunnerOption = "auto"
tensor_parallel_size: int = 1
vllm_runner_kwargs: Optional[dict[str, Any]] = None
vllm_runner_kwargs: dict[str, Any] | None = None
# Optional callable which gets a list of token IDs from the model tokenizer
get_stop_token_ids: Optional[Callable[[AnyTokenizer], list[int]]] = None
get_stop_token_ids: Callable[[AnyTokenizer], list[int]] | None = None
# Optional list of strings to stop generation, useful when stop tokens are
# not special tokens in the tokenizer
stop_str: Optional[list[str]] = None
stop_str: list[str] | None = None
# Exposed options for HF runner
hf_model_kwargs: Optional[dict[str, Any]] = None
hf_model_kwargs: dict[str, Any] | None = None
# Indicates we should explicitly pass the EOS from the tokenizer
use_tokenizer_eos: bool = False
auto_cls: type[_BaseAutoModelClass] = AutoModelForCausalLM
patch_hf_runner: Optional[Callable[[HfRunner], HfRunner]] = None
patch_hf_runner: Callable[[HfRunner], HfRunner] | None = None
# Post processors that if defined, will run on the outputs of the
# vLLM and HF runner, respectively (useful for sanitization, etc).
vllm_output_post_proc: Optional[Callable[[RunnerOutput, str], Any]] = None
hf_output_post_proc: Optional[Callable[[RunnerOutput, str], Any]] = None
vllm_output_post_proc: Callable[[RunnerOutput, str], Any] | None = None
hf_output_post_proc: Callable[[RunnerOutput, str], Any] | None = None
# Consumes the output of the callables above and checks if they're equal
comparator: Callable[..., None] = check_logprobs_close
@ -152,7 +152,7 @@ class VLMTestInfo(NamedTuple):
max_tokens: int = 128
num_logprobs: int = 5
dtype: str = "auto"
distributed_executor_backend: Optional[str] = None
distributed_executor_backend: str | None = None
# Only expanded in video tests
num_video_frames: int = 16
@ -162,19 +162,19 @@ class VLMTestInfo(NamedTuple):
# once per tests (much like concatenating and wrapping in one parametrize
# call)
image_size_factors: Iterable[Iterable[float]] = IMAGE_SIZE_FACTORS
image_sizes: Optional[Iterable[Iterable[tuple[int, int]]]] = None
image_sizes: Iterable[Iterable[tuple[int, int]]] | None = None
# Hack for updating a prompt to take into a local path; currently only used
# for Qwen-VL, which requires encoding the image path / url into the prompt
# for HF runner
prompt_path_encoder: Optional[
Callable[[PosixPath, str, Union[list[ImageAsset], ImageTestAssets]], str]
] = None # noqa: E501
prompt_path_encoder: (
Callable[[PosixPath, str, list[ImageAsset] | ImageTestAssets], str] | None
) = None # noqa: E501
# Allows configuring a test to run with custom inputs
custom_test_opts: Optional[list[CustomTestOptions]] = None
custom_test_opts: list[CustomTestOptions] | None = None
marks: Optional[list[MarkDecorator]] = None
marks: list[MarkDecorator] | None = None
def get_non_parametrized_runner_kwargs(self):
"""Returns a dictionary of expandable kwargs for items that are used
@ -207,10 +207,10 @@ class ExpandableVLMTestArgs(NamedTuple):
max_tokens: int
num_logprobs: int
dtype: str
distributed_executor_backend: Optional[str]
distributed_executor_backend: str | None
# Sizes are used for everything except for custom input tests
size_wrapper: Optional[ImageSizeWrapper] = None
size_wrapper: ImageSizeWrapper | None = None
# Video only
num_video_frames: Optional[int] = None
num_video_frames: int | None = None
# Custom inputs only
custom_test_opts: Optional[CustomTestOptions] = None
custom_test_opts: CustomTestOptions | None = None

View File

@ -1,7 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from typing import Callable
from collections.abc import Callable
import pytest
import torch

View File

@ -1,6 +1,5 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from typing import Union
import pytest
from transformers import AutoModel
@ -32,7 +31,7 @@ def vllm_reranker(
def create_image_param(url: str) -> ChatCompletionContentPartImageParam:
return {"type": "image_url", "image_url": {"url": f"{url}"}}
query: Union[list[str], ScoreMultiModalParam]
query: list[str] | ScoreMultiModalParam
if query_type == "text":
query = query_strs
elif query_type == "image":
@ -40,7 +39,7 @@ def vllm_reranker(
content=[create_image_param(url) for url in query_strs]
)
documents: Union[list[str], ScoreMultiModalParam]
documents: list[str] | ScoreMultiModalParam
if doc_type == "text":
documents = document_strs
elif doc_type == "image":

View File

@ -2,7 +2,6 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from functools import partial
from typing import Optional, Union
import numpy as np
import pytest
@ -247,7 +246,7 @@ MM_DATA_PATCHES = {
def _test_processing_correctness_one(
model_config: ModelConfig,
tokenizer: AnyTokenizer,
prompt: Union[str, list[int]],
prompt: str | list[int],
mm_data: MultiModalDataDict,
baseline_processor: BaseMultiModalProcessor,
cached_processor: BaseMultiModalProcessor,
@ -441,7 +440,7 @@ def _assert_inputs_equal(
a: MultiModalInputs,
b: MultiModalInputs,
*,
ignore_mm_keys: Optional[set[str]] = None,
ignore_mm_keys: set[str] | None = None,
msg: str = "",
):
if ignore_mm_keys is None:

View File

@ -3,7 +3,6 @@
"""Tests for H2OVL's multimodal preprocessing kwargs."""
from collections.abc import Mapping
from typing import Optional
import pytest
from PIL import Image
@ -149,7 +148,7 @@ def test_processor_override(
size_factors: list[int],
min_dynamic_patch: int,
max_dynamic_patch: int,
dynamic_image_size: Optional[bool],
dynamic_image_size: bool | None,
kwargs_on_init: bool,
):
mm_processor_kwargs = {

View File

@ -3,7 +3,6 @@
"""Tests for InternVL's multimodal preprocessing kwargs."""
from collections.abc import Mapping
from typing import Optional
import pytest
from PIL import Image
@ -103,7 +102,7 @@ def test_processor_override(
size_factors: list[int],
min_dynamic_patch: int,
max_dynamic_patch: int,
dynamic_image_size: Optional[bool],
dynamic_image_size: bool | None,
kwargs_on_init: bool,
):
mm_processor_kwargs = {

View File

@ -3,7 +3,6 @@
"""Tests for Nemotron-Nano-VL's multimodal preprocessing kwargs."""
from collections.abc import Mapping
from typing import Optional
import pytest
from PIL import Image
@ -105,7 +104,7 @@ def test_processor_override(
size_factors: list[int],
min_dynamic_patch: int,
max_dynamic_patch: int,
dynamic_image_size: Optional[bool],
dynamic_image_size: bool | None,
kwargs_on_init: bool,
):
mm_processor_kwargs = {

View File

@ -4,7 +4,7 @@ import tempfile
from collections.abc import Iterable
from contextlib import contextmanager
from functools import partial
from typing import Any, Union
from typing import Any, TypeAlias
import numpy as np
import pytest
@ -55,15 +55,15 @@ REPO_ID_TO_SKIP = {
}
ImageInput = list[Image.Image]
VideoInput = Union[
list[Image.Image], list[np.ndarray], list[tuple[np.ndarray, dict[str, Any]]]
]
VideoInput: TypeAlias = (
list[Image.Image] | list[np.ndarray] | list[tuple[np.ndarray, dict[str, Any]]]
)
AudioInput = list[tuple[np.ndarray, int]]
def _resize_data(
_data: Union[Image.Image, np.ndarray], size_factor: float
) -> Union[Image.Image, np.ndarray]:
_data: Image.Image | np.ndarray, size_factor: float
) -> Image.Image | np.ndarray:
assert size_factor <= 1, "Size factor must be less than 1"
# Image input
if isinstance(_data, Image.Image):
@ -88,8 +88,8 @@ def _resize_data(
def resize_mm_data(
data: Union[ImageInput, VideoInput, AudioInput], size_factors: tuple[float, ...]
) -> Union[ImageInput, VideoInput, AudioInput]:
data: ImageInput | VideoInput | AudioInput, size_factors: tuple[float, ...]
) -> ImageInput | VideoInput | AudioInput:
size_factors = size_factors[: len(data)]
if is_list_of(data, (Image.Image, np.ndarray, list)):
return [_resize_data(d, s) for d, s in zip(data, size_factors)]

View File

@ -1,7 +1,6 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from typing import Optional
import pytest
import torch
@ -30,7 +29,7 @@ def run_awq_test(
max_tokens: int,
num_logprobs: int,
tensor_parallel_size: int,
distributed_executor_backend: Optional[str] = None,
distributed_executor_backend: str | None = None,
):
images = [asset.pil_image for asset in image_assets]

View File

@ -3,7 +3,7 @@
from collections.abc import Mapping, Set
from dataclasses import dataclass, field
from typing import Any, Literal, Optional
from typing import Any, Literal
import pytest
import torch
@ -21,29 +21,29 @@ class _HfExamplesInfo:
extras: Mapping[str, str] = field(default_factory=dict)
"""Extra models to use for testing this architecture."""
tokenizer: Optional[str] = None
tokenizer: str | None = None
"""Set the tokenizer to load for this architecture."""
tokenizer_mode: TokenizerMode = "auto"
"""Set the tokenizer type for this architecture."""
speculative_model: Optional[str] = None
speculative_model: str | None = None
"""
The default model to use for testing this architecture, which is only used
for speculative decoding.
"""
min_transformers_version: Optional[str] = None
min_transformers_version: str | None = None
"""
The minimum version of HF Transformers that is required to run this model.
"""
max_transformers_version: Optional[str] = None
max_transformers_version: str | None = None
"""
The maximum version of HF Transformers that this model runs on.
"""
transformers_version_reason: Optional[str] = None
transformers_version_reason: str | None = None
"""
The reason for the minimum/maximum version requirement.
"""
@ -82,19 +82,19 @@ class _HfExamplesInfo:
hf_overrides: dict[str, Any] = field(default_factory=dict)
"""The ``hf_overrides`` required to load the model."""
max_model_len: Optional[int] = None
max_model_len: int | None = None
"""
The maximum model length to use for this model. Some models default to a
length that is too large to fit into memory in CI.
"""
revision: Optional[str] = None
revision: str | None = None
"""
The specific revision (commit hash, tag, or branch) to use for the model.
If not specified, the default revision will be used.
"""
max_num_seqs: Optional[int] = None
max_num_seqs: int | None = None
"""Maximum number of sequences to be processed in a single iteration."""
use_original_num_layers: bool = False
@ -109,7 +109,7 @@ class _HfExamplesInfo:
on_fail: Literal["error", "skip", "return"],
check_min_version: bool = True,
check_max_version: bool = True,
) -> Optional[str]:
) -> str | None:
"""
If the installed transformers version does not meet the requirements,
perform the given action.

View File

@ -2,7 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Test the functionality of the Transformers backend."""
from typing import Any, Optional, Union
from typing import Any
import pytest
@ -21,12 +21,12 @@ def get_model(arch: str) -> str:
def check_implementation(
runner_ref: type[Union[HfRunner, VllmRunner]],
runner_ref: type[HfRunner | VllmRunner],
runner_test: type[VllmRunner],
example_prompts: list[str],
model: str,
kwargs_ref: Optional[dict[str, Any]] = None,
kwargs_test: Optional[dict[str, Any]] = None,
kwargs_ref: dict[str, Any] | None = None,
kwargs_test: dict[str, Any] | None = None,
**kwargs,
):
if kwargs_ref is None:

View File

@ -4,7 +4,7 @@
import warnings
from collections.abc import Sequence
from dataclasses import dataclass
from typing import Any, Optional, Union
from typing import Any
import torch
import torch.nn.functional as F
@ -57,7 +57,7 @@ def check_outputs_equal(
#
# Assumes prompt logprobs were not requested.
TokensTextLogprobs = tuple[
list[int], str, Optional[Union[list[dict[int, float]], SampleLogprobs]]
list[int], str, list[dict[int, float]] | SampleLogprobs | None
]
# Allow for tokens to be represented as str's rather than IDs;
@ -68,7 +68,7 @@ TokensTextLogprobs = tuple[
#
# Assumes prompt logprobs were not requested.
TextTextLogprobs = tuple[
list[str], str, Optional[Union[list[dict[str, float]], list[dict[str, Logprob]]]]
list[str], str, list[dict[str, float]] | list[dict[str, Logprob]] | None
]
# Representation of generated sequence as a tuple of
@ -81,18 +81,18 @@ TextTextLogprobs = tuple[
TokensTextLogprobsPromptLogprobs = tuple[
list[int],
str,
Optional[Union[list[dict[int, float]], SampleLogprobs]],
Optional[Union[list[Optional[dict[int, float]]], PromptLogprobs]],
list[dict[int, float]] | SampleLogprobs | None,
list[dict[int, float] | None] | PromptLogprobs | None,
]
def check_logprobs_close(
*,
outputs_0_lst: Sequence[
Union[TokensTextLogprobs, TokensTextLogprobsPromptLogprobs, TextTextLogprobs]
TokensTextLogprobs | TokensTextLogprobsPromptLogprobs | TextTextLogprobs
],
outputs_1_lst: Sequence[
Union[TokensTextLogprobs, TokensTextLogprobsPromptLogprobs, TextTextLogprobs]
TokensTextLogprobs | TokensTextLogprobsPromptLogprobs | TextTextLogprobs
],
name_0: str,
name_1: str,
@ -273,9 +273,9 @@ def build_model_context(
model_id: str,
runner: RunnerOption = "auto",
dtype: ModelDType = "auto",
model_config_kwargs: Optional[dict[str, Any]] = None,
mm_processor_kwargs: Optional[dict[str, Any]] = None,
limit_mm_per_prompt: Optional[dict[str, int]] = None,
model_config_kwargs: dict[str, Any] | None = None,
mm_processor_kwargs: dict[str, Any] | None = None,
limit_mm_per_prompt: dict[str, int] | None = None,
mm_processor_cache_gb: int = 0,
):
"""Creates an InputProcessingContext for a given model.
@ -369,18 +369,18 @@ class ModelInfo:
name: str
architecture: str = ""
dtype: str = "auto"
max_model_len: Optional[int] = None
max_model_len: int | None = None
hf_dtype: str = "float32"
hf_overrides: Optional[dict[str, Any]] = None
hf_overrides: dict[str, Any] | None = None
default_pooling_type: str = ""
enable_test: bool = True
@dataclass
class EmbedModelInfo(ModelInfo):
mteb_score: Optional[float] = None
mteb_score: float | None = None
is_matryoshka: bool = False
matryoshka_dimensions: Optional[list[int]] = None
matryoshka_dimensions: list[int] | None = None
@dataclass
@ -395,7 +395,7 @@ class LASTPoolingEmbedModelInfo(EmbedModelInfo):
@dataclass
class RerankModelInfo(ModelInfo):
mteb_score: Optional[float] = None
mteb_score: float | None = None
@dataclass
@ -411,14 +411,14 @@ class LASTPoolingRerankModelInfo(RerankModelInfo):
@dataclass
class GenerateModelInfo(ModelInfo):
hf_dtype: str = "auto"
hf_ppl: Optional[float] = None
hf_ppl: float | None = None
def dummy_hf_overrides(
hf_config: PretrainedConfig,
*,
model_arch: str = "",
exist_overrides: Optional[dict[str, Any]] = None,
exist_overrides: dict[str, Any] | None = None,
use_original_num_layers: bool = False,
) -> PretrainedConfig:
"""
@ -507,8 +507,8 @@ def dummy_hf_overrides(
def check_transformers_version(
model: str,
min_transformers_version: Optional[str] = None,
max_transformers_version: Optional[str] = None,
min_transformers_version: str | None = None,
max_transformers_version: str | None = None,
):
from .registry import _HfExamplesInfo