Update Optional[x] -> x | None and Union[x, y] to x | y (#26633)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-10-12 17:51:31 +01:00
parent 9bb38130cb
commit 8fcaaf6a16
944 changed files with 9490 additions and 10121 deletions
--- a/tests/models/language/generation/test_common.py
+++ b/tests/models/language/generation/test_common.py
@ -1,6 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from typing import Optional

 import pytest
 import torch
@ -138,7 +137,7 @@ def test_models(
            example_prompts, max_tokens, num_logprobs
        )

-        prompt_embeds: Optional[list[torch.Tensor]] = [] if use_prompt_embeds else None
+        prompt_embeds: list[torch.Tensor] | None = [] if use_prompt_embeds else None

        prompt_token_ids = []
        for prompt in example_prompts:
--- a/tests/models/language/generation/test_hybrid.py
+++ b/tests/models/language/generation/test_hybrid.py
@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

-from typing import Callable
+from collections.abc import Callable

 import pytest

--- a/tests/models/language/generation_ppl_test/ppl_utils.py
+++ b/tests/models/language/generation_ppl_test/ppl_utils.py
@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 # Adapted from https://huggingface.co/docs/transformers/perplexity
-from typing import Optional, cast
+from typing import cast

 import pytest
 import torch
@ -85,7 +85,7 @@ def wikitext_ppl_test(
        n_tokens = 0
        for output in outputs:
            output = cast(TokensTextLogprobsPromptLogprobs, output)
-            token_datas = cast(list[Optional[dict[int, Logprob]]], output[3])
+            token_datas = cast(list[dict[int, Logprob] | None], output[3])

            assert token_datas[0] is None
            token_log_probs = []
--- a/tests/models/language/pooling/embed_utils.py
+++ b/tests/models/language/pooling/embed_utils.py
@ -1,7 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 from collections.abc import Sequence
-from typing import Optional

 import pytest

@ -13,7 +12,7 @@ def run_embedding_correctness_test(
    hf_model: "HfRunner",
    inputs: list[str],
    vllm_outputs: Sequence[list[float]],
-    dimensions: Optional[int] = None,
+    dimensions: int | None = None,
 ):
    hf_outputs = hf_model.encode(inputs)
    if dimensions:
--- a/tests/models/language/pooling/test_embedding.py
+++ b/tests/models/language/pooling/test_embedding.py
@ -1,6 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from typing import Optional

 import pytest

@ -66,7 +65,7 @@ def test_models(
            pooling_type="MEAN", normalize=False
        )

-    max_model_len: Optional[int] = 512
+    max_model_len: int | None = 512
    if model in [
        "sentence-transformers/all-MiniLM-L12-v2",
        "sentence-transformers/stsb-roberta-base-v2",
--- a/tests/models/language/pooling/test_gritlm.py
+++ b/tests/models/language/pooling/test_gritlm.py
@ -1,7 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from __future__ import annotations
-
 import numpy as np
 import openai
 import pytest
--- a/tests/models/language/pooling_mteb_test/mteb_utils.py
+++ b/tests/models/language/pooling_mteb_test/mteb_utils.py
@ -3,7 +3,6 @@

 import tempfile
 from collections.abc import Sequence
-from typing import Optional

 import mteb
 import numpy as np
@ -51,7 +50,7 @@ class VllmMtebEncoder(mteb.Encoder):

    def predict(
        self,
-        sentences: list[tuple[str, str, Optional[str]]],  # query, corpus, prompt
+        sentences: list[tuple[str, str, str | None]],  # query, corpus, prompt
        *args,
        **kwargs,
    ) -> np.ndarray:
@ -100,7 +99,7 @@ class ScoreClientMtebEncoder(mteb.Encoder):

    def predict(
        self,
-        sentences: list[tuple[str, str, Optional[str]]],  # query, corpus, prompt
+        sentences: list[tuple[str, str, str | None]],  # query, corpus, prompt
        *args,
        **kwargs,
    ) -> np.ndarray:
@ -294,7 +293,7 @@ def mteb_test_rerank_models_hf(
        original_predict = hf_model.predict

        def _predict(
-            sentences: list[tuple[str, str, Optional[str]]],  # query, corpus, prompt
+            sentences: list[tuple[str, str, str | None]],  # query, corpus, prompt
            *args,
            **kwargs,
        ):
--- a/tests/models/language/pooling_mteb_test/test_bge_reranker_v2_gemma.py
+++ b/tests/models/language/pooling_mteb_test/test_bge_reranker_v2_gemma.py
@ -1,6 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from typing import Any, Optional
+from typing import Any

 import numpy as np
 import pytest
@ -111,7 +111,7 @@ class GemmaMtebEncoder(VllmMtebEncoder):

    def predict(
        self,
-        sentences: list[tuple[str, str, Optional[str]]],  # query, corpus, prompt
+        sentences: list[tuple[str, str, str | None]],  # query, corpus, prompt
        *args,
        **kwargs,
    ) -> np.ndarray:
--- a/tests/models/multimodal/generation/test_granite_speech.py
+++ b/tests/models/multimodal/generation/test_granite_speech.py
@ -2,7 +2,6 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

 from collections.abc import Sequence
-from typing import Optional

 import pytest
 from transformers import AutoModelForSpeechSeq2Seq
@ -18,8 +17,8 @@ HF_AUDIO_PROMPT = "<|start_of_role|>system<|end_of_role|>Knowledge Cutoff Date:


 def vllm_to_hf_output(
-    vllm_output: tuple[list[int], str, Optional[SampleLogprobs]],
-) -> tuple[list[int], str, Optional[SampleLogprobs]]:
+    vllm_output: tuple[list[int], str, SampleLogprobs | None],
+) -> tuple[list[int], str, SampleLogprobs | None]:
    """Sanitize hf output to be comparable with vllm output."""
    output_ids, output_str, out_logprobs = vllm_output

@ -46,7 +45,7 @@ def run_test(
    max_tokens: int,
    num_logprobs: int,
    tensor_parallel_size: int,
-    distributed_executor_backend: Optional[str] = None,
+    distributed_executor_backend: str | None = None,
 ):
    """Inference result should be the same between hf and vllm.

--- a/tests/models/multimodal/generation/test_phi4_multimodal.py
+++ b/tests/models/multimodal/generation/test_phi4_multimodal.py
@ -3,7 +3,6 @@

 import os
 from collections.abc import Sequence
-from typing import Optional

 import librosa
 import pytest
@ -57,7 +56,7 @@ if current_platform.is_rocm():
 def run_test(
    hf_runner: type[HfRunner],
    vllm_runner: type[VllmRunner],
-    inputs: Sequence[tuple[list[str], PromptImageInput, Optional[PromptAudioInput]]],
+    inputs: Sequence[tuple[list[str], PromptImageInput, PromptAudioInput | None]],
    model: str,
    *,
    max_model_len: int,
@ -66,7 +65,7 @@ def run_test(
    num_logprobs: int,
    mm_limit: int,
    tensor_parallel_size: int,
-    distributed_executor_backend: Optional[str] = None,
+    distributed_executor_backend: str | None = None,
 ):
    """Inference result should be the same between hf and vllm.

--- a/tests/models/multimodal/generation/test_phi4mm.py
+++ b/tests/models/multimodal/generation/test_phi4mm.py
@ -3,7 +3,6 @@

 import os
 from collections.abc import Sequence
-from typing import Optional

 import librosa
 import pytest
@ -48,7 +47,7 @@ models = [model_path]


 def vllm_to_hf_output(
-    vllm_output: tuple[list[int], str, Optional[SampleLogprobs]], model: str
+    vllm_output: tuple[list[int], str, SampleLogprobs | None], model: str
 ):
    """Sanitize vllm output to be comparable with hf output."""
    _, output_str, out_logprobs = vllm_output
@ -79,7 +78,7 @@ if current_platform.is_rocm():
 def run_test(
    hf_runner: type[HfRunner],
    vllm_runner: type[VllmRunner],
-    inputs: Sequence[tuple[list[str], PromptImageInput, Optional[PromptAudioInput]]],
+    inputs: Sequence[tuple[list[str], PromptImageInput, PromptAudioInput | None]],
    model: str,
    *,
    max_model_len: int,
@ -88,7 +87,7 @@ def run_test(
    num_logprobs: int,
    mm_limit: int,
    tensor_parallel_size: int,
-    distributed_executor_backend: Optional[str] = None,
+    distributed_executor_backend: str | None = None,
 ):
    """Inference result should be the same between hf and vllm.

--- a/tests/models/multimodal/generation/test_pixtral.py
+++ b/tests/models/multimodal/generation/test_pixtral.py
@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import json
 from dataclasses import asdict
-from typing import TYPE_CHECKING, Any, Optional
+from typing import TYPE_CHECKING, Any

 import pytest
 from mistral_common.multimodal import download_image
@ -117,7 +117,7 @@ FIXTURE_LOGPROBS_CHAT = {
    MISTRAL_SMALL_3_1_ID: FIXTURES_PATH / "mistral_small_3_chat.json",
 }

-OutputsLogprobs = list[tuple[list[int], str, Optional[SampleLogprobs]]]
+OutputsLogprobs = list[tuple[list[int], str, SampleLogprobs | None]]


 # For the test author to store golden output in JSON
--- a/tests/models/multimodal/generation/test_qwen2_vl.py
+++ b/tests/models/multimodal/generation/test_qwen2_vl.py
@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

-from typing import Any, Optional, TypedDict, Union
+from typing import Any, TypedDict

 import numpy.typing as npt
 import pytest
@ -83,7 +83,7 @@ class Qwen2VLPromptVideoEmbeddingInput(TypedDict):


 def batch_make_image_embeddings(
-    image_batches: list[Union[Image.Image, list[Image.Image]]],
+    image_batches: list[Image.Image | list[Image.Image]],
    processor,
    llm: VllmRunner,
 ) -> list[Qwen2VLPromptImageEmbeddingInput]:
@ -272,7 +272,7 @@ def run_embedding_input_test(
    num_logprobs: int,
    mm_limit: int,
    tensor_parallel_size: int,
-    distributed_executor_backend: Optional[str] = None,
+    distributed_executor_backend: str | None = None,
 ):
    """Inference result should be the same between
    original image/video input and image/video embeddings input.
--- a/tests/models/multimodal/generation/test_whisper.py
+++ b/tests/models/multimodal/generation/test_whisper.py
@ -1,6 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from typing import Optional

 import pytest

@ -92,7 +91,7 @@ def run_test(
    model: str,
    *,
    tensor_parallel_size: int,
-    distributed_executor_backend: Optional[str] = None,
+    distributed_executor_backend: str | None = None,
 ) -> None:
    prompt_list = PROMPTS * 10
    expected_list = EXPECTED[model] * 10
--- a/tests/models/multimodal/generation/vlm_utils/builders.py
+++ b/tests/models/multimodal/generation/vlm_utils/builders.py
@ -2,9 +2,8 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """Helpers for building inputs that can be leveraged for different test types."""

-from collections.abc import Iterable
+from collections.abc import Callable, Iterable
 from pathlib import PosixPath
-from typing import Callable, Optional, Union

 import torch

@ -47,9 +46,9 @@ def replace_test_placeholder(

 def get_model_prompts(
    base_prompts: Iterable[str],
-    img_idx_to_prompt: Optional[Callable[[int], str]],
-    video_idx_to_prompt: Optional[Callable[[int], str]],
-    audio_idx_to_prompt: Optional[Callable[[int], str]],
+    img_idx_to_prompt: Callable[[int], str] | None,
+    video_idx_to_prompt: Callable[[int], str] | None,
+    audio_idx_to_prompt: Callable[[int], str] | None,
    prompt_formatter: Callable[[str], str],
 ) -> list[str]:
    """Given a model-agnostic base prompt and test configuration for a model(s)
@ -93,7 +92,7 @@ def build_single_image_inputs_from_test_info(
    test_info: VLMTestInfo,
    image_assets: ImageTestAssets,
    size_wrapper: ImageSizeWrapper,
-    tmp_path: Optional[PosixPath] = None,
+    tmp_path: PosixPath | None = None,
 ) -> list[PromptWithMultiModalInput]:
    if test_info.prompt_formatter is None:
        raise ValueError("Prompt formatter must be set to build single image inputs")
@ -147,7 +146,7 @@ def build_multi_image_inputs_from_test_info(
    test_info: VLMTestInfo,
    image_assets: ImageTestAssets,
    size_wrapper: ImageSizeWrapper,
-    tmp_path: Optional[PosixPath] = None,
+    tmp_path: PosixPath | None = None,
 ) -> list[PromptWithMultiModalInput]:
    if test_info.prompt_formatter is None:
        raise ValueError("Prompt formatter must be set to build multi image inputs")
@ -266,9 +265,7 @@ def build_video_inputs_from_test_info(
    ]


-def apply_image_size_scaling(
-    image, size: Union[float, tuple[int, int]], size_type: SizeType
-):
+def apply_image_size_scaling(image, size: float | tuple[int, int], size_type: SizeType):
    """Applies a size scaler to one image; this can be an image size factor,
    which scales the image while maintaining the aspect ratio"""
    # Special case for embeddings; if it's a tensor, it's only valid if we
--- a/tests/models/multimodal/generation/vlm_utils/core.py
+++ b/tests/models/multimodal/generation/vlm_utils/core.py
@ -2,7 +2,8 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """Core test implementation to be shared across modalities."""

-from typing import Any, Callable, Optional
+from collections.abc import Callable
+from typing import Any

 import torch
 from transformers.models.auto.auto_factory import _BaseAutoModelClass
@ -27,21 +28,21 @@ def run_test(
    enforce_eager: bool,
    max_model_len: int,
    max_num_seqs: int,
-    hf_output_post_proc: Optional[Callable[[RunnerOutput, str], Any]],
-    vllm_output_post_proc: Optional[Callable[[RunnerOutput, str], Any]],
+    hf_output_post_proc: Callable[[RunnerOutput, str], Any] | None,
+    vllm_output_post_proc: Callable[[RunnerOutput, str], Any] | None,
    auto_cls: type[_BaseAutoModelClass],
    use_tokenizer_eos: bool,
    comparator: Callable[..., None],
-    get_stop_token_ids: Optional[Callable[[AnyTokenizer], list[int]]],
-    stop_str: Optional[list[str]],
+    get_stop_token_ids: Callable[[AnyTokenizer], list[int]] | None,
+    stop_str: list[str] | None,
    limit_mm_per_prompt: dict[str, int],
-    vllm_runner_kwargs: Optional[dict[str, Any]],
-    hf_model_kwargs: Optional[dict[str, Any]],
-    patch_hf_runner: Optional[Callable[[HfRunner], HfRunner]],
+    vllm_runner_kwargs: dict[str, Any] | None,
+    hf_model_kwargs: dict[str, Any] | None,
+    patch_hf_runner: Callable[[HfRunner], HfRunner] | None,
    runner: RunnerOption = "auto",
-    distributed_executor_backend: Optional[str] = None,
+    distributed_executor_backend: str | None = None,
    tensor_parallel_size: int = 1,
-    vllm_embeddings: Optional[torch.Tensor] = None,
+    vllm_embeddings: torch.Tensor | None = None,
 ):
    """Modality agnostic test executor for comparing HF/vLLM outputs."""
    # In the case of embeddings, vLLM takes separate input tensors
--- a/tests/models/multimodal/generation/vlm_utils/custom_inputs.py
+++ b/tests/models/multimodal/generation/vlm_utils/custom_inputs.py
@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """Custom input builders for edge-cases in different models."""

-from typing import Callable
+from collections.abc import Callable

 from vllm.assets.image import ImageAsset
 from vllm.multimodal.image import rescale_image_size
--- a/tests/models/multimodal/generation/vlm_utils/model_utils.py
+++ b/tests/models/multimodal/generation/vlm_utils/model_utils.py
@ -7,7 +7,6 @@ typically specific to a small subset of models.

 import types
 from pathlib import PosixPath
-from typing import Optional, Union

 import numpy as np
 import numpy.typing as npt
@ -58,7 +57,7 @@ def fuyu_vllm_to_hf_output(vllm_output: RunnerOutput, model: str) -> RunnerOutpu

 def qwen_vllm_to_hf_output(
    vllm_output: RunnerOutput, model: str
-) -> tuple[list[int], str, Optional[SampleLogprobs]]:
+) -> tuple[list[int], str, SampleLogprobs | None]:
    """Sanitize vllm output [qwen models] to be comparable with hf output."""
    output_ids, output_str, out_logprobs = vllm_output

@ -69,7 +68,7 @@ def qwen_vllm_to_hf_output(

 def qwen2_vllm_to_hf_output(
    vllm_output: RunnerOutput, model: str
-) -> tuple[list[int], str, Optional[SampleLogprobs]]:
+) -> tuple[list[int], str, SampleLogprobs | None]:
    """Sanitize vllm output [qwen2 models] to be comparable with hf output."""
    output_ids, output_str, out_logprobs = vllm_output

@ -80,7 +79,7 @@ def qwen2_vllm_to_hf_output(

 def kimiv_vl_vllm_to_hf_output(
    vllm_output: RunnerOutput, model: str
-) -> tuple[list[int], str, Optional[SampleLogprobs]]:
+) -> tuple[list[int], str, SampleLogprobs | None]:
    """Sanitize vllm output [kimi_vl models] to be comparable with hf output."""
    output_ids, output_str, out_logprobs = vllm_output

@ -99,7 +98,7 @@ def llava_image_vllm_to_hf_output(

 def llava_video_vllm_to_hf_output(
    vllm_output: RunnerOutput, model: str
-) -> tuple[list[int], str, Optional[SampleLogprobs]]:
+) -> tuple[list[int], str, SampleLogprobs | None]:
    config = AutoConfig.from_pretrained(model)
    mm_token_id = config.video_token_index
    return _llava_vllm_to_hf_output(vllm_output, model, mm_token_id)
@ -263,7 +262,7 @@ def get_llava_embeddings(image_assets: ImageTestAssets):

 ####### Prompt path encoders for models that need models on disk
 def qwen_prompt_path_encoder(
-    tmp_path: PosixPath, prompt: str, assets: Union[list[ImageAsset], ImageTestAssets]
+    tmp_path: PosixPath, prompt: str, assets: list[ImageAsset] | ImageTestAssets
 ) -> str:
    """Given a temporary dir path, export one or more image assets into the
    tempdir & replace its contents with the local path to the string so that
@ -440,7 +439,7 @@ def h2ovl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
            self.max_num = self.config.max_dynamic_patch
            self.image_size = self.vision_config.image_size

-        def __call__(self, text: str, images: Union[Image, list[Image]], **kwargs):
+        def __call__(self, text: str, images: Image | list[Image], **kwargs):
            from vllm.model_executor.models.h2ovl import (
                IMG_CONTEXT,
                IMG_END,
@ -499,7 +498,7 @@ def skyworkr1v_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
            self.max_num = self.config.max_dynamic_patch
            self.image_size = self.vision_config.image_size

-        def __call__(self, text: str, images: Union[Image, list[Image]], **kwargs):
+        def __call__(self, text: str, images: Image | list[Image], **kwargs):
            from vllm.model_executor.models.skyworkr1v import (
                IMG_CONTEXT,
                IMG_END,
@ -560,8 +559,8 @@ def internvl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
        def __call__(
            self,
            text: str,
-            images: Union[Image, list[Image]] = None,
-            videos: Union[npt.NDArray, list[npt.NDArray]] = None,
+            images: Image | list[Image] = None,
+            videos: npt.NDArray | list[npt.NDArray] = None,
            **kwargs,
        ):
            from vllm.model_executor.models.internvl import (
@ -650,7 +649,7 @@ def _internvl_generate(
    self,
    pixel_values: torch.FloatTensor,
    input_ids: torch.FloatTensor,
-    attention_mask: Optional[torch.LongTensor] = None,
+    attention_mask: torch.LongTensor | None = None,
    **generate_kwargs,
 ) -> torch.LongTensor:
    """Generate method for InternVL2 model without fixed use_cache."""
--- a/tests/models/multimodal/generation/vlm_utils/types.py
+++ b/tests/models/multimodal/generation/vlm_utils/types.py
@ -2,10 +2,10 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """Types for writing multimodal model tests."""

-from collections.abc import Iterable
+from collections.abc import Callable, Iterable
 from enum import Enum
 from pathlib import PosixPath
-from typing import Any, Callable, NamedTuple, Optional, Union
+from typing import Any, NamedTuple

 import torch
 from pytest import MarkDecorator
@ -52,16 +52,16 @@ VIDEO_BASE_PROMPT = f"{TEST_VIDEO_PLACEHOLDER}Why is this video funny?"

 IMAGE_SIZE_FACTORS = [(), (1.0,), (1.0, 1.0, 1.0), (0.25, 0.5, 1.0)]
 EMBEDDING_SIZE_FACTORS = [(), (1.0,), (1.0, 1.0, 1.0)]
-RunnerOutput = tuple[list[int], str, Optional[SampleLogprobs]]
+RunnerOutput = tuple[list[int], str, SampleLogprobs | None]


 class PromptWithMultiModalInput(NamedTuple):
    """Holds the multimodal input for a single test case."""

    prompts: list[str]
-    image_data: Optional[PromptImageInput] = None
-    video_data: Optional[PromptVideoInput] = None
-    audio_data: Optional[PromptAudioInput] = None
+    image_data: PromptImageInput | None = None
+    video_data: PromptVideoInput | None = None
+    audio_data: PromptAudioInput | None = None


 class VLMTestType(Enum):
@ -87,17 +87,17 @@ class ImageSizeWrapper(NamedTuple):
    type: SizeType
    # A size factor is a wrapper of 0+ floats,
    # while a fixed size contains an iterable of integer pairs
-    data: Union[Iterable[float], Iterable[tuple[int, int]]]
+    data: Iterable[float] | Iterable[tuple[int, int]]


 class VLMTestInfo(NamedTuple):
    """Holds the configuration for 1+ tests for one model architecture."""

    models: list[str]
-    test_type: Union[VLMTestType, Iterable[VLMTestType]]
+    test_type: VLMTestType | Iterable[VLMTestType]

    # Should be None only if this is a CUSTOM_INPUTS test
-    prompt_formatter: Optional[Callable[[str], str]] = None
+    prompt_formatter: Callable[[str], str] | None = None
    img_idx_to_prompt: Callable[[int], str] = lambda idx: "<image>\n"
    video_idx_to_prompt: Callable[[int], str] = lambda idx: "<video>\n"
    audio_idx_to_prompt: Callable[[int], str] = lambda idx: "<audio>\n"
@ -111,9 +111,9 @@ class VLMTestInfo(NamedTuple):

    # Function for converting ImageAssets to image embeddings;
    # We need to define this explicitly for embedding tests
-    convert_assets_to_embeddings: Optional[
-        Callable[[ImageTestAssets], list[torch.Tensor]]
-    ] = None
+    convert_assets_to_embeddings: (
+        Callable[[ImageTestAssets], list[torch.Tensor]] | None
+    ) = None

    # Exposed options for vLLM runner; we change these in several tests,
    # but the defaults are derived from VllmRunner & the engine defaults
@ -123,25 +123,25 @@ class VLMTestInfo(NamedTuple):
    max_num_seqs: int = 256
    runner: RunnerOption = "auto"
    tensor_parallel_size: int = 1
-    vllm_runner_kwargs: Optional[dict[str, Any]] = None
+    vllm_runner_kwargs: dict[str, Any] | None = None

    # Optional callable which gets a list of token IDs from the model tokenizer
-    get_stop_token_ids: Optional[Callable[[AnyTokenizer], list[int]]] = None
+    get_stop_token_ids: Callable[[AnyTokenizer], list[int]] | None = None
    # Optional list of strings to stop generation, useful when stop tokens are
    # not special tokens in the tokenizer
-    stop_str: Optional[list[str]] = None
+    stop_str: list[str] | None = None

    # Exposed options for HF runner
-    hf_model_kwargs: Optional[dict[str, Any]] = None
+    hf_model_kwargs: dict[str, Any] | None = None
    # Indicates we should explicitly pass the EOS from the tokenizer
    use_tokenizer_eos: bool = False
    auto_cls: type[_BaseAutoModelClass] = AutoModelForCausalLM
-    patch_hf_runner: Optional[Callable[[HfRunner], HfRunner]] = None
+    patch_hf_runner: Callable[[HfRunner], HfRunner] | None = None

    # Post processors that, if defined, will run on the outputs of the
    # vLLM and HF runner, respectively (useful for sanitization, etc).
-    vllm_output_post_proc: Optional[Callable[[RunnerOutput, str], Any]] = None
-    hf_output_post_proc: Optional[Callable[[RunnerOutput, str], Any]] = None
+    vllm_output_post_proc: Callable[[RunnerOutput, str], Any] | None = None
+    hf_output_post_proc: Callable[[RunnerOutput, str], Any] | None = None

    # Consumes the output of the callables above and checks if they're equal
    comparator: Callable[..., None] = check_logprobs_close
@ -152,7 +152,7 @@ class VLMTestInfo(NamedTuple):
    max_tokens: int = 128
    num_logprobs: int = 5
    dtype: str = "auto"
-    distributed_executor_backend: Optional[str] = None
+    distributed_executor_backend: str | None = None
    # Only expanded in video tests
    num_video_frames: int = 16

@ -162,19 +162,19 @@ class VLMTestInfo(NamedTuple):
    # once per tests (much like concatenating and wrapping in one parametrize
    # call)
    image_size_factors: Iterable[Iterable[float]] = IMAGE_SIZE_FACTORS
-    image_sizes: Optional[Iterable[Iterable[tuple[int, int]]]] = None
+    image_sizes: Iterable[Iterable[tuple[int, int]]] | None = None

    # Hack for updating a prompt to take into a local path; currently only used
    # for Qwen-VL, which requires encoding the image path / url into the prompt
    # for HF runner
-    prompt_path_encoder: Optional[
-        Callable[[PosixPath, str, Union[list[ImageAsset], ImageTestAssets]], str]
-    ] = None  # noqa: E501
+    prompt_path_encoder: (
+        Callable[[PosixPath, str, list[ImageAsset] | ImageTestAssets], str] | None
+    ) = None  # noqa: E501

    # Allows configuring a test to run with custom inputs
-    custom_test_opts: Optional[list[CustomTestOptions]] = None
+    custom_test_opts: list[CustomTestOptions] | None = None

-    marks: Optional[list[MarkDecorator]] = None
+    marks: list[MarkDecorator] | None = None

    def get_non_parametrized_runner_kwargs(self):
        """Returns a dictionary of expandable kwargs for items that are used
@ -207,10 +207,10 @@ class ExpandableVLMTestArgs(NamedTuple):
    max_tokens: int
    num_logprobs: int
    dtype: str
-    distributed_executor_backend: Optional[str]
+    distributed_executor_backend: str | None
    # Sizes are used for everything except for custom input tests
-    size_wrapper: Optional[ImageSizeWrapper] = None
+    size_wrapper: ImageSizeWrapper | None = None
    # Video only
-    num_video_frames: Optional[int] = None
+    num_video_frames: int | None = None
    # Custom inputs only
-    custom_test_opts: Optional[CustomTestOptions] = None
+    custom_test_opts: CustomTestOptions | None = None
--- a/tests/models/multimodal/pooling/test_dse_qwen2_vl.py
+++ b/tests/models/multimodal/pooling/test_dse_qwen2_vl.py
@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

-from typing import Callable
+from collections.abc import Callable

 import pytest
 import torch
--- a/tests/models/multimodal/pooling/test_jinavl_reranker.py
+++ b/tests/models/multimodal/pooling/test_jinavl_reranker.py
@ -1,6 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from typing import Union

 import pytest
 from transformers import AutoModel
@ -32,7 +31,7 @@ def vllm_reranker(
    def create_image_param(url: str) -> ChatCompletionContentPartImageParam:
        return {"type": "image_url", "image_url": {"url": f"{url}"}}

-    query: Union[list[str], ScoreMultiModalParam]
+    query: list[str] | ScoreMultiModalParam
    if query_type == "text":
        query = query_strs
    elif query_type == "image":
@ -40,7 +39,7 @@ def vllm_reranker(
            content=[create_image_param(url) for url in query_strs]
        )

-    documents: Union[list[str], ScoreMultiModalParam]
+    documents: list[str] | ScoreMultiModalParam
    if doc_type == "text":
        documents = document_strs
    elif doc_type == "image":
--- a/tests/models/multimodal/processing/test_common.py
+++ b/tests/models/multimodal/processing/test_common.py
@ -2,7 +2,6 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

 from functools import partial
-from typing import Optional, Union

 import numpy as np
 import pytest
@ -247,7 +246,7 @@ MM_DATA_PATCHES = {
 def _test_processing_correctness_one(
    model_config: ModelConfig,
    tokenizer: AnyTokenizer,
-    prompt: Union[str, list[int]],
+    prompt: str | list[int],
    mm_data: MultiModalDataDict,
    baseline_processor: BaseMultiModalProcessor,
    cached_processor: BaseMultiModalProcessor,
@ -441,7 +440,7 @@ def _assert_inputs_equal(
    a: MultiModalInputs,
    b: MultiModalInputs,
    *,
-    ignore_mm_keys: Optional[set[str]] = None,
+    ignore_mm_keys: set[str] | None = None,
    msg: str = "",
 ):
    if ignore_mm_keys is None:
--- a/tests/models/multimodal/processing/test_h2ovl.py
+++ b/tests/models/multimodal/processing/test_h2ovl.py
@ -3,7 +3,6 @@
 """Tests for H2OVL's multimodal preprocessing kwargs."""

 from collections.abc import Mapping
-from typing import Optional

 import pytest
 from PIL import Image
@ -149,7 +148,7 @@ def test_processor_override(
    size_factors: list[int],
    min_dynamic_patch: int,
    max_dynamic_patch: int,
-    dynamic_image_size: Optional[bool],
+    dynamic_image_size: bool | None,
    kwargs_on_init: bool,
 ):
    mm_processor_kwargs = {
--- a/tests/models/multimodal/processing/test_internvl.py
+++ b/tests/models/multimodal/processing/test_internvl.py
@ -3,7 +3,6 @@
 """Tests for InternVL's multimodal preprocessing kwargs."""

 from collections.abc import Mapping
-from typing import Optional

 import pytest
 from PIL import Image
@ -103,7 +102,7 @@ def test_processor_override(
    size_factors: list[int],
    min_dynamic_patch: int,
    max_dynamic_patch: int,
-    dynamic_image_size: Optional[bool],
+    dynamic_image_size: bool | None,
    kwargs_on_init: bool,
 ):
    mm_processor_kwargs = {
--- a/tests/models/multimodal/processing/test_nemotron_vl.py
+++ b/tests/models/multimodal/processing/test_nemotron_vl.py
@ -3,7 +3,6 @@
 """Tests for Nemotron-Nano-VL's multimodal preprocessing kwargs."""

 from collections.abc import Mapping
-from typing import Optional

 import pytest
 from PIL import Image
@ -105,7 +104,7 @@ def test_processor_override(
    size_factors: list[int],
    min_dynamic_patch: int,
    max_dynamic_patch: int,
-    dynamic_image_size: Optional[bool],
+    dynamic_image_size: bool | None,
    kwargs_on_init: bool,
 ):
    mm_processor_kwargs = {
--- a/tests/models/multimodal/processing/test_tensor_schema.py
+++ b/tests/models/multimodal/processing/test_tensor_schema.py
@ -4,7 +4,7 @@ import tempfile
 from collections.abc import Iterable
 from contextlib import contextmanager
 from functools import partial
-from typing import Any, Union
+from typing import Any, TypeAlias

 import numpy as np
 import pytest
@ -55,15 +55,15 @@ REPO_ID_TO_SKIP = {
 }

 ImageInput = list[Image.Image]
-VideoInput = Union[
-    list[Image.Image], list[np.ndarray], list[tuple[np.ndarray, dict[str, Any]]]
-]
+VideoInput: TypeAlias = (
+    list[Image.Image] | list[np.ndarray] | list[tuple[np.ndarray, dict[str, Any]]]
+)
 AudioInput = list[tuple[np.ndarray, int]]


 def _resize_data(
-    _data: Union[Image.Image, np.ndarray], size_factor: float
-) -> Union[Image.Image, np.ndarray]:
+    _data: Image.Image | np.ndarray, size_factor: float
+) -> Image.Image | np.ndarray:
    assert size_factor <= 1, "Size factor must be less than 1"
    # Image input
    if isinstance(_data, Image.Image):
@ -88,8 +88,8 @@ def _resize_data(


 def resize_mm_data(
-    data: Union[ImageInput, VideoInput, AudioInput], size_factors: tuple[float, ...]
-) -> Union[ImageInput, VideoInput, AudioInput]:
+    data: ImageInput | VideoInput | AudioInput, size_factors: tuple[float, ...]
+) -> ImageInput | VideoInput | AudioInput:
    size_factors = size_factors[: len(data)]
    if is_list_of(data, (Image.Image, np.ndarray, list)):
        return [_resize_data(d, s) for d, s in zip(data, size_factors)]
--- a/tests/models/quantization/test_awq.py
+++ b/tests/models/quantization/test_awq.py
@ -1,7 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

-from typing import Optional

 import pytest
 import torch
@ -30,7 +29,7 @@ def run_awq_test(
    max_tokens: int,
    num_logprobs: int,
    tensor_parallel_size: int,
-    distributed_executor_backend: Optional[str] = None,
+    distributed_executor_backend: str | None = None,
 ):
    images = [asset.pil_image for asset in image_assets]

--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@ -3,7 +3,7 @@

 from collections.abc import Mapping, Set
 from dataclasses import dataclass, field
-from typing import Any, Literal, Optional
+from typing import Any, Literal

 import pytest
 import torch
@ -21,29 +21,29 @@ class _HfExamplesInfo:
    extras: Mapping[str, str] = field(default_factory=dict)
    """Extra models to use for testing this architecture."""

-    tokenizer: Optional[str] = None
+    tokenizer: str | None = None
    """Set the tokenizer to load for this architecture."""

    tokenizer_mode: TokenizerMode = "auto"
    """Set the tokenizer type for this architecture."""

-    speculative_model: Optional[str] = None
+    speculative_model: str | None = None
    """
    The default model to use for testing this architecture, which is only used
    for speculative decoding.
    """

-    min_transformers_version: Optional[str] = None
+    min_transformers_version: str | None = None
    """
    The minimum version of HF Transformers that is required to run this model.
    """

-    max_transformers_version: Optional[str] = None
+    max_transformers_version: str | None = None
    """
    The maximum version of HF Transformers that this model runs on.
    """

-    transformers_version_reason: Optional[str] = None
+    transformers_version_reason: str | None = None
    """
    The reason for the minimum/maximum version requirement.
    """
@ -82,19 +82,19 @@ class _HfExamplesInfo:
    hf_overrides: dict[str, Any] = field(default_factory=dict)
    """The ``hf_overrides`` required to load the model."""

-    max_model_len: Optional[int] = None
+    max_model_len: int | None = None
    """
    The maximum model length to use for this model. Some models default to a
    length that is too large to fit into memory in CI.
    """

-    revision: Optional[str] = None
+    revision: str | None = None
    """
    The specific revision (commit hash, tag, or branch) to use for the model.
    If not specified, the default revision will be used.
    """

-    max_num_seqs: Optional[int] = None
+    max_num_seqs: int | None = None
    """Maximum number of sequences to be processed in a single iteration."""

    use_original_num_layers: bool = False
@ -109,7 +109,7 @@ class _HfExamplesInfo:
        on_fail: Literal["error", "skip", "return"],
        check_min_version: bool = True,
        check_max_version: bool = True,
-    ) -> Optional[str]:
+    ) -> str | None:
        """
        If the installed transformers version does not meet the requirements,
        perform the given action.
--- a/tests/models/test_transformers.py
+++ b/tests/models/test_transformers.py
@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """Test the functionality of the Transformers backend."""

-from typing import Any, Optional, Union
+from typing import Any

 import pytest

@ -21,12 +21,12 @@ def get_model(arch: str) -> str:


 def check_implementation(
-    runner_ref: type[Union[HfRunner, VllmRunner]],
+    runner_ref: type[HfRunner | VllmRunner],
    runner_test: type[VllmRunner],
    example_prompts: list[str],
    model: str,
-    kwargs_ref: Optional[dict[str, Any]] = None,
-    kwargs_test: Optional[dict[str, Any]] = None,
+    kwargs_ref: dict[str, Any] | None = None,
+    kwargs_test: dict[str, Any] | None = None,
    **kwargs,
 ):
    if kwargs_ref is None:
--- a/tests/models/utils.py
+++ b/tests/models/utils.py
@ -4,7 +4,7 @@
 import warnings
 from collections.abc import Sequence
 from dataclasses import dataclass
-from typing import Any, Optional, Union
+from typing import Any

 import torch
 import torch.nn.functional as F
@ -57,7 +57,7 @@ def check_outputs_equal(
 #
 # Assumes prompt logprobs were not requested.
 TokensTextLogprobs = tuple[
-    list[int], str, Optional[Union[list[dict[int, float]], SampleLogprobs]]
+    list[int], str, list[dict[int, float]] | SampleLogprobs | None
 ]

 # Allow for tokens to be represented as str's rather than IDs;
@ -68,7 +68,7 @@ TokensTextLogprobs = tuple[
 #
 # Assumes prompt logprobs were not requested.
 TextTextLogprobs = tuple[
-    list[str], str, Optional[Union[list[dict[str, float]], list[dict[str, Logprob]]]]
+    list[str], str, list[dict[str, float]] | list[dict[str, Logprob]] | None
 ]

 # Representation of generated sequence as a tuple of
@ -81,18 +81,18 @@ TextTextLogprobs = tuple[
 TokensTextLogprobsPromptLogprobs = tuple[
    list[int],
    str,
-    Optional[Union[list[dict[int, float]], SampleLogprobs]],
-    Optional[Union[list[Optional[dict[int, float]]], PromptLogprobs]],
+    list[dict[int, float]] | SampleLogprobs | None,
+    list[dict[int, float] | None] | PromptLogprobs | None,
 ]


 def check_logprobs_close(
    *,
    outputs_0_lst: Sequence[
-        Union[TokensTextLogprobs, TokensTextLogprobsPromptLogprobs, TextTextLogprobs]
+        TokensTextLogprobs | TokensTextLogprobsPromptLogprobs | TextTextLogprobs
    ],
    outputs_1_lst: Sequence[
-        Union[TokensTextLogprobs, TokensTextLogprobsPromptLogprobs, TextTextLogprobs]
+        TokensTextLogprobs | TokensTextLogprobsPromptLogprobs | TextTextLogprobs
    ],
    name_0: str,
    name_1: str,
@ -273,9 +273,9 @@ def build_model_context(
    model_id: str,
    runner: RunnerOption = "auto",
    dtype: ModelDType = "auto",
-    model_config_kwargs: Optional[dict[str, Any]] = None,
-    mm_processor_kwargs: Optional[dict[str, Any]] = None,
-    limit_mm_per_prompt: Optional[dict[str, int]] = None,
+    model_config_kwargs: dict[str, Any] | None = None,
+    mm_processor_kwargs: dict[str, Any] | None = None,
+    limit_mm_per_prompt: dict[str, int] | None = None,
    mm_processor_cache_gb: int = 0,
 ):
    """Creates an InputProcessingContext for a given model.
@ -369,18 +369,18 @@ class ModelInfo:
    name: str
    architecture: str = ""
    dtype: str = "auto"
-    max_model_len: Optional[int] = None
+    max_model_len: int | None = None
    hf_dtype: str = "float32"
-    hf_overrides: Optional[dict[str, Any]] = None
+    hf_overrides: dict[str, Any] | None = None
    default_pooling_type: str = ""
    enable_test: bool = True


@dataclass
 class EmbedModelInfo(ModelInfo):
-    mteb_score: Optional[float] = None
+    mteb_score: float | None = None
    is_matryoshka: bool = False
-    matryoshka_dimensions: Optional[list[int]] = None
+    matryoshka_dimensions: list[int] | None = None


@dataclass
@ -395,7 +395,7 @@ class LASTPoolingEmbedModelInfo(EmbedModelInfo):

@dataclass
 class RerankModelInfo(ModelInfo):
-    mteb_score: Optional[float] = None
+    mteb_score: float | None = None


@dataclass
@ -411,14 +411,14 @@ class LASTPoolingRerankModelInfo(RerankModelInfo):
@dataclass
 class GenerateModelInfo(ModelInfo):
    hf_dtype: str = "auto"
-    hf_ppl: Optional[float] = None
+    hf_ppl: float | None = None


 def dummy_hf_overrides(
    hf_config: PretrainedConfig,
    *,
    model_arch: str = "",
-    exist_overrides: Optional[dict[str, Any]] = None,
+    exist_overrides: dict[str, Any] | None = None,
    use_original_num_layers: bool = False,
 ) -> PretrainedConfig:
    """
@ -507,8 +507,8 @@ def dummy_hf_overrides(

 def check_transformers_version(
    model: str,
-    min_transformers_version: Optional[str] = None,
-    max_transformers_version: Optional[str] = None,
+    min_transformers_version: str | None = None,
+    max_transformers_version: str | None = None,
 ):
    from .registry import _HfExamplesInfo