Update deprecated Python 3.8 typing (#13971)

2025-03-03 01:34:51 +00:00
parent bf33700ecd
commit cf069aa8aa
300 changed files with 2294 additions and 2347 deletions
--- a/tests/models/decoder_only/audio_language/test_ultravox.py
+++ b/tests/models/decoder_only/audio_language/test_ultravox.py
@ -1,6 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0

-from typing import List, Optional, Tuple, Type
+from typing import Optional

 import numpy as np
 import pytest
@ -17,7 +17,7 @@ from ...utils import check_logprobs_close

 MODEL_NAME = "fixie-ai/ultravox-v0_4"

-AudioTuple = Tuple[np.ndarray, int]
+AudioTuple = tuple[np.ndarray, int]

 VLLM_PLACEHOLDER = "<|audio|>"
 HF_PLACEHOLDER = "<|audio|>"
@ -78,7 +78,7 @@ def _get_prompt(audio_count, question, placeholder):
                                         add_generation_prompt=True)


-def vllm_to_hf_output(vllm_output: Tuple[List[int], str,
+def vllm_to_hf_output(vllm_output: tuple[list[int], str,
                                         Optional[SampleLogprobs]],
                      model: str):
    """Sanitize vllm output to be comparable with hf output."""
@ -96,9 +96,9 @@ def vllm_to_hf_output(vllm_output: Tuple[List[int], str,


 def run_test(
-    hf_runner: Type[HfRunner],
-    vllm_runner: Type[VllmRunner],
-    prompts_and_audios: List[Tuple[str, str, AudioTuple]],
+    hf_runner: type[HfRunner],
+    vllm_runner: type[VllmRunner],
+    prompts_and_audios: list[tuple[str, str, AudioTuple]],
    model: str,
    *,
    dtype: str,
@ -158,8 +158,8 @@ def run_test(


 def run_multi_audio_test(
-    vllm_runner: Type[VllmRunner],
-    prompts_and_audios: List[Tuple[str, List[AudioTuple]]],
+    vllm_runner: type[VllmRunner],
+    prompts_and_audios: list[tuple[str, list[AudioTuple]]],
    model: str,
    *,
    dtype: str,
--- a/tests/models/decoder_only/language/test_gguf.py
+++ b/tests/models/decoder_only/language/test_gguf.py
@ -5,7 +5,7 @@ Note: To pass the test, quantization higher than Q4 should be used
 """

 import os
-from typing import List, NamedTuple, Type
+from typing import NamedTuple

 import pytest
 from huggingface_hub import hf_hub_download
@ -90,8 +90,8 @@ MODELS = [
@pytest.mark.parametrize("tp_size", [1, 2])
 def test_models(
    num_gpus_available: int,
-    vllm_runner: Type[VllmRunner],
-    example_prompts: List[str],
+    vllm_runner: type[VllmRunner],
+    example_prompts: list[str],
    model: GGUFTestConfig,
    dtype: str,
    max_tokens: int,
--- a/tests/models/decoder_only/language/test_modelopt.py
+++ b/tests/models/decoder_only/language/test_modelopt.py
@ -5,7 +5,6 @@
 Note: these tests will only pass on H100
 """
 import os
-from typing import List

 import pytest
 from transformers import AutoTokenizer
@ -65,7 +64,7 @@ def test_models(example_prompts, model_name) -> None:
        for prompt in example_prompts
    ]
    params = SamplingParams(max_tokens=20, temperature=0)
-    generations: List[str] = []
+    generations: list[str] = []
    # Note: these need to be run 1 at a time due to numerical precision,
    # since the expected strs were generated this way.
    for prompt in formatted_prompts:
--- a/tests/models/decoder_only/vision_language/test_awq.py
+++ b/tests/models/decoder_only/vision_language/test_awq.py
@ -1,6 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0

-from typing import List, Optional, Type
+from typing import Optional

 import pytest
 import torch
@ -19,12 +19,12 @@ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({


 def run_awq_test(
-    vllm_runner: Type[VllmRunner],
+    vllm_runner: type[VllmRunner],
    image_assets: _ImageAssets,
    source_model: str,
    quant_model: str,
    *,
-    size_factors: List[float],
+    size_factors: list[float],
    dtype: str,
    max_tokens: int,
    num_logprobs: int,
--- a/tests/models/decoder_only/vision_language/test_models.py
+++ b/tests/models/decoder_only/vision_language/test_models.py
@ -6,7 +6,6 @@ import math
 import os
 from collections import defaultdict
 from pathlib import PosixPath
-from typing import Type

 import pytest
 from packaging.version import Version
@ -562,8 +561,8 @@ VLM_TEST_SETTINGS = _mark_splits(VLM_TEST_SETTINGS, num_groups=2)
    ))
 def test_single_image_models(tmp_path: PosixPath, model_type: str,
                             test_case: ExpandableVLMTestArgs,
-                             hf_runner: Type[HfRunner],
-                             vllm_runner: Type[VllmRunner],
+                             hf_runner: type[HfRunner],
+                             vllm_runner: type[VllmRunner],
                             image_assets: _ImageAssets):
    model_test_info = VLM_TEST_SETTINGS[model_type]
    runners.run_single_image_test(
@ -585,8 +584,8 @@ def test_single_image_models(tmp_path: PosixPath, model_type: str,
    ))
 def test_multi_image_models(tmp_path: PosixPath, model_type: str,
                            test_case: ExpandableVLMTestArgs,
-                            hf_runner: Type[HfRunner],
-                            vllm_runner: Type[VllmRunner],
+                            hf_runner: type[HfRunner],
+                            vllm_runner: type[VllmRunner],
                            image_assets: _ImageAssets):
    model_test_info = VLM_TEST_SETTINGS[model_type]
    runners.run_multi_image_test(
@ -608,8 +607,8 @@ def test_multi_image_models(tmp_path: PosixPath, model_type: str,
    ))
 def test_image_embedding_models(model_type: str,
                                test_case: ExpandableVLMTestArgs,
-                                hf_runner: Type[HfRunner],
-                                vllm_runner: Type[VllmRunner],
+                                hf_runner: type[HfRunner],
+                                vllm_runner: type[VllmRunner],
                                image_assets: _ImageAssets):
    model_test_info = VLM_TEST_SETTINGS[model_type]
    runners.run_embedding_test(
@ -629,7 +628,7 @@ def test_image_embedding_models(model_type: str,
        fork_new_process_for_each_test=False,
    ))
 def test_video_models(model_type: str, test_case: ExpandableVLMTestArgs,
-                      hf_runner: Type[HfRunner], vllm_runner: Type[VllmRunner],
+                      hf_runner: type[HfRunner], vllm_runner: type[VllmRunner],
                      video_assets: _VideoAssets):
    model_test_info = VLM_TEST_SETTINGS[model_type]
    runners.run_video_test(
@ -651,8 +650,8 @@ def test_video_models(model_type: str, test_case: ExpandableVLMTestArgs,
 def test_custom_inputs_models(
    model_type: str,
    test_case: ExpandableVLMTestArgs,
-    hf_runner: Type[HfRunner],
-    vllm_runner: Type[VllmRunner],
+    hf_runner: type[HfRunner],
+    vllm_runner: type[VllmRunner],
 ):
    model_test_info = VLM_TEST_SETTINGS[model_type]
    runners.run_custom_inputs_test(
@ -674,8 +673,8 @@ def test_custom_inputs_models(
@fork_new_process_for_each_test
 def test_single_image_models_heavy(tmp_path: PosixPath, model_type: str,
                                   test_case: ExpandableVLMTestArgs,
-                                   hf_runner: Type[HfRunner],
-                                   vllm_runner: Type[VllmRunner],
+                                   hf_runner: type[HfRunner],
+                                   vllm_runner: type[VllmRunner],
                                   image_assets: _ImageAssets):
    model_test_info = VLM_TEST_SETTINGS[model_type]
    runners.run_single_image_test(
@ -698,8 +697,8 @@ def test_single_image_models_heavy(tmp_path: PosixPath, model_type: str,
@fork_new_process_for_each_test
 def test_multi_image_models_heavy(tmp_path: PosixPath, model_type: str,
                                  test_case: ExpandableVLMTestArgs,
-                                  hf_runner: Type[HfRunner],
-                                  vllm_runner: Type[VllmRunner],
+                                  hf_runner: type[HfRunner],
+                                  vllm_runner: type[VllmRunner],
                                  image_assets: _ImageAssets):
    model_test_info = VLM_TEST_SETTINGS[model_type]
    runners.run_multi_image_test(
@ -722,8 +721,8 @@ def test_multi_image_models_heavy(tmp_path: PosixPath, model_type: str,
@fork_new_process_for_each_test
 def test_image_embedding_models_heavy(model_type: str,
                                      test_case: ExpandableVLMTestArgs,
-                                      hf_runner: Type[HfRunner],
-                                      vllm_runner: Type[VllmRunner],
+                                      hf_runner: type[HfRunner],
+                                      vllm_runner: type[VllmRunner],
                                      image_assets: _ImageAssets):
    model_test_info = VLM_TEST_SETTINGS[model_type]
    runners.run_embedding_test(
@ -743,8 +742,8 @@ def test_image_embedding_models_heavy(model_type: str,
        fork_new_process_for_each_test=True,
    ))
 def test_video_models_heavy(model_type: str, test_case: ExpandableVLMTestArgs,
-                            hf_runner: Type[HfRunner],
-                            vllm_runner: Type[VllmRunner],
+                            hf_runner: type[HfRunner],
+                            vllm_runner: type[VllmRunner],
                            video_assets: _VideoAssets):
    model_test_info = VLM_TEST_SETTINGS[model_type]
    runners.run_video_test(
@ -767,8 +766,8 @@ def test_video_models_heavy(model_type: str, test_case: ExpandableVLMTestArgs,
 def test_custom_inputs_models_heavy(
    model_type: str,
    test_case: ExpandableVLMTestArgs,
-    hf_runner: Type[HfRunner],
-    vllm_runner: Type[VllmRunner],
+    hf_runner: type[HfRunner],
+    vllm_runner: type[VllmRunner],
 ):
    model_test_info = VLM_TEST_SETTINGS[model_type]
    runners.run_custom_inputs_test(
--- a/tests/models/decoder_only/vision_language/test_phi3v.py
+++ b/tests/models/decoder_only/vision_language/test_phi3v.py
@ -2,7 +2,7 @@

 import os
 import re
-from typing import List, Optional, Tuple, Type
+from typing import Optional

 import pytest
 from transformers import AutoTokenizer
@ -25,7 +25,7 @@ HF_MULTIIMAGE_IMAGE_PROMPT = "<|user|>\n<|image_1|>\n<|image_2|>\nDescribe these
 models = ["microsoft/Phi-3.5-vision-instruct"]


-def vllm_to_hf_output(vllm_output: Tuple[List[int], str,
+def vllm_to_hf_output(vllm_output: tuple[list[int], str,
                                         Optional[SampleLogprobs]],
                      model: str):
    """Sanitize vllm output to be comparable with hf output."""
@ -55,9 +55,9 @@ if current_platform.is_rocm():


 def run_test(
-    hf_runner: Type[HfRunner],
-    vllm_runner: Type[VllmRunner],
-    inputs: List[Tuple[List[str], PromptImageInput]],
+    hf_runner: type[HfRunner],
+    vllm_runner: type[VllmRunner],
+    inputs: list[tuple[list[str], PromptImageInput]],
    model: str,
    *,
    dtype: str,
--- a/tests/models/decoder_only/vision_language/test_pixtral.py
+++ b/tests/models/decoder_only/vision_language/test_pixtral.py
@ -6,7 +6,7 @@ Run `pytest tests/models/test_mistral.py`.
 import json
 import uuid
 from dataclasses import asdict
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
+from typing import TYPE_CHECKING, Any, Optional

 import pytest
 from mistral_common.multimodal import download_image
@ -38,7 +38,7 @@ IMG_URLS = [
 PROMPT = "Describe each image in one short sentence."


-def _create_msg_format(urls: List[str]) -> List[Dict[str, Any]]:
+def _create_msg_format(urls: list[str]) -> list[dict[str, Any]]:
    return [{
        "role":
        "user",
@ -54,7 +54,7 @@ def _create_msg_format(urls: List[str]) -> List[Dict[str, Any]]:
    }]


-def _create_msg_format_hf(urls: List[str]) -> List[Dict[str, Any]]:
+def _create_msg_format_hf(urls: list[str]) -> list[dict[str, Any]]:
    return [{
        "role":
        "user",
@ -68,7 +68,7 @@ def _create_msg_format_hf(urls: List[str]) -> List[Dict[str, Any]]:
    }]


-def _create_engine_inputs(urls: List[str]) -> TokensPrompt:
+def _create_engine_inputs(urls: list[str]) -> TokensPrompt:
    msg = _create_msg_format(urls)

    tokenizer = MistralTokenizer.from_model("pixtral")
@ -89,7 +89,7 @@ def _create_engine_inputs(urls: List[str]) -> TokensPrompt:
    return engine_inputs


-def _create_engine_inputs_hf(urls: List[str]) -> TextPrompt:
+def _create_engine_inputs_hf(urls: list[str]) -> TextPrompt:
    msg = _create_msg_format_hf(urls)

    tokenizer = AutoProcessor.from_pretrained("mistral-community/pixtral-12b")
@ -128,7 +128,7 @@ assert FIXTURES_PATH.exists()
 FIXTURE_LOGPROBS_CHAT = FIXTURES_PATH / "pixtral_chat.json"
 FIXTURE_LOGPROBS_ENGINE = FIXTURES_PATH / "pixtral_chat_engine.json"

-OutputsLogprobs = List[Tuple[List[int], str, Optional[SampleLogprobs]]]
+OutputsLogprobs = list[tuple[list[int], str, Optional[SampleLogprobs]]]


 # For the test author to store golden output in JSON
--- a/tests/models/decoder_only/vision_language/test_qwen2_vl.py
+++ b/tests/models/decoder_only/vision_language/test_qwen2_vl.py
@ -1,6 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0

-from typing import Any, List, Optional, Tuple, Type, TypedDict, Union
+from typing import Any, Optional, TypedDict, Union

 import numpy.typing as npt
 import pytest
@ -69,21 +69,21 @@ class Qwen2VLPromptVideoEmbeddingInput(TypedDict):


 def batch_make_image_embeddings(
-        image_batches: List[Union[Image.Image, List[Image.Image]]], processor,
-        llm: VllmRunner) -> List[Qwen2VLPromptImageEmbeddingInput]:
+        image_batches: list[Union[Image.Image, list[Image.Image]]], processor,
+        llm: VllmRunner) -> list[Qwen2VLPromptImageEmbeddingInput]:
    """batched image embeddings for Qwen2-VL

    This will infer all images' embeddings in a single batch, 
      and split the result according to input batches.

    image_batches:
-      - Single-image batches: `List[Image.Image]`
-      - Multiple-image batches: `List[List[Image.Image]]]`
+      - Single-image batches: `list[Image.Image]`
+      - Multiple-image batches: `list[list[Image.Image]]`
    
-    returns: `List[Qwen2VLPromptImageEmbeddingInput]`
+    returns: `list[Qwen2VLPromptImageEmbeddingInput]`
    """

-    image_batches_: List[Any] = image_batches[:]
+    image_batches_: list[Any] = image_batches[:]

    # convert single-image batches to multiple-image batches
    for idx in range(len(image_batches_)):
@ -93,7 +93,7 @@ def batch_make_image_embeddings(
        assert isinstance(image_batches_[idx], list)

    # append all images into a list (as a batch)
-    images: List[Image.Image] = []
+    images: list[Image.Image] = []
    for image_batch in image_batches_:
        images += image_batch

@ -121,7 +121,7 @@ def batch_make_image_embeddings(
    image_embeds = torch.concat(llm.apply_model(get_image_embeds))

    # split into original batches
-    result: List[Qwen2VLPromptImageEmbeddingInput] = []
+    result: list[Qwen2VLPromptImageEmbeddingInput] = []
    image_counter = 0
    embed_counter = 0
    for image_batch in image_batches_:
@ -153,7 +153,7 @@ def batch_make_image_embeddings(

 def batch_make_video_embeddings(
        video_batches: PromptVideoInput, processor,
-        llm: VllmRunner) -> List[Qwen2VLPromptVideoEmbeddingInput]:
+        llm: VllmRunner) -> list[Qwen2VLPromptVideoEmbeddingInput]:
    """batched video embeddings for Qwen2-VL

    An NDArray represents all frames of a single video.
@ -162,21 +162,21 @@ def batch_make_video_embeddings(
      and split the result according to input batches.

    video_batches:
-      - Single-video batches: `List[NDArray]`
-      - Multiple-video batches: `List[List[NDArray]]`
+      - Single-video batches: `list[NDArray]`
+      - Multiple-video batches: `list[list[NDArray]]`
    """

-    video_batches_: List[Any] = video_batches[:]
+    video_batches_: list[Any] = video_batches[:]

    for idx in range(len(video_batches_)):
        if not isinstance(video_batches_[idx], list):
-            single_video_batch: List[npt.NDArray] = [video_batches_[idx]]
+            single_video_batch: list[npt.NDArray] = [video_batches_[idx]]
            video_batches_[idx] = single_video_batch

        assert isinstance(video_batches_[idx], list)

    # append all videos into a list (as a batch)
-    videos: List[npt.NDArray] = []
+    videos: list[npt.NDArray] = []
    for video_batch in video_batches_:
        videos += video_batch

@ -204,7 +204,7 @@ def batch_make_video_embeddings(
    video_embeds = torch.concat(llm.apply_model(get_image_embeds))

    # split into original batches
-    result: List[Qwen2VLPromptVideoEmbeddingInput] = []
+    result: list[Qwen2VLPromptVideoEmbeddingInput] = []
    video_counter = 0
    embed_counter = 0
    for video_batch in video_batches_:
@ -235,8 +235,8 @@ def batch_make_video_embeddings(


 def run_embedding_input_test(
-    vllm_runner: Type[VllmRunner],
-    inputs: List[Tuple[List[str], PromptImageInput, PromptVideoInput]],
+    vllm_runner: type[VllmRunner],
+    inputs: list[tuple[list[str], PromptImageInput, PromptVideoInput]],
    model: str,
    *,
    dtype: str,
@ -323,8 +323,8 @@ def test_qwen2_vl_image_embeddings_input(vllm_runner, image_assets, model,
                                         num_logprobs: int) -> None:
    images = [asset.pil_image for asset in image_assets]

-    inputs_per_case: List[Tuple[
-        List[str], PromptImageInput, PromptVideoInput]] = [(
+    inputs_per_case: list[tuple[
+        list[str], PromptImageInput, PromptVideoInput]] = [(
            [prompt for _ in size_factors],
            [rescale_image_size(image, factor) for factor in size_factors],
            [],
@ -365,7 +365,7 @@ def test_qwen2_vl_multiple_image_embeddings_input(vllm_runner, image_assets,
                                                  num_logprobs: int) -> None:
    images = [asset.pil_image for asset in image_assets]

-    inputs_per_case: List[Tuple[List[str], PromptImageInput,
+    inputs_per_case: list[tuple[list[str], PromptImageInput,
                                PromptVideoInput]] = [(
                                    [MULTIIMAGE_PROMPT for _ in size_factors],
                                    [[
@ -413,8 +413,8 @@ def test_qwen2_vl_video_embeddings_input(vllm_runner, video_assets, model,
        for asset in video_assets
    ]

-    inputs_per_case: List[Tuple[
-        List[str], PromptImageInput, PromptVideoInput]] = [(
+    inputs_per_case: list[tuple[
+        list[str], PromptImageInput, PromptVideoInput]] = [(
            [prompt for _ in size_factors],
            [],
            [rescale_video_size(video, factor) for factor in size_factors],
--- a/tests/models/decoder_only/vision_language/vlm_utils/builders.py
+++ b/tests/models/decoder_only/vision_language/vlm_utils/builders.py
@ -1,8 +1,9 @@
 # SPDX-License-Identifier: Apache-2.0
 """Helpers for building inputs that can be leveraged for different test types.
 """
+from collections.abc import Iterable
 from pathlib import PosixPath
-from typing import Callable, Iterable, List, Optional, Tuple, Union
+from typing import Callable, Optional, Union

 import torch

@ -33,7 +34,7 @@ def replace_test_placeholder(prompt: str, img_idx_to_prompt: Callable[[int],
 def get_model_prompts(base_prompts: Iterable[str],
                      img_idx_to_prompt: Optional[Callable[[int], str]],
                      video_idx_to_prompt: Optional[Callable[[int], str]],
-                      prompt_formatter: Callable[[str], str]) -> List[str]:
+                      prompt_formatter: Callable[[str], str]) -> list[str]:
    """Given a model-agnostic base prompt and test configuration for a model(s)
    to be tested, update the media placeholders and apply the prompt formatting
    to get the test prompt string for this model.
@ -218,7 +219,7 @@ def build_video_inputs_from_test_info(
    ) for video, prompt in zip(sampled_vids, model_prompts)]


-def apply_image_size_scaling(image, size: Union[float, Tuple[int, int]],
+def apply_image_size_scaling(image, size: Union[float, tuple[int, int]],
                             size_type: SizeType):
    """Applies a size scaler to one image; this can be a an image size factor,
    which scales the image while maintaining the aspect ratio"""
--- a/tests/models/decoder_only/vision_language/vlm_utils/case_filtering.py
+++ b/tests/models/decoder_only/vision_language/vlm_utils/case_filtering.py
@ -5,7 +5,7 @@ handling multimodal placeholder substitution, and so on.
 """
 import itertools
 from collections import OrderedDict
-from typing import Dict, Iterable, Tuple
+from collections.abc import Iterable

 import pytest

@ -13,9 +13,9 @@ from .types import (EMBEDDING_SIZE_FACTORS, ExpandableVLMTestArgs,
                    ImageSizeWrapper, SizeType, VLMTestInfo, VLMTestType)


-def get_filtered_test_settings(test_settings: Dict[str, VLMTestInfo],
+def get_filtered_test_settings(test_settings: dict[str, VLMTestInfo],
                               test_type: VLMTestType,
-                               fork_per_test: bool) -> Dict[str, VLMTestInfo]:
+                               fork_per_test: bool) -> dict[str, VLMTestInfo]:
    """Given the dict of potential test settings to run, return a subdict
    of tests who have the current test type enabled with the matching val for
    fork_per_test.
@ -49,7 +49,7 @@ def get_filtered_test_settings(test_settings: Dict[str, VLMTestInfo],
    return matching_tests


-def get_parametrized_options(test_settings: Dict[str, VLMTestInfo],
+def get_parametrized_options(test_settings: dict[str, VLMTestInfo],
                             test_type: VLMTestType,
                             fork_new_process_for_each_test: bool):
    """Converts all of our VLMTestInfo into an expanded list of parameters.
@ -121,7 +121,7 @@ def get_parametrized_options(test_settings: Dict[str, VLMTestInfo],

 def get_wrapped_test_sizes(
        test_info: VLMTestInfo,
-        test_type: VLMTestType) -> Tuple[ImageSizeWrapper, ...]:
+        test_type: VLMTestType) -> tuple[ImageSizeWrapper, ...]:
    """Given a test info which may have size factors or fixed sizes, wrap them
    and combine them into an iterable, each of which will be used in parameter
    expansion.
--- a/tests/models/decoder_only/vision_language/vlm_utils/core.py
+++ b/tests/models/decoder_only/vision_language/vlm_utils/core.py
@ -1,6 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 """Core test implementation to be shared across modalities."""
-from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union
+from typing import Any, Callable, Optional, Union

 import torch
 from PIL.Image import Image
@ -17,9 +17,9 @@ from .types import RunnerOutput

 def run_test(
    *,
-    hf_runner: Type[HfRunner],
-    vllm_runner: Type[VllmRunner],
-    inputs: List[Tuple[List[str], List[Union[List[Image], Image]]]],
+    hf_runner: type[HfRunner],
+    vllm_runner: type[VllmRunner],
+    inputs: list[tuple[list[str], list[Union[list[Image], Image]]]],
    model: str,
    dtype: str,
    max_tokens: int,
@ -29,15 +29,15 @@ def run_test(
    max_num_seqs: int,
    hf_output_post_proc: Optional[Callable[[RunnerOutput, str], Any]],
    vllm_output_post_proc: Optional[Callable[[RunnerOutput, str], Any]],
-    auto_cls: Type[_BaseAutoModelClass],
+    auto_cls: type[_BaseAutoModelClass],
    use_tokenizer_eos: bool,
    postprocess_inputs: Callable[[BatchEncoding], BatchEncoding],
    comparator: Callable[..., None],
    get_stop_token_ids: Optional[Callable[[AnyTokenizer], list[int]]],
-    stop_str: Optional[List[str]],
-    limit_mm_per_prompt: Dict[str, int],
-    vllm_runner_kwargs: Optional[Dict[str, Any]],
-    hf_model_kwargs: Optional[Dict[str, Any]],
+    stop_str: Optional[list[str]],
+    limit_mm_per_prompt: dict[str, int],
+    vllm_runner_kwargs: Optional[dict[str, Any]],
+    hf_model_kwargs: Optional[dict[str, Any]],
    patch_hf_runner: Optional[Callable[[HfRunner], HfRunner]],
    task: TaskOption = "auto",
    runner_mm_key: str = "images",
@ -61,7 +61,7 @@ def run_test(
    # if we run HF first, the cuda initialization will be done and it
    # will hurt multiprocessing backend with fork method (the default method).

-    vllm_runner_kwargs_: Dict[str, Any] = {}
+    vllm_runner_kwargs_: dict[str, Any] = {}
    if model_info.tokenizer:
        vllm_runner_kwargs_["tokenizer"] = model_info.tokenizer
    if model_info.tokenizer_mode:
@ -84,7 +84,7 @@ def run_test(
                     **vllm_runner_kwargs_) as vllm_model:
        tokenizer = vllm_model.model.get_tokenizer()

-        vllm_kwargs: Dict[str, Any] = {}
+        vllm_kwargs: dict[str, Any] = {}
        if get_stop_token_ids is not None:
            vllm_kwargs["stop_token_ids"] = get_stop_token_ids(tokenizer)
        if stop_str:
--- a/tests/models/decoder_only/vision_language/vlm_utils/model_utils.py
+++ b/tests/models/decoder_only/vision_language/vlm_utils/model_utils.py
@ -6,7 +6,7 @@ typically specific to a small subset of models.
 import re
 import types
 from pathlib import PosixPath
-from typing import Callable, List, Optional, Tuple, Union
+from typing import Callable, Optional, Union

 import torch
 from PIL.Image import Image
@ -49,7 +49,7 @@ def fuyu_vllm_to_hf_output(vllm_output: RunnerOutput,

 def qwen_vllm_to_hf_output(
        vllm_output: RunnerOutput,
-        model: str) -> Tuple[List[int], str, Optional[SampleLogprobs]]:
+        model: str) -> tuple[list[int], str, Optional[SampleLogprobs]]:
    """Sanitize vllm output [qwen models] to be comparable with hf output."""
    output_ids, output_str, out_logprobs = vllm_output

@ -60,7 +60,7 @@ def qwen_vllm_to_hf_output(

 def qwen2_vllm_to_hf_output(
        vllm_output: RunnerOutput,
-        model: str) -> Tuple[List[int], str, Optional[SampleLogprobs]]:
+        model: str) -> tuple[list[int], str, Optional[SampleLogprobs]]:
    """Sanitize vllm output [qwen2 models] to be comparable with hf output."""
    output_ids, output_str, out_logprobs = vllm_output

@ -78,7 +78,7 @@ def llava_image_vllm_to_hf_output(vllm_output: RunnerOutput,

 def llava_video_vllm_to_hf_output(
        vllm_output: RunnerOutput,
-        model: str) -> Tuple[List[int], str, Optional[SampleLogprobs]]:
+        model: str) -> tuple[list[int], str, Optional[SampleLogprobs]]:
    config = AutoConfig.from_pretrained(model)
    mm_token_id = config.video_token_index
    return _llava_vllm_to_hf_output(vllm_output, model, mm_token_id)
@ -247,7 +247,7 @@ def molmo_post_processor(hf_inputs: BatchEncoding, dtype: str):

 ####### Prompt path encoders for models that need models on disk
 def qwen_prompt_path_encoder(
-        tmp_path: PosixPath, prompt: str, assets: Union[List[ImageAsset],
+        tmp_path: PosixPath, prompt: str, assets: Union[list[ImageAsset],
                                                        _ImageAssets]) -> str:
    """Given a temporary dir path, export one or more image assets into the
    tempdir & replace its contents with the local path to the string so that
@ -257,7 +257,7 @@ def qwen_prompt_path_encoder(
    Args:
        tmp_path: Tempdir for test under consideration.
        prompt: Prompt with image placeholders.
-        assets: List of image assets whose len equals the num placeholders.
+        assets: list of image assets whose len equals the num placeholders.
    """
    # Ensure that the number of placeholders matches the number of assets;
    # If this is not true, the test is probably written incorrectly.
@ -350,7 +350,7 @@ def h2ovl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
            self.max_num = self.config.max_dynamic_patch
            self.image_size = self.vision_config.image_size

-        def __call__(self, text: str, images: Union[Image, List[Image]],
+        def __call__(self, text: str, images: Union[Image, list[Image]],
                     **kwargs):
            # yapf: disable
            from vllm.model_executor.models.h2ovl import (
@ -410,7 +410,7 @@ def internvl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
            self.max_num = self.config.max_dynamic_patch
            self.image_size = self.vision_config.image_size

-        def __call__(self, text: str, images: Union[Image, List[Image]],
+        def __call__(self, text: str, images: Union[Image, list[Image]],
                     **kwargs):
            from vllm.model_executor.models.internvl import (
                IMG_CONTEXT, IMG_END, IMG_START,
--- a/tests/models/decoder_only/vision_language/vlm_utils/runners.py
+++ b/tests/models/decoder_only/vision_language/vlm_utils/runners.py
@ -3,7 +3,6 @@
 types / modalities.
 """
 from pathlib import PosixPath
-from typing import Type

 from .....conftest import HfRunner, VllmRunner, _ImageAssets, _VideoAssets
 from . import builders, core
@ -13,8 +12,8 @@ from .types import ExpandableVLMTestArgs, VLMTestInfo
 ####### Entrypoints for running different test types
 def run_single_image_test(*, tmp_path: PosixPath, model_test_info: VLMTestInfo,
                          test_case: ExpandableVLMTestArgs,
-                          hf_runner: Type[HfRunner],
-                          vllm_runner: Type[VllmRunner],
+                          hf_runner: type[HfRunner],
+                          vllm_runner: type[VllmRunner],
                          image_assets: _ImageAssets):
    assert test_case.size_wrapper is not None
    inputs = builders.build_single_image_inputs_from_test_info(
@ -36,8 +35,8 @@ def run_single_image_test(*, tmp_path: PosixPath, model_test_info: VLMTestInfo,

 def run_multi_image_test(*, tmp_path: PosixPath, model_test_info: VLMTestInfo,
                         test_case: ExpandableVLMTestArgs,
-                         hf_runner: Type[HfRunner],
-                         vllm_runner: Type[VllmRunner],
+                         hf_runner: type[HfRunner],
+                         vllm_runner: type[VllmRunner],
                         image_assets: _ImageAssets):
    assert test_case.size_wrapper is not None
    inputs = builders.build_multi_image_inputs_from_test_info(
@ -59,8 +58,8 @@ def run_multi_image_test(*, tmp_path: PosixPath, model_test_info: VLMTestInfo,

 def run_embedding_test(*, model_test_info: VLMTestInfo,
                       test_case: ExpandableVLMTestArgs,
-                       hf_runner: Type[HfRunner],
-                       vllm_runner: Type[VllmRunner],
+                       hf_runner: type[HfRunner],
+                       vllm_runner: type[VllmRunner],
                       image_assets: _ImageAssets):
    assert test_case.size_wrapper is not None
    inputs, vllm_embeddings = builders.build_embedding_inputs_from_test_info(
@ -85,8 +84,8 @@ def run_video_test(
    *,
    model_test_info: VLMTestInfo,
    test_case: ExpandableVLMTestArgs,
-    hf_runner: Type[HfRunner],
-    vllm_runner: Type[VllmRunner],
+    hf_runner: type[HfRunner],
+    vllm_runner: type[VllmRunner],
    video_assets: _VideoAssets,
 ):
    assert test_case.size_wrapper is not None
@ -111,8 +110,8 @@ def run_video_test(

 def run_custom_inputs_test(*, model_test_info: VLMTestInfo,
                           test_case: ExpandableVLMTestArgs,
-                           hf_runner: Type[HfRunner],
-                           vllm_runner: Type[VllmRunner]):
+                           hf_runner: type[HfRunner],
+                           vllm_runner: type[VllmRunner]):
    # Custom test cases can provide inputs directly, but they need to
    # explicitly provide a CustomTestConfig, which wraps the inputs and
    # the limit_mm_per_prompt
--- a/tests/models/decoder_only/vision_language/vlm_utils/types.py
+++ b/tests/models/decoder_only/vision_language/vlm_utils/types.py
@ -1,9 +1,9 @@
 # SPDX-License-Identifier: Apache-2.0
 """Types for writing multimodal model tests."""
+from collections.abc import Iterable
 from enum import Enum
 from pathlib import PosixPath
-from typing import (Any, Callable, Dict, Iterable, List, NamedTuple, Optional,
-                    Tuple, Type, Union)
+from typing import Any, Callable, NamedTuple, Optional, Union

 import torch
 from PIL.Image import Image
@ -35,7 +35,7 @@ VIDEO_BASE_PROMPT = f"{TEST_VIDEO_PLACEHOLDER}Why is this video funny?"

 IMAGE_SIZE_FACTORS = [(), (1.0, ), (1.0, 1.0, 1.0), (0.25, 0.5, 1.0)]
 EMBEDDING_SIZE_FACTORS = [(), (1.0, ), (1.0, 1.0, 1.0)]
-RunnerOutput = Tuple[List[int], str, Optional[SampleLogprobs]]
+RunnerOutput = tuple[list[int], str, Optional[SampleLogprobs]]
 # yapf: enable


@ -53,8 +53,8 @@ class SizeType(Enum):


 class CustomTestOptions(NamedTuple):
-    inputs: List[Tuple[List[str], List[Union[List[Image], Image]]]]
-    limit_mm_per_prompt: Dict[str, int]
+    inputs: list[tuple[list[str], list[Union[list[Image], Image]]]]
+    limit_mm_per_prompt: dict[str, int]
    # kwarg to pass multimodal data in as to vllm/hf runner instances.
    runner_mm_key: str = "images"

@ -63,13 +63,13 @@ class ImageSizeWrapper(NamedTuple):
    type: SizeType
    # A size factor is a wrapper of 0+ floats,
    # while a fixed size contains an iterable of integer pairs
-    data: Union[Iterable[float], Iterable[Tuple[int, int]]]
+    data: Union[Iterable[float], Iterable[tuple[int, int]]]


 class VLMTestInfo(NamedTuple):
    """Holds the configuration for 1+ tests for one model architecture."""

-    models: List[str]
+    models: list[str]
    test_type: Union[VLMTestType, Iterable[VLMTestType]]

    # Should be None only if this is a CUSTOM_INPUTS test
@ -97,19 +97,19 @@ class VLMTestInfo(NamedTuple):
    max_num_seqs: int = 256
    task: TaskOption = "auto"
    tensor_parallel_size: int = 1
-    vllm_runner_kwargs: Optional[Dict[str, Any]] = None
+    vllm_runner_kwargs: Optional[dict[str, Any]] = None

    # Optional callable which gets a list of token IDs from the model tokenizer
    get_stop_token_ids: Optional[Callable[[AnyTokenizer], list[int]]] = None
    # Optional list of strings to stop generation, useful when stop tokens are
    # not special tokens in the tokenizer
-    stop_str: Optional[List[str]] = None
+    stop_str: Optional[list[str]] = None

    # Exposed options for HF runner
-    hf_model_kwargs: Optional[Dict[str, Any]] = None
+    hf_model_kwargs: Optional[dict[str, Any]] = None
    # Indicates we should explicitly pass the EOS from the tokenizer
    use_tokenizer_eos: bool = False
-    auto_cls: Type[_BaseAutoModelClass] = AutoModelForCausalLM
+    auto_cls: type[_BaseAutoModelClass] = AutoModelForCausalLM
    # Callable to pass to the HF runner to run on inputs; for now, we also pass
    # the data type to input post processing, because almost all of the uses of
    # postprocess_inputs are to fix the data types of BatchEncoding values.
@ -128,12 +128,12 @@ class VLMTestInfo(NamedTuple):
    # Default expandable params per test; these defaults can be overridden in
    # instances of this object; the complete set of test cases for the model
    # is all combinations of .models + all fields below
-    max_tokens: Union[int, Tuple[int]] = 128
-    num_logprobs: Union[int, Tuple[int]] = 5
+    max_tokens: Union[int, tuple[int]] = 128
+    num_logprobs: Union[int, tuple[int]] = 5
    dtype: Union[str, Iterable[str]] = "half"
    distributed_executor_backend: Optional[Union[str, Iterable[str]]] = None
    # Only expanded in video tests
-    num_video_frames: Union[int, Tuple[int]] = 16
+    num_video_frames: Union[int, tuple[int]] = 16

    # Fixed image sizes / image size factors; most tests use image_size_factors
    # The values provided for these two fields will be stacked and expanded
@ -141,19 +141,19 @@ class VLMTestInfo(NamedTuple):
    # once per tests (much like concatenating and wrapping in one parametrize
    # call)
    image_size_factors: Iterable[Iterable[float]] = IMAGE_SIZE_FACTORS
-    image_sizes: Optional[Iterable[Iterable[Tuple[int, int]]]] = None
+    image_sizes: Optional[Iterable[Iterable[tuple[int, int]]]] = None

    # Hack for updating a prompt to take into a local path; currently only used
    # for Qwen-VL, which requires encoding the image path / url into the prompt
    # for HF runner
    prompt_path_encoder: Optional[
-        Callable[[PosixPath, str, Union[List[ImageAsset], _ImageAssets]],
+        Callable[[PosixPath, str, Union[list[ImageAsset], _ImageAssets]],
                 str]] = None  # noqa: E501

    # Allows configuring a test to run with custom inputs
-    custom_test_opts: Optional[List[CustomTestOptions]] = None
+    custom_test_opts: Optional[list[CustomTestOptions]] = None

-    marks: Optional[List[MarkDecorator]] = None
+    marks: Optional[list[MarkDecorator]] = None

    def get_non_parametrized_runner_kwargs(self):
        """Returns a dictionary of expandable kwargs for items that are used