diff --git a/csrc/quantization/machete/generate.py b/csrc/quantization/machete/generate.py
index f7106f016b..d29a199c5d 100644
--- a/csrc/quantization/machete/generate.py
+++ b/csrc/quantization/machete/generate.py
@@ -12,9 +12,6 @@ from functools import reduce
 from typing import Optional, Union
 
 import jinja2
-
-# yapf conflicts with isort for this block
-# yapf: disable
 from vllm_cutlass_library_extension import (
     DataType,
     EpilogueScheduleTag,
@@ -31,8 +28,6 @@ from vllm_cutlass_library_extension import (
     VLLMKernelScheduleTag,
 )
 
-# yapf: enable
-
 #
 #   Generator templating
 #
diff --git a/examples/others/tensorize_vllm_model.py b/examples/others/tensorize_vllm_model.py
index 2b7f0beab2..acbfd8cda4 100644
--- a/examples/others/tensorize_vllm_model.py
+++ b/examples/others/tensorize_vllm_model.py
@@ -21,8 +21,6 @@ from vllm.utils import FlexibleArgumentParser
 logger = logging.getLogger()
 
 
-# yapf conflicts with isort for this docstring
-# yapf: disable
 """
 tensorize_vllm_model.py is a script that can be used to serialize and 
 deserialize vLLM models. These models can be loaded using tensorizer 
@@ -132,7 +130,8 @@ def get_parser():
         "can be loaded using tensorizer directly to the GPU "
         "extremely quickly. Tensor encryption and decryption is "
         "also supported, although libsodium must be installed to "
-        "use it.")
+        "use it."
+    )
     parser = EngineArgs.add_cli_args(parser)
 
     parser.add_argument(
@@ -144,13 +143,14 @@ def get_parser():
         "along with the model by instantiating a TensorizerConfig object, "
         "creating a dict from it with TensorizerConfig.to_serializable(), "
         "and passing it to LoRARequest's initializer with the kwarg "
-        "tensorizer_config_dict."
+        "tensorizer_config_dict.",
     )
 
-    subparsers = parser.add_subparsers(dest='command', required=True)
+    subparsers = parser.add_subparsers(dest="command", required=True)
 
     serialize_parser = subparsers.add_parser(
-        'serialize', help="Serialize a model to `--serialized-directory`")
+        "serialize", help="Serialize a model to `--serialized-directory`"
+    )
 
     serialize_parser.add_argument(
         "--suffix",
@@ -163,7 +163,9 @@ def get_parser():
             "`--suffix` is `v1`, the serialized model tensors will be "
             "saved to "
             "`s3://my-bucket/vllm/EleutherAI/gpt-j-6B/v1/model.tensors`. "
-            "If none is provided, a random UUID will be used."))
+            "If none is provided, a random UUID will be used."
+        ),
+    )
     serialize_parser.add_argument(
         "--serialized-directory",
         type=str,
@@ -175,108 +177,127 @@ def get_parser():
         "and the model HuggingFace ID is `EleutherAI/gpt-j-6B`, tensors will "
         "be saved to `dir/vllm/EleutherAI/gpt-j-6B/suffix/model.tensors`, "
         "where `suffix` is given by `--suffix` or a random UUID if not "
-        "provided.")
+        "provided.",
+    )
 
     serialize_parser.add_argument(
         "--serialization-kwargs",
         type=tensorizer_kwargs_arg,
         required=False,
-        help=("A JSON string containing additional keyword arguments to "
-              "pass to Tensorizer's TensorSerializer during "
-              "serialization."))
+        help=(
+            "A JSON string containing additional keyword arguments to "
+            "pass to Tensorizer's TensorSerializer during "
+            "serialization."
+        ),
+    )
 
     serialize_parser.add_argument(
         "--keyfile",
         type=str,
         required=False,
-        help=("Encrypt the model weights with a randomly-generated binary key,"
-              " and save the key at this path"))
+        help=(
+            "Encrypt the model weights with a randomly-generated binary key,"
+            " and save the key at this path"
+        ),
+    )
 
     deserialize_parser = subparsers.add_parser(
-        'deserialize',
-        help=("Deserialize a model from `--path-to-tensors`"
-              " to verify it can be loaded and used."))
+        "deserialize",
+        help=(
+            "Deserialize a model from `--path-to-tensors`"
+            " to verify it can be loaded and used."
+        ),
+    )
 
     deserialize_parser.add_argument(
         "--path-to-tensors",
         type=str,
         required=False,
-        help="The local path or S3 URI to the model tensors to deserialize. ")
+        help="The local path or S3 URI to the model tensors to deserialize. ",
+    )
 
     deserialize_parser.add_argument(
         "--serialized-directory",
         type=str,
         required=False,
         help="Directory with model artifacts for loading. Assumes a "
-             "model.tensors file exists therein. Can supersede "
-             "--path-to-tensors.")
+        "model.tensors file exists therein. Can supersede "
+        "--path-to-tensors.",
+    )
 
     deserialize_parser.add_argument(
         "--keyfile",
         type=str,
         required=False,
-        help=("Path to a binary key to use to decrypt the model weights,"
-              " if the model was serialized with encryption"))
+        help=(
+            "Path to a binary key to use to decrypt the model weights,"
+            " if the model was serialized with encryption"
+        ),
+    )
 
     deserialize_parser.add_argument(
         "--deserialization-kwargs",
         type=tensorizer_kwargs_arg,
         required=False,
-        help=("A JSON string containing additional keyword arguments to "
-              "pass to Tensorizer's `TensorDeserializer` during "
-              "deserialization."))
+        help=(
+            "A JSON string containing additional keyword arguments to "
+            "pass to Tensorizer's `TensorDeserializer` during "
+            "deserialization."
+        ),
+    )
 
     TensorizerArgs.add_cli_args(deserialize_parser)
 
     return parser
 
-def merge_extra_config_with_tensorizer_config(extra_cfg: dict,
-                                              cfg: TensorizerConfig):
+
+def merge_extra_config_with_tensorizer_config(extra_cfg: dict, cfg: TensorizerConfig):
     for k, v in extra_cfg.items():
         if hasattr(cfg, k):
             setattr(cfg, k, v)
             logger.info(
                 "Updating TensorizerConfig with %s from "
-                "--model-loader-extra-config provided", k
+                "--model-loader-extra-config provided",
+                k,
             )
 
+
 def deserialize(args, tensorizer_config):
     if args.lora_path:
         tensorizer_config.lora_dir = tensorizer_config.tensorizer_dir
-        llm = LLM(model=args.model,
-                  load_format="tensorizer",
-                  tensor_parallel_size=args.tensor_parallel_size,
-                  model_loader_extra_config=tensorizer_config,
-                  enable_lora=True,
+        llm = LLM(
+            model=args.model,
+            load_format="tensorizer",
+            tensor_parallel_size=args.tensor_parallel_size,
+            model_loader_extra_config=tensorizer_config,
+            enable_lora=True,
         )
         sampling_params = SamplingParams(
-            temperature=0,
-            max_tokens=256,
-            stop=["[/assistant]"]
+            temperature=0, max_tokens=256, stop=["[/assistant]"]
         )
 
         # Truncating this as the extra text isn't necessary
-        prompts = [
-            "[user] Write a SQL query to answer the question based on ..."
-        ]
+        prompts = ["[user] Write a SQL query to answer the question based on ..."]
 
         # Test LoRA load
         print(
             llm.generate(
-            prompts,
-            sampling_params,
-            lora_request=LoRARequest("sql-lora",
-                                     1,
-                                     args.lora_path,
-                                     tensorizer_config_dict = tensorizer_config
-                                     .to_serializable())
+                prompts,
+                sampling_params,
+                lora_request=LoRARequest(
+                    "sql-lora",
+                    1,
+                    args.lora_path,
+                    tensorizer_config_dict=tensorizer_config.to_serializable(),
+                ),
             )
         )
     else:
-        llm = LLM(model=args.model,
-                  load_format="tensorizer",
-                  tensor_parallel_size=args.tensor_parallel_size,
-                  model_loader_extra_config=tensorizer_config
+        llm = LLM(
+            model=args.model,
+            load_format="tensorizer",
+            tensor_parallel_size=args.tensor_parallel_size,
+            model_loader_extra_config=tensorizer_config,
         )
     return llm
 
@@ -285,17 +306,20 @@ def main():
     parser = get_parser()
     args = parser.parse_args()
 
-    s3_access_key_id = (getattr(args, 's3_access_key_id', None)
-                        or os.environ.get("S3_ACCESS_KEY_ID", None))
-    s3_secret_access_key = (getattr(args, 's3_secret_access_key', None)
-                            or os.environ.get("S3_SECRET_ACCESS_KEY", None))
-    s3_endpoint = (getattr(args, 's3_endpoint', None)
-                or os.environ.get("S3_ENDPOINT_URL", None))
+    s3_access_key_id = getattr(args, "s3_access_key_id", None) or os.environ.get(
+        "S3_ACCESS_KEY_ID", None
+    )
+    s3_secret_access_key = getattr(
+        args, "s3_secret_access_key", None
+    ) or os.environ.get("S3_SECRET_ACCESS_KEY", None)
+    s3_endpoint = getattr(args, "s3_endpoint", None) or os.environ.get(
+        "S3_ENDPOINT_URL", None
+    )
 
     credentials = {
         "s3_access_key_id": s3_access_key_id,
         "s3_secret_access_key": s3_secret_access_key,
-        "s3_endpoint": s3_endpoint
+        "s3_endpoint": s3_endpoint,
     }
 
     model_ref = args.model
@@ -309,25 +333,25 @@ def main():
     if args.model_loader_extra_config:
         extra_config = json.loads(args.model_loader_extra_config)
 
-
-    tensorizer_dir = (args.serialized_directory or
-                      extra_config.get("tensorizer_dir"))
-    tensorizer_uri = (getattr(args, "path_to_tensors", None)
-                      or extra_config.get("tensorizer_uri"))
+    tensorizer_dir = args.serialized_directory or extra_config.get("tensorizer_dir")
+    tensorizer_uri = getattr(args, "path_to_tensors", None) or extra_config.get(
+        "tensorizer_uri"
+    )
 
     if tensorizer_dir and tensorizer_uri:
-        parser.error("--serialized-directory and --path-to-tensors "
-                     "cannot both be provided")
+        parser.error(
+            "--serialized-directory and --path-to-tensors cannot both be provided"
+        )
 
     if not tensorizer_dir and not tensorizer_uri:
-        parser.error("Either --serialized-directory or --path-to-tensors "
-                     "must be provided")
-
+        parser.error(
+            "Either --serialized-directory or --path-to-tensors must be provided"
+        )
 
     if args.command == "serialize":
         engine_args = EngineArgs.from_cli_args(args)
 
-        input_dir = tensorizer_dir.rstrip('/')
+        input_dir = tensorizer_dir.rstrip("/")
         suffix = args.suffix if args.suffix else uuid.uuid4().hex
         base_path = f"{input_dir}/vllm/{model_ref}/{suffix}"
         if engine_args.tensor_parallel_size > 1:
@@ -339,15 +363,14 @@ def main():
             tensorizer_uri=model_path,
             encryption_keyfile=keyfile,
             serialization_kwargs=args.serialization_kwargs or {},
-            **credentials
+            **credentials,
         )
 
         if args.lora_path:
             tensorizer_config.lora_dir = tensorizer_config.tensorizer_dir
             tensorize_lora_adapter(args.lora_path, tensorizer_config)
 
-        merge_extra_config_with_tensorizer_config(extra_config,
-                                                  tensorizer_config)
+        merge_extra_config_with_tensorizer_config(extra_config, tensorizer_config)
         tensorize_vllm_model(engine_args, tensorizer_config)
 
     elif args.command == "deserialize":
@@ -356,11 +379,10 @@ def main():
             tensorizer_dir=args.serialized_directory,
             encryption_keyfile=keyfile,
             deserialization_kwargs=args.deserialization_kwargs or {},
-            **credentials
+            **credentials,
         )
 
-        merge_extra_config_with_tensorizer_config(extra_config,
-                                                  tensorizer_config)
+        merge_extra_config_with_tensorizer_config(extra_config, tensorizer_config)
         deserialize(args, tensorizer_config)
     else:
         raise ValueError("Either serialize or deserialize must be specified.")
diff --git a/tests/compile/test_silu_mul_quant_fusion.py b/tests/compile/test_silu_mul_quant_fusion.py
index 7e3a230b5f..16a4271655 100644
--- a/tests/compile/test_silu_mul_quant_fusion.py
+++ b/tests/compile/test_silu_mul_quant_fusion.py
@@ -8,16 +8,11 @@ import torch
 import vllm.envs as envs
 from tests.kernels.quantization.nvfp4_utils import quant_nvfp4_tensor
 from vllm._custom_ops import cutlass_scaled_fp4_mm, scaled_fp4_quant
-
-# yapf conflicts with isort for this block
-# yapf: disable
 from vllm.compilation.activation_quant_fusion import (
     FUSED_OPS,
     SILU_MUL_OP,
     ActivationQuantFusionPass,
 )
-
-# yapf: enable
 from vllm.compilation.fusion import QUANT_OPS
 from vllm.compilation.noop_elimination import NoOpEliminationPass
 from vllm.compilation.post_cleanup import PostCleanupPass
diff --git a/tests/distributed/test_expert_parallel.py b/tests/distributed/test_expert_parallel.py
index 68ac52af30..94f0ece497 100644
--- a/tests/distributed/test_expert_parallel.py
+++ b/tests/distributed/test_expert_parallel.py
@@ -107,10 +107,8 @@ class EPTestSettings:
 # NOTE: You can adjust tp_base locally to fit the model in GPU
 # The values displayed here are only a rough indicator of the size of the model
 
-# yapf: disable
 TEST_MODELS = {
-    "deepseek-ai/DeepSeek-V2-Lite-Chat": EPTestSettings.fast(
-        trust_remote_code=True),
+    "deepseek-ai/DeepSeek-V2-Lite-Chat": EPTestSettings.fast(trust_remote_code=True),
     "mistralai/Mixtral-8x7B-Instruct-v0.1": EPTestSettings.fast(tp_base=4),
 }
 
@@ -192,22 +190,24 @@ def _compare_tp(
     ]
 
     try:
-        compare_two_settings(model_name,
-                             ep_args,
-                             tp_args,
-                             ep_env,
-                             tp_env,
-                             method=method,
-                             max_wait_seconds=360)
+        compare_two_settings(
+            model_name,
+            ep_args,
+            tp_args,
+            ep_env,
+            tp_env,
+            method=method,
+            max_wait_seconds=360,
+        )
     except Exception:
         raise
 
 
 @pytest.mark.parametrize(
-    ("model_name", "parallel_setup", "distributed_backend", "runner",
-     "test_options"),
+    ("model_name", "parallel_setup", "distributed_backend", "runner", "test_options"),
     [
-        params for model_name, settings in TEST_MODELS.items()
+        params
+        for model_name, settings in TEST_MODELS.items()
         for params in settings.iter_params(model_name)
     ],
 )
@@ -220,10 +220,12 @@ def test_ep(
     test_options: EPTestOptions,
     num_gpus_available,
 ):
-    _compare_tp(model_name,
-                parallel_setup,
-                distributed_backend,
-                runner,
-                test_options,
-                num_gpus_available,
-                method="generate")
+    _compare_tp(
+        model_name,
+        parallel_setup,
+        distributed_backend,
+        runner,
+        test_options,
+        num_gpus_available,
+        method="generate",
+    )
diff --git a/tests/distributed/test_pipeline_parallel.py b/tests/distributed/test_pipeline_parallel.py
index 26ee60c150..119e8e7621 100644
--- a/tests/distributed/test_pipeline_parallel.py
+++ b/tests/distributed/test_pipeline_parallel.py
@@ -100,7 +100,6 @@ class PPTestSettings:
 # NOTE: You can adjust tp_base and/or pp_base locally to fit the model in GPU
 # The values displayed here are only a rough indicator of the size of the model
 
-# yapf: disable
 TEXT_GENERATION_MODELS = {
     # [Decoder-only]
     # Uses Llama
@@ -150,7 +149,9 @@ TEXT_GENERATION_MODELS = {
     "adept/persimmon-8b-chat": PPTestSettings.fast(),
     "microsoft/phi-2": PPTestSettings.fast(),
     "microsoft/Phi-3-small-8k-instruct": PPTestSettings.fast(),
-    "microsoft/Phi-3.5-MoE-instruct": PPTestSettings.detailed(multi_node_only=True, load_format="dummy"),  # noqa: E501
+    "microsoft/Phi-3.5-MoE-instruct": PPTestSettings.detailed(
+        multi_node_only=True, load_format="dummy"
+    ),
     "Qwen/Qwen-7B-Chat": PPTestSettings.fast(),
     "Qwen/Qwen2.5-0.5B-Instruct": PPTestSettings.fast(),
     "Qwen/Qwen1.5-MoE-A2.7B-Chat": PPTestSettings.fast(),
@@ -196,7 +197,6 @@ MULTIMODAL_MODELS = {
     "Qwen/Qwen2-VL-2B-Instruct": PPTestSettings.fast(),
     "fixie-ai/ultravox-v0_5-llama-3_2-1b": PPTestSettings.fast(),
 }
-# yapf: enable
 
 # NOTE: You can update this on your local machine to run specific tests
 TEST_MODELS = [
diff --git a/tests/engine/test_arg_utils.py b/tests/engine/test_arg_utils.py
index c0eb0e5ac5..9d367349fc 100644
--- a/tests/engine/test_arg_utils.py
+++ b/tests/engine/test_arg_utils.py
@@ -287,29 +287,15 @@ def test_prefix_cache_default():
     assert not engine_args.enable_prefix_caching
 
 
-# yapf: disable
-@pytest.mark.parametrize(("arg", "expected", "option"), [
-    (None, None, "mm-processor-kwargs"),
-    ("{}", {}, "mm-processor-kwargs"),
-    (
-        '{"num_crops": 4}',
-        {
-            "num_crops": 4
-        },
-        "mm-processor-kwargs"
-    ),
-    (
-        '{"foo": {"bar": "baz"}}',
-        {
-            "foo":
-            {
-                "bar": "baz"
-            }
-        },
-        "mm-processor-kwargs"
-    ),
-])
-# yapf: enable
+@pytest.mark.parametrize(
+    ("arg", "expected", "option"),
+    [
+        (None, None, "mm-processor-kwargs"),
+        ("{}", {}, "mm-processor-kwargs"),
+        ('{"num_crops": 4}', {"num_crops": 4}, "mm-processor-kwargs"),
+        ('{"foo": {"bar": "baz"}}', {"foo": {"bar": "baz"}}, "mm-processor-kwargs"),
+    ],
+)
 def test_composite_arg_parser(arg, expected, option):
     parser = EngineArgs.add_cli_args(FlexibleArgumentParser())
     if arg is None:
@@ -321,8 +307,7 @@ def test_composite_arg_parser(arg, expected, option):
 
 def test_human_readable_model_len():
     # `exit_on_error` disabled to test invalid values below
-    parser = EngineArgs.add_cli_args(
-        FlexibleArgumentParser(exit_on_error=False))
+    parser = EngineArgs.add_cli_args(FlexibleArgumentParser(exit_on_error=False))
 
     args = parser.parse_args([])
     assert args.max_model_len is None
diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py
index 7ddad4d513..975ca53a3a 100644
--- a/tests/entrypoints/test_chat_utils.py
+++ b/tests/entrypoints/test_chat_utils.py
@@ -15,6 +15,7 @@ from vllm.assets.video import VideoAsset
 from vllm.config import ModelConfig
 from vllm.entrypoints.chat_utils import (
     _try_extract_ast,
+    apply_mistral_chat_template,
     load_chat_template,
     parse_chat_messages,
     parse_chat_messages_futures,
@@ -1855,17 +1856,17 @@ def test_resolve_hf_chat_template_kwargs(sample_json_schema, model, expected_kwa
 
 # NOTE: Qwen2-Audio default chat template is specially defined inside
 # processor class instead of using `tokenizer_config.json`
-# yapf: disable
 @pytest.mark.parametrize(
     ("model", "expected_format"),
-    [(PHI3V_MODEL_ID, "string"),
-     (QWEN2VL_MODEL_ID, "openai"),
-     (QWEN25VL_MODEL_ID, "openai"),
-     (ULTRAVOX_MODEL_ID, "string"),
-     (QWEN2AUDIO_MODEL_ID, "openai"),
-     (LLAMA_GUARD_MODEL_ID, "openai")],
+    [
+        (PHI3V_MODEL_ID, "string"),
+        (QWEN2VL_MODEL_ID, "openai"),
+        (QWEN25VL_MODEL_ID, "openai"),
+        (ULTRAVOX_MODEL_ID, "string"),
+        (QWEN2AUDIO_MODEL_ID, "openai"),
+        (LLAMA_GUARD_MODEL_ID, "openai"),
+    ],
 )
-# yapf: enable
 def test_resolve_content_format_hf_defined(model, expected_format):
     model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
     model_info.check_available_online(on_fail="skip")
@@ -1879,7 +1880,8 @@ def test_resolve_content_format_hf_defined(model, expected_format):
         hf_overrides=model_info.hf_overrides,
         skip_tokenizer_init=model_info.skip_tokenizer_init,
         enforce_eager=model_info.enforce_eager,
-        dtype=model_info.dtype)
+        dtype=model_info.dtype,
+    )
 
     tokenizer = get_tokenizer(
         model,
@@ -1911,18 +1913,18 @@ def test_resolve_content_format_hf_defined(model, expected_format):
     assert resolved_format == expected_format
 
 
-# yapf: disable
 @pytest.mark.parametrize(
     ("model", "expected_format"),
-    [("Salesforce/blip2-opt-2.7b", "string"),
-     ("facebook/chameleon-7b", "string"),
-     ("deepseek-ai/deepseek-vl2-tiny", "string"),
-     ("adept/fuyu-8b", "string"),
-     ("google/paligemma-3b-mix-224", "string"),
-     ("Qwen/Qwen-VL", "string"),
-     ("Qwen/Qwen-VL-Chat", "string")],
+    [
+        ("Salesforce/blip2-opt-2.7b", "string"),
+        ("facebook/chameleon-7b", "string"),
+        ("deepseek-ai/deepseek-vl2-tiny", "string"),
+        ("adept/fuyu-8b", "string"),
+        ("google/paligemma-3b-mix-224", "string"),
+        ("Qwen/Qwen-VL", "string"),
+        ("Qwen/Qwen-VL-Chat", "string"),
+    ],
 )
-# yapf: enable
 def test_resolve_content_format_fallbacks(model, expected_format):
     model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
     model_info.check_available_online(on_fail="skip")
@@ -1936,7 +1938,8 @@ def test_resolve_content_format_fallbacks(model, expected_format):
         hf_overrides=model_info.hf_overrides,
         skip_tokenizer_init=model_info.skip_tokenizer_init,
         enforce_eager=model_info.enforce_eager,
-        dtype=model_info.dtype)
+        dtype=model_info.dtype,
+    )
 
     tokenizer = get_tokenizer(
         model_config.tokenizer,
@@ -1968,30 +1971,30 @@ def test_resolve_content_format_fallbacks(model, expected_format):
     assert resolved_format == expected_format
 
 
-# yapf: disable
 @pytest.mark.parametrize(
     ("template_path", "expected_format"),
-    [("template_alpaca.jinja", "string"),
-     ("template_baichuan.jinja", "string"),
-     ("template_chatglm.jinja", "string"),
-     ("template_chatglm2.jinja", "string"),
-     ("template_chatml.jinja", "string"),
-     ("template_dse_qwen2_vl.jinja", "openai"),
-     ("template_falcon_180b.jinja", "string"),
-     ("template_falcon.jinja", "string"),
-     ("template_inkbot.jinja", "string"),
-     ("template_teleflm.jinja", "string"),
-     ("template_vlm2vec_phi3v.jinja", "openai"),
-     ("template_vlm2vec_qwen2vl.jinja", "openai"),
-     ("tool_chat_template_granite_20b_fc.jinja", "string"),
-     ("tool_chat_template_hermes.jinja", "string"),
-     ("tool_chat_template_internlm2_tool.jinja", "string"),
-     ("tool_chat_template_llama3.1_json.jinja", "openai"),
-     ("tool_chat_template_llama3.2_json.jinja", "openai"),
-     ("tool_chat_template_mistral_parallel.jinja", "string"),
-     ("tool_chat_template_mistral.jinja", "string")],
+    [
+        ("template_alpaca.jinja", "string"),
+        ("template_baichuan.jinja", "string"),
+        ("template_chatglm.jinja", "string"),
+        ("template_chatglm2.jinja", "string"),
+        ("template_chatml.jinja", "string"),
+        ("template_dse_qwen2_vl.jinja", "openai"),
+        ("template_falcon_180b.jinja", "string"),
+        ("template_falcon.jinja", "string"),
+        ("template_inkbot.jinja", "string"),
+        ("template_teleflm.jinja", "string"),
+        ("template_vlm2vec_phi3v.jinja", "openai"),
+        ("template_vlm2vec_qwen2vl.jinja", "openai"),
+        ("tool_chat_template_granite_20b_fc.jinja", "string"),
+        ("tool_chat_template_hermes.jinja", "string"),
+        ("tool_chat_template_internlm2_tool.jinja", "string"),
+        ("tool_chat_template_llama3.1_json.jinja", "openai"),
+        ("tool_chat_template_llama3.2_json.jinja", "openai"),
+        ("tool_chat_template_mistral_parallel.jinja", "string"),
+        ("tool_chat_template_mistral.jinja", "string"),
+    ],
 )
-# yapf: enable
 def test_resolve_content_format_examples(template_path, expected_format):
     model_config = ModelConfig(
         PHI3V_MODEL_ID,  # Dummy
@@ -2024,40 +2027,34 @@ def test_resolve_content_format_examples(template_path, expected_format):
     assert resolved_format == expected_format
 
 
-def test_parse_chat_messages_include_thinking_chunk(mistral_model_config,
-                                                    mistral_tokenizer):
-    messages = [{
-        "role":
-        "system",
-        "content": [{
-            "type": "text",
-            "text": "You are a helpful assistant."
-        }, {
-            "type":
-            "thinking",
-            "closed":
-            True,
-            "thinking":
-            "Only return the answer when you are confident."
-        }]
-    }, {
-        "role": "user",
-        "content": "What is 2+2?"
-    }, {
-        "role":
-        "assistant",
-        "content": [{
-            "type": "text",
-            "text": "Let me think about it."
-        }, {
-            "type": "thinking",
-            "closed": True,
-            "thinking": "2+2 = 4"
-        }, {
-            "type": "text",
-            "text": "The answer is 4.",
-        }],
-    }]
+def test_parse_chat_messages_include_thinking_chunk(
+    mistral_model_config, mistral_tokenizer
+):
+    messages = [
+        {
+            "role": "system",
+            "content": [
+                {"type": "text", "text": "You are a helpful assistant."},
+                {
+                    "type": "thinking",
+                    "closed": True,
+                    "thinking": "Only return the answer when you are confident.",
+                },
+            ],
+        },
+        {"role": "user", "content": "What is 2+2?"},
+        {
+            "role": "assistant",
+            "content": [
+                {"type": "text", "text": "Let me think about it."},
+                {"type": "thinking", "closed": True, "thinking": "2+2 = 4"},
+                {
+                    "type": "text",
+                    "text": "The answer is 4.",
+                },
+            ],
+        },
+    ]
 
     conversation_with_thinking, _, _ = parse_chat_messages(
         messages,
@@ -2066,122 +2063,105 @@ def test_parse_chat_messages_include_thinking_chunk(mistral_model_config,
         content_format="openai",
     )
 
-    expected_conversation = [{
-        "role":
-        "system",
-        "content": [{
-            "type": "text",
-            "text": "You are a helpful assistant."
-        }, {
-            "type": "text",
-            "text": "Only return the answer when you are confident."
-        }],
-    }, {
-        "role":
-        "user",
-        "content": [{
-            "type": "text",
-            "text": "What is 2+2?"
-        }],
-    }, {
-        "role":
-        "assistant",
-        "content": [
-            {
-                "type": "text",
-                "text": "Let me think about it."
-            },
-            {
-                "type": "text",
-                "text": "2+2 = 4"
-            },
-            {
-                "type": "text",
-                "text": "The answer is 4."
-            },
-        ]
-    }]
+    expected_conversation = [
+        {
+            "role": "system",
+            "content": [
+                {"type": "text", "text": "You are a helpful assistant."},
+                {
+                    "type": "text",
+                    "text": "Only return the answer when you are confident.",
+                },
+            ],
+        },
+        {
+            "role": "user",
+            "content": [{"type": "text", "text": "What is 2+2?"}],
+        },
+        {
+            "role": "assistant",
+            "content": [
+                {"type": "text", "text": "Let me think about it."},
+                {"type": "text", "text": "2+2 = 4"},
+                {"type": "text", "text": "The answer is 4."},
+            ],
+        },
+    ]
 
     assert conversation_with_thinking == expected_conversation
 
 
 def test_apply_mistral_chat_template_thinking_chunk():
-    # Moved import here to avoid yapf and isort conflicts
-    from vllm.entrypoints.chat_utils import apply_mistral_chat_template
-    messages = [{
-        "role":
-        "system",
-        "content": [{
-            "type": "text",
-            "text": "You are a helpful assistant."
-        }, {
-            "type":
-            "thinking",
-            "closed":
-            True,
-            "thinking":
-            "Only return the answer when you are confident."
-        }]
-    }, {
-        "role": "user",
-        "content": "What is 2+2?"
-    }, {
-        "role":
-        "assistant",
-        "content": [{
-            "type": "text",
-            "text": "Let me think about it."
-        }, {
-            "type": "thinking",
-            "closed": True,
-            "thinking": "2+2 = 4"
-        }, {
-            "type": "text",
-            "text": "The answer is 4.",
-        }],
-    }, {
-        "role": "user",
-        "content": "Thanks, what is 3+3?"
-    }]
+    messages = [
+        {
+            "role": "system",
+            "content": [
+                {"type": "text", "text": "You are a helpful assistant."},
+                {
+                    "type": "thinking",
+                    "closed": True,
+                    "thinking": "Only return the answer when you are confident.",
+                },
+            ],
+        },
+        {"role": "user", "content": "What is 2+2?"},
+        {
+            "role": "assistant",
+            "content": [
+                {"type": "text", "text": "Let me think about it."},
+                {"type": "thinking", "closed": True, "thinking": "2+2 = 4"},
+                {
+                    "type": "text",
+                    "text": "The answer is 4.",
+                },
+            ],
+        },
+        {"role": "user", "content": "Thanks, what is 3+3?"},
+    ]
 
     # TODO(Julien): upon model release change to a tokenizer already configured.
     # =================================================================
     mistral_tokenizer = MistralTokenizer.from_pretrained(
-        "mistralai/Devstral-Small-2507")
+        "mistralai/Devstral-Small-2507"
+    )
     assert isinstance(mistral_tokenizer.tokenizer, Tekkenizer)
     # Add think special tokens to the tokenizer
     mistral_tokenizer.tokenizer._all_special_tokens[35] = SpecialTokenInfo(
-        rank=35, is_control=True, token_str=SpecialTokens.begin_think.value)
+        rank=35, is_control=True, token_str=SpecialTokens.begin_think.value
+    )
     mistral_tokenizer.tokenizer._all_special_tokens[36] = SpecialTokenInfo(
-        rank=36, is_control=True, token_str=SpecialTokens.end_think.value)
+        rank=36, is_control=True, token_str=SpecialTokens.end_think.value
+    )
     mistral_tokenizer.tokenizer._special_tokens_reverse_vocab = {
         k: v
-        for k, v in
-        mistral_tokenizer.tokenizer._special_tokens_reverse_vocab.items()
+        for k, v in mistral_tokenizer.tokenizer._special_tokens_reverse_vocab.items()
         if v not in {35, 36}
     }
     mistral_tokenizer.tokenizer._special_tokens_reverse_vocab[
-        SpecialTokens.begin_think.value] = 35
+        SpecialTokens.begin_think.value
+    ] = 35
     mistral_tokenizer.tokenizer._special_tokens_reverse_vocab[
-        SpecialTokens.end_think.value] = 36
+        SpecialTokens.end_think.value
+    ] = 36
     mistral_tokenizer.instruct.BEGIN_THINK = 35
     mistral_tokenizer.instruct.END_THINK = 36
     # =================================================================
 
-    tokens_ids = apply_mistral_chat_template(mistral_tokenizer,
-                                             messages,
-                                             chat_template=None,
-                                             tools=None)
+    tokens_ids = apply_mistral_chat_template(
+        mistral_tokenizer, messages, chat_template=None, tools=None
+    )
 
     string_tokens = mistral_tokenizer.mistral.decode(
-        tokens_ids, special_token_policy=SpecialTokenPolicy.KEEP)
+        tokens_ids, special_token_policy=SpecialTokenPolicy.KEEP
+    )
 
     expected_tokens = (
         r"<s>[SYSTEM_PROMPT]You are a helpful assistant.[THINK]Only return the"
         r" answer when you are confident.[/THINK][/SYSTEM_PROMPT]"
         r"[INST]What is 2+2?[/INST]"
         r"Let me think about it.[THINK]2+2 = 4[/THINK]The answer is 4.</s>"
-        r"[INST]Thanks, what is 3+3?[/INST]")
+        r"[INST]Thanks, what is 3+3?[/INST]"
+    )
 
     assert string_tokens == expected_tokens
 
@@ -2192,37 +2172,32 @@ def test_parse_chat_messages_single_empty_audio_with_uuid(
 ):
     audio_uuid = "abcd"
     conversation, mm_data, mm_uuids = parse_chat_messages(
-        [{
-            "role":
-            "user",
-            "content": [
-                {
-                    "type": "input_audio",
-                    "input_audio": {},
-                    "uuid": audio_uuid,
-                },
-                {
-                    "type": "text",
-                    "text": "What does the audio say?"
-                },
-            ],
-        }],
+        [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "input_audio",
+                        "input_audio": {},
+                        "uuid": audio_uuid,
+                    },
+                    {"type": "text", "text": "What does the audio say?"},
+                ],
+            }
+        ],
         qwen2_audio_model_config,
         qwen2_audio_tokenizer,
         content_format="string",
     )
 
-    assert conversation == [{
-        "role":
-        "user",
-        "content":
-        "Audio 1: <|audio_bos|><|AUDIO|><|audio_eos|>\nWhat does the audio say?"
-    }]
+    assert conversation == [
+        {
+            "role": "user",
+            "content": "Audio 1: <|audio_bos|><|AUDIO|><|audio_eos|>\nWhat does the audio say?",
+        }
+    ]
     _assert_mm_data_inputs(mm_data, {"audio": 1})
-    _assert_mm_uuids(mm_uuids,
-                     1,
-                     modality="audio",
-                     expected_uuids=[audio_uuid])
+    _assert_mm_uuids(mm_uuids, 1, modality="audio", expected_uuids=[audio_uuid])
 
 
 @pytest.mark.asyncio
@@ -2232,34 +2207,29 @@ async def test_parse_chat_messages_single_empty_audio_with_uuid_async(
 ):
     audio_uuid = "abcd"
     conversation, mm_future, mm_uuids = parse_chat_messages_futures(
-        [{
-            "role":
-            "user",
-            "content": [
-                {
-                    "type": "input_audio",
-                    "input_audio": {},
-                    "uuid": audio_uuid,
-                },
-                {
-                    "type": "text",
-                    "text": "What does the audio say?"
-                },
-            ],
-        }],
+        [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "input_audio",
+                        "input_audio": {},
+                        "uuid": audio_uuid,
+                    },
+                    {"type": "text", "text": "What does the audio say?"},
+                ],
+            }
+        ],
         qwen2_audio_model_config,
         qwen2_audio_tokenizer,
         content_format="string",
     )
 
-    assert conversation == [{
-        "role":
-        "user",
-        "content":
-        "Audio 1: <|audio_bos|><|AUDIO|><|audio_eos|>\nWhat does the audio say?"
-    }]
+    assert conversation == [
+        {
+            "role": "user",
+            "content": "Audio 1: <|audio_bos|><|AUDIO|><|audio_eos|>\nWhat does the audio say?",
+        }
+    ]
     _assert_mm_data_inputs(await mm_future, {"audio": 1})
-    _assert_mm_uuids(mm_uuids,
-                     1,
-                     modality="audio",
-                     expected_uuids=[audio_uuid])
+    _assert_mm_uuids(mm_uuids, 1, modality="audio", expected_uuids=[audio_uuid])
diff --git a/tests/lora/test_layers.py b/tests/lora/test_layers.py
index 7c29a85298..695e06e7c1 100644
--- a/tests/lora/test_layers.py
+++ b/tests/lora/test_layers.py
@@ -12,9 +12,6 @@ import torch
 import torch.nn.functional as F
 
 from vllm.config.lora import LoRAConfig
-
-# yapf conflicts with isort for this block
-# yapf: disable
 from vllm.lora.layers import (
     BaseLayerWithLoRA,
     ColumnParallelLinearWithLoRA,
@@ -32,8 +29,6 @@ from vllm.lora.layers import (
     RowParallelLinearWithShardedLoRA,
     VocabParallelEmbeddingWithLoRA,
 )
-
-# yapf: enable
 from vllm.lora.models import LoRALayerWeights, PackedLoRALayerWeights
 from vllm.lora.punica_wrapper import get_punica_wrapper
 from vllm.model_executor.layers.linear import (
diff --git a/tests/model_executor/model_loader/tensorizer_loader/test_tensorizer.py b/tests/model_executor/model_loader/tensorizer_loader/test_tensorizer.py
index ed86a6b8b1..57db1f98ba 100644
--- a/tests/model_executor/model_loader/tensorizer_loader/test_tensorizer.py
+++ b/tests/model_executor/model_loader/tensorizer_loader/test_tensorizer.py
@@ -17,8 +17,6 @@ import vllm.model_executor.model_loader.tensorizer
 from tests.utils import VLLM_PATH, RemoteOpenAIServer
 from vllm import LLM, SamplingParams
 from vllm.engine.arg_utils import EngineArgs
-
-# yapf: disable
 from vllm.model_executor.model_loader.tensorizer import (
     TensorizerConfig,
     TensorSerializer,
@@ -29,8 +27,6 @@ from vllm.model_executor.model_loader.tensorizer import (
 from vllm.model_executor.model_loader.tensorizer_loader import (
     BLACKLISTED_TENSORIZER_ARGS,
 )
-
-# yapf: enable
 from vllm.utils import PlaceholderModule
 
 from .conftest import DummyExecutor, assert_from_collective_rpc
diff --git a/tests/models/multimodal/generation/test_common.py b/tests/models/multimodal/generation/test_common.py
index 143d60fbf9..9168778a16 100644
--- a/tests/models/multimodal/generation/test_common.py
+++ b/tests/models/multimodal/generation/test_common.py
@@ -45,18 +45,17 @@ from .vlm_utils.types import (
 if current_platform.is_rocm():
     os.environ["VLLM_USE_TRITON_FLASH_ATTN"] = "0"
 
-# yapf: disable
 COMMON_BROADCAST_SETTINGS = {
     "test_type": VLMTestType.IMAGE,
     "dtype": "half",
     "max_tokens": 5,
     "tensor_parallel_size": 2,
     "hf_model_kwargs": {"device_map": "auto"},
-    "image_size_factors": [(.25, 0.5, 1.0)],
+    "image_size_factors": [(0.25, 0.5, 1.0)],
     "distributed_executor_backend": (
         "ray",
         "mp",
-    )
+    ),
 }
 
 ### Test configuration for specific models
@@ -96,22 +95,20 @@ VLM_TEST_SETTINGS = {
     #### Core tests to always run in the CI
     "llava": VLMTestInfo(
         models=["llava-hf/llava-1.5-7b-hf"],
-        test_type=(
-            VLMTestType.EMBEDDING,
-            VLMTestType.IMAGE,
-            VLMTestType.CUSTOM_INPUTS
-        ),
+        test_type=(VLMTestType.EMBEDDING, VLMTestType.IMAGE, VLMTestType.CUSTOM_INPUTS),
         prompt_formatter=lambda img_prompt: f"USER: {img_prompt}\nASSISTANT:",
         convert_assets_to_embeddings=model_utils.get_llava_embeddings,
         max_model_len=4096,
         auto_cls=AutoModelForImageTextToText,
         vllm_output_post_proc=model_utils.llava_image_vllm_to_hf_output,
-        custom_test_opts=[CustomTestOptions(
-            inputs=custom_inputs.multi_image_multi_aspect_ratio_inputs(
-                formatter=lambda img_prompt: f"USER: {img_prompt}\nASSISTANT:"
-            ),
-            limit_mm_per_prompt={"image": 4},
-        )],
+        custom_test_opts=[
+            CustomTestOptions(
+                inputs=custom_inputs.multi_image_multi_aspect_ratio_inputs(
+                    formatter=lambda img_prompt: f"USER: {img_prompt}\nASSISTANT:"
+                ),
+                limit_mm_per_prompt={"image": 4},
+            )
+        ],
         # TODO: Revert to "auto" when CPU backend can use torch > 2.6
         dtype="bfloat16" if current_platform.is_cpu() else "auto",
         marks=[pytest.mark.core_model, pytest.mark.cpu_model],
@@ -120,27 +117,27 @@ VLM_TEST_SETTINGS = {
         models=["google/paligemma-3b-mix-224"],
         test_type=VLMTestType.IMAGE,
         prompt_formatter=identity,
-        img_idx_to_prompt = lambda idx: "",
+        img_idx_to_prompt=lambda idx: "",
         # Paligemma uses its own sample prompts because the default one fails
-        single_image_prompts=IMAGE_ASSETS.prompts({
-            "stop_sign": "caption es",
-            "cherry_blossom": "What is in the picture?",
-        }),
+        single_image_prompts=IMAGE_ASSETS.prompts(
+            {
+                "stop_sign": "caption es",
+                "cherry_blossom": "What is in the picture?",
+            }
+        ),
         auto_cls=AutoModelForImageTextToText,
         vllm_output_post_proc=model_utils.paligemma_vllm_to_hf_output,
         dtype="bfloat16",
-        marks=[pytest.mark.skip(reason="vLLM does not support PrefixLM attention mask")],  # noqa: E501
+        marks=[
+            pytest.mark.skip(reason="vLLM does not support PrefixLM attention mask")
+        ],
     ),
     "qwen2_5_vl": VLMTestInfo(
         models=["Qwen/Qwen2.5-VL-3B-Instruct"],
-        test_type=(
-            VLMTestType.IMAGE,
-            VLMTestType.MULTI_IMAGE,
-            VLMTestType.VIDEO
-        ),
-        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501
-        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>", # noqa: E501
-        video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>", # noqa: E501
+        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE, VLMTestType.VIDEO),
+        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
+        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",  # noqa: E501
+        video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>",  # noqa: E501
         max_model_len=4096,
         max_num_seqs=2,
         auto_cls=AutoModelForImageTextToText,
@@ -150,17 +147,13 @@ VLM_TEST_SETTINGS = {
     ),
     "qwen2_5_omni": VLMTestInfo(
         models=["Qwen/Qwen2.5-Omni-3B"],
-        test_type=(
-            VLMTestType.IMAGE,
-            VLMTestType.MULTI_IMAGE,
-            VLMTestType.VIDEO
-        ),
-        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501
-        img_idx_to_prompt=lambda idx: "<|vision_bos|><|IMAGE|><|vision_eos|>", # noqa: E501
-        video_idx_to_prompt=lambda idx: "<|vision_bos|><|VIDEO|><|vision_eos|>", # noqa: E501
+        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE, VLMTestType.VIDEO),
+        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
+        img_idx_to_prompt=lambda idx: "<|vision_bos|><|IMAGE|><|vision_eos|>",  # noqa: E501
+        video_idx_to_prompt=lambda idx: "<|vision_bos|><|VIDEO|><|vision_eos|>",  # noqa: E501
         max_model_len=4096,
         max_num_seqs=2,
-        num_logprobs= 6 if current_platform.is_cpu() else 5,
+        num_logprobs=6 if current_platform.is_cpu() else 5,
         auto_cls=AutoModelForTextToWaveform,
         vllm_output_post_proc=model_utils.qwen2_vllm_to_hf_output,
         patch_hf_runner=model_utils.qwen2_5_omni_patch_hf_runner,
@@ -168,9 +161,9 @@ VLM_TEST_SETTINGS = {
         marks=[pytest.mark.core_model, pytest.mark.cpu_model],
     ),
     "ultravox": VLMTestInfo(
-        models = ["fixie-ai/ultravox-v0_5-llama-3_2-1b"],
+        models=["fixie-ai/ultravox-v0_5-llama-3_2-1b"],
         test_type=VLMTestType.AUDIO,
-        prompt_formatter=lambda audio_prompt: f"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n{audio_prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", # noqa: E501
+        prompt_formatter=lambda audio_prompt: f"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n{audio_prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",  # noqa: E501
         audio_idx_to_prompt=lambda idx: "<|audio|>",
         max_model_len=4096,
         max_num_seqs=2,
@@ -184,9 +177,11 @@ VLM_TEST_SETTINGS = {
     "llava-onevision-transformers": VLMTestInfo(
         models=["llava-hf/llava-onevision-qwen2-0.5b-ov-hf"],
         test_type=VLMTestType.IMAGE,
-        prompt_formatter=lambda vid_prompt: f"<|im_start|>user\n{vid_prompt}<|im_end|>\n<|im_start|>assistant\n",   # noqa: E501
+        prompt_formatter=lambda vid_prompt: f"<|im_start|>user\n{vid_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
         max_model_len=16384,
-        hf_model_kwargs=model_utils.llava_onevision_hf_model_kwargs("llava-hf/llava-onevision-qwen2-0.5b-ov-hf"),   # noqa: E501
+        hf_model_kwargs=model_utils.llava_onevision_hf_model_kwargs(
+            "llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
+        ),
         auto_cls=AutoModelForImageTextToText,
         vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output,
         image_size_factors=[(0.25, 0.5, 1.0)],
@@ -201,7 +196,7 @@ VLM_TEST_SETTINGS = {
     "idefics3-transformers": VLMTestInfo(
         models=["HuggingFaceTB/SmolVLM-256M-Instruct"],
         test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
-        prompt_formatter=lambda img_prompt:f"<|begin_of_text|>User:{img_prompt}<end_of_utterance>\nAssistant:",  # noqa: E501
+        prompt_formatter=lambda img_prompt: f"<|begin_of_text|>User:{img_prompt}<end_of_utterance>\nAssistant:",  # noqa: E501
         img_idx_to_prompt=lambda idx: "<image>",
         max_model_len=8192,
         max_num_seqs=2,
@@ -217,8 +212,8 @@ VLM_TEST_SETTINGS = {
     "qwen2_5_vl-transformers": VLMTestInfo(
         models=["Qwen/Qwen2.5-VL-3B-Instruct"],
         test_type=VLMTestType.IMAGE,
-        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501
-        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>", # noqa: E501
+        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
+        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",  # noqa: E501
         max_model_len=4096,
         max_num_seqs=2,
         auto_cls=AutoModelForImageTextToText,
@@ -228,23 +223,24 @@ VLM_TEST_SETTINGS = {
             "model_impl": "transformers",
         },
         # FIXME: Investigate mrope issue
-        marks=[large_gpu_mark(min_gb=32),
-               pytest.mark.skip(reason="Mrope issue")],
+        marks=[large_gpu_mark(min_gb=32), pytest.mark.skip(reason="Mrope issue")],
     ),
     #### Extended model tests
     "aria": VLMTestInfo(
         models=["rhymes-ai/Aria"],
         test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
-        prompt_formatter=lambda img_prompt: f"<|im_start|>user\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n ", # noqa: E501
+        prompt_formatter=lambda img_prompt: f"<|im_start|>user\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n ",  # noqa: E501
         img_idx_to_prompt=lambda idx: "<fim_prefix><|img|><fim_suffix>\n",
         max_model_len=4096,
         max_num_seqs=2,
         auto_cls=AutoModelForImageTextToText,
-        single_image_prompts=IMAGE_ASSETS.prompts({
-            "stop_sign": "<vlm_image>Please describe the image shortly.",
-            "cherry_blossom": "<vlm_image>Please infer the season with reason.",  # noqa: E501
-        }),
-        multi_image_prompt="<vlm_image><vlm_image>Describe the two images shortly.",    # noqa: E501
+        single_image_prompts=IMAGE_ASSETS.prompts(
+            {
+                "stop_sign": "<vlm_image>Please describe the image shortly.",
+                "cherry_blossom": "<vlm_image>Please infer the season with reason.",  # noqa: E501
+            }
+        ),
+        multi_image_prompt="<vlm_image><vlm_image>Describe the two images shortly.",  # noqa: E501
         stop_str=["<|im_end|>"],
         image_size_factors=[(0.10, 0.15)],
         max_tokens=64,
@@ -253,11 +249,13 @@ VLM_TEST_SETTINGS = {
     "aya_vision": VLMTestInfo(
         models=["CohereForAI/aya-vision-8b"],
         test_type=(VLMTestType.IMAGE),
-        prompt_formatter=lambda img_prompt: f"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{img_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>", # noqa: E501
-        single_image_prompts=IMAGE_ASSETS.prompts({
-            "stop_sign": "<image>What's the content in the center of the image?",  # noqa: E501
-            "cherry_blossom": "<image>What is the season?",  # noqa: E501
-        }),
+        prompt_formatter=lambda img_prompt: f"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{img_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",  # noqa: E501
+        single_image_prompts=IMAGE_ASSETS.prompts(
+            {
+                "stop_sign": "<image>What's the content in the center of the image?",  # noqa: E501
+                "cherry_blossom": "<image>What is the season?",  # noqa: E501
+            }
+        ),
         multi_image_prompt="<image><image>Describe the two images in detail.",  # noqa: E501
         max_model_len=4096,
         max_num_seqs=2,
@@ -267,11 +265,13 @@ VLM_TEST_SETTINGS = {
     "aya_vision-multi_image": VLMTestInfo(
         models=["CohereForAI/aya-vision-8b"],
         test_type=(VLMTestType.MULTI_IMAGE),
-        prompt_formatter=lambda img_prompt: f"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{img_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>", # noqa: E501
-        single_image_prompts=IMAGE_ASSETS.prompts({
-            "stop_sign": "<image>What's the content in the center of the image?",  # noqa: E501
-            "cherry_blossom": "<image>What is the season?",  # noqa: E501
-        }),
+        prompt_formatter=lambda img_prompt: f"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{img_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",  # noqa: E501
+        single_image_prompts=IMAGE_ASSETS.prompts(
+            {
+                "stop_sign": "<image>What's the content in the center of the image?",  # noqa: E501
+                "cherry_blossom": "<image>What is the season?",  # noqa: E501
+            }
+        ),
         multi_image_prompt="<image><image>Describe the two images in detail.",  # noqa: E501
         max_model_len=4096,
         max_num_seqs=2,
@@ -297,27 +297,29 @@ VLM_TEST_SETTINGS = {
         max_num_seqs=2,
         auto_cls=AutoModelForImageTextToText,
         # For chameleon, we only compare the sequences
-        vllm_output_post_proc = lambda vllm_output, model: vllm_output[:2],
-        hf_output_post_proc = lambda hf_output, model: hf_output[:2],
+        vllm_output_post_proc=lambda vllm_output, model: vllm_output[:2],
+        hf_output_post_proc=lambda hf_output, model: hf_output[:2],
         comparator=check_outputs_equal,
         max_tokens=8,
         dtype="bfloat16",
     ),
     "deepseek_vl_v2": VLMTestInfo(
-        models=["Isotr0py/deepseek-vl2-tiny"], # model repo using dynamic module
+        models=["Isotr0py/deepseek-vl2-tiny"],  # model repo using dynamic module
         test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
-        prompt_formatter=lambda img_prompt: f"<|User|>: {img_prompt}\n\n<|Assistant|>: ", # noqa: E501
+        prompt_formatter=lambda img_prompt: f"<|User|>: {img_prompt}\n\n<|Assistant|>: ",  # noqa: E501
         max_model_len=4096,
         max_num_seqs=2,
-        single_image_prompts=IMAGE_ASSETS.prompts({
-            "stop_sign": "<image>\nWhat's the content in the center of the image?", # noqa: E501
-            "cherry_blossom": "<image>\nPlease infer the season with reason in details.",   # noqa: E501
-        }),
-        multi_image_prompt="image_1:<image>\nimage_2:<image>\nWhich image can we see the car and the tower?",    # noqa: E501
+        single_image_prompts=IMAGE_ASSETS.prompts(
+            {
+                "stop_sign": "<image>\nWhat's the content in the center of the image?",  # noqa: E501
+                "cherry_blossom": "<image>\nPlease infer the season with reason in details.",  # noqa: E501
+            }
+        ),
+        multi_image_prompt="image_1:<image>\nimage_2:<image>\nWhich image can we see the car and the tower?",  # noqa: E501
         patch_hf_runner=model_utils.deepseekvl2_patch_hf_runner,
         hf_output_post_proc=model_utils.deepseekvl2_trunc_hf_output,
         stop_str=["<｜end▁of▁sentence｜>", "<｜begin▁of▁sentence｜>"],  # noqa: E501
-        image_size_factors=[(), (1.0, ), (1.0, 1.0, 1.0), (0.1, 0.5, 1.0)],
+        image_size_factors=[(), (1.0,), (1.0, 1.0, 1.0), (0.1, 0.5, 1.0)],
     ),
     "fuyu": VLMTestInfo(
         models=["adept/fuyu-8b"],
@@ -336,11 +338,13 @@ VLM_TEST_SETTINGS = {
     "gemma3": VLMTestInfo(
         models=["google/gemma-3-4b-it"],
         test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
-        prompt_formatter=lambda img_prompt: f"<bos><start_of_turn>user\n{img_prompt}<end_of_turn>\n<start_of_turn>model\n", # noqa: E501
-        single_image_prompts=IMAGE_ASSETS.prompts({
-            "stop_sign": "<start_of_image>What's the content in the center of the image?",  # noqa: E501
-            "cherry_blossom": "<start_of_image>What is the season?",  # noqa: E501
-        }),
+        prompt_formatter=lambda img_prompt: f"<bos><start_of_turn>user\n{img_prompt}<end_of_turn>\n<start_of_turn>model\n",  # noqa: E501
+        single_image_prompts=IMAGE_ASSETS.prompts(
+            {
+                "stop_sign": "<start_of_image>What's the content in the center of the image?",  # noqa: E501
+                "cherry_blossom": "<start_of_image>What is the season?",  # noqa: E501
+            }
+        ),
         multi_image_prompt="<start_of_image><start_of_image>Describe the two images in detail.",  # noqa: E501
         max_model_len=4096,
         max_num_seqs=2,
@@ -353,10 +357,12 @@ VLM_TEST_SETTINGS = {
         models=["zai-org/glm-4v-9b"],
         test_type=VLMTestType.IMAGE,
         prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|assistant|>",  # noqa: E501
-        single_image_prompts=IMAGE_ASSETS.prompts({
-            "stop_sign": "<|begin_of_image|><|endoftext|><|end_of_image|>What's the content in the center of the image?",  # noqa: E501
-            "cherry_blossom": "<|begin_of_image|><|endoftext|><|end_of_image|>What is the season?",  # noqa: E501
-        }),
+        single_image_prompts=IMAGE_ASSETS.prompts(
+            {
+                "stop_sign": "<|begin_of_image|><|endoftext|><|end_of_image|>What's the content in the center of the image?",  # noqa: E501
+                "cherry_blossom": "<|begin_of_image|><|endoftext|><|end_of_image|>What is the season?",  # noqa: E501
+            }
+        ),
         max_model_len=2048,
         max_num_seqs=2,
         get_stop_token_ids=lambda tok: [151329, 151336, 151338],
@@ -372,8 +378,8 @@ VLM_TEST_SETTINGS = {
         models=["zai-org/GLM-4.1V-9B-Thinking"],
         test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
         prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|assistant|>",  # noqa: E501
-        img_idx_to_prompt=lambda idx: "<|begin_of_image|><|image|><|end_of_image|>", # noqa: E501
-        video_idx_to_prompt=lambda idx: "<|begin_of_video|><|video|><|end_of_video|>", # noqa: E501
+        img_idx_to_prompt=lambda idx: "<|begin_of_image|><|image|><|end_of_image|>",  # noqa: E501
+        video_idx_to_prompt=lambda idx: "<|begin_of_video|><|video|><|end_of_video|>",  # noqa: E501
         max_model_len=2048,
         max_num_seqs=2,
         get_stop_token_ids=lambda tok: [151329, 151336, 151338],
@@ -390,23 +396,27 @@ VLM_TEST_SETTINGS = {
         max_num_seqs=2,
         auto_cls=AutoModelForImageTextToText,
         patch_hf_runner=model_utils.glm4_1v_patch_hf_runner,
-        custom_test_opts=[CustomTestOptions(
-            inputs=custom_inputs.video_with_metadata_glm4_1v(),
-            limit_mm_per_prompt={"video": 1},
-        )],
+        custom_test_opts=[
+            CustomTestOptions(
+                inputs=custom_inputs.video_with_metadata_glm4_1v(),
+                limit_mm_per_prompt={"video": 1},
+            )
+        ],
         marks=[large_gpu_mark(min_gb=32)],
     ),
     "h2ovl": VLMTestInfo(
-        models = [
+        models=[
             "h2oai/h2ovl-mississippi-800m",
             "h2oai/h2ovl-mississippi-2b",
         ],
         test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
-        prompt_formatter=lambda img_prompt: f"<|prompt|>{img_prompt}<|end|><|answer|>", # noqa: E501
-        single_image_prompts=IMAGE_ASSETS.prompts({
-            "stop_sign": "<image>\nWhat's the content in the center of the image?",  # noqa: E501
-            "cherry_blossom": "<image>\nWhat is the season?",
-        }),
+        prompt_formatter=lambda img_prompt: f"<|prompt|>{img_prompt}<|end|><|answer|>",  # noqa: E501
+        single_image_prompts=IMAGE_ASSETS.prompts(
+            {
+                "stop_sign": "<image>\nWhat's the content in the center of the image?",  # noqa: E501
+                "cherry_blossom": "<image>\nWhat is the season?",
+            }
+        ),
         multi_image_prompt="Image-1: <image>\nImage-2: <image>\nDescribe the two images in short.",  # noqa: E501
         max_model_len=8192,
         use_tokenizer_eos=True,
@@ -416,7 +426,7 @@ VLM_TEST_SETTINGS = {
     "idefics3": VLMTestInfo(
         models=["HuggingFaceTB/SmolVLM-256M-Instruct"],
         test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
-        prompt_formatter=lambda img_prompt:f"<|begin_of_text|>User:{img_prompt}<end_of_utterance>\nAssistant:",  # noqa: E501
+        prompt_formatter=lambda img_prompt: f"<|begin_of_text|>User:{img_prompt}<end_of_utterance>\nAssistant:",  # noqa: E501
         img_idx_to_prompt=lambda idx: "<image>",
         max_model_len=8192,
         max_num_seqs=2,
@@ -431,11 +441,13 @@ VLM_TEST_SETTINGS = {
             # "OpenGVLab/Mono-InternVL-2B",
         ],
         test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
-        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>Assistant\n", # noqa: E501
-        single_image_prompts=IMAGE_ASSETS.prompts({
-            "stop_sign": "<image>\nWhat's the content in the center of the image?",  # noqa: E501
-            "cherry_blossom": "<image>\nWhat is the season?",
-        }),
+        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>Assistant\n",  # noqa: E501
+        single_image_prompts=IMAGE_ASSETS.prompts(
+            {
+                "stop_sign": "<image>\nWhat's the content in the center of the image?",  # noqa: E501
+                "cherry_blossom": "<image>\nWhat is the season?",
+            }
+        ),
         multi_image_prompt="Image-1: <image>\nImage-2: <image>\nDescribe the two images in short.",  # noqa: E501
         max_model_len=4096,
         use_tokenizer_eos=True,
@@ -446,7 +458,7 @@ VLM_TEST_SETTINGS = {
             "OpenGVLab/InternVL3-1B",
         ],
         test_type=VLMTestType.VIDEO,
-        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>Assistant\n", # noqa: E501
+        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>Assistant\n",  # noqa: E501
         video_idx_to_prompt=lambda idx: "<video>",
         max_model_len=8192,
         use_tokenizer_eos=True,
@@ -459,7 +471,7 @@ VLM_TEST_SETTINGS = {
             VLMTestType.MULTI_IMAGE,
             VLMTestType.VIDEO,
         ),
-        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>Assistant\n", # noqa: E501
+        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>Assistant\n",  # noqa: E501
         img_idx_to_prompt=lambda idx: "<IMG_CONTEXT>",
         video_idx_to_prompt=lambda idx: "<video>",
         max_model_len=8192,
@@ -469,7 +481,7 @@ VLM_TEST_SETTINGS = {
     "kimi_vl": VLMTestInfo(
         models=["moonshotai/Kimi-VL-A3B-Instruct"],
         test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
-        prompt_formatter=lambda img_prompt: f"<|im_user|>user<|im_middle|>{img_prompt}<|im_end|><|im_assistant|>assistant<|im_middle|>", # noqa: E501
+        prompt_formatter=lambda img_prompt: f"<|im_user|>user<|im_middle|>{img_prompt}<|im_end|><|im_assistant|>assistant<|im_middle|>",  # noqa: E501
         img_idx_to_prompt=lambda _: "<|media_start|>image<|media_content|><|media_pad|><|media_end|>",  # noqa: E501
         max_model_len=8192,
         max_num_seqs=2,
@@ -480,11 +492,11 @@ VLM_TEST_SETTINGS = {
     ),
     "llama4": VLMTestInfo(
         models=["meta-llama/Llama-4-Scout-17B-16E-Instruct"],
-        prompt_formatter=lambda img_prompt: f"<|begin_of_text|><|header_start|>user<|header_end|>\n\n{img_prompt}<|eot|><|header_start|>assistant<|header_end|>\n\n", # noqa: E501
+        prompt_formatter=lambda img_prompt: f"<|begin_of_text|><|header_start|>user<|header_end|>\n\n{img_prompt}<|eot|><|header_start|>assistant<|header_end|>\n\n",  # noqa: E501
         img_idx_to_prompt=lambda _: "<|image|>",
         test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
         distributed_executor_backend="mp",
-        image_size_factors=[(.25, 0.5, 1.0)],
+        image_size_factors=[(0.25, 0.5, 1.0)],
         hf_model_kwargs={"device_map": "auto"},
         max_model_len=8192,
         max_num_seqs=4,
@@ -500,28 +512,34 @@ VLM_TEST_SETTINGS = {
         max_model_len=10240,
         auto_cls=AutoModelForImageTextToText,
         vllm_output_post_proc=model_utils.llava_image_vllm_to_hf_output,
-        custom_test_opts=[CustomTestOptions(
-            inputs=custom_inputs.multi_image_multi_aspect_ratio_inputs(
-                formatter=lambda img_prompt: f"[INST] {img_prompt} [/INST]"
-            ),
-            limit_mm_per_prompt={"image": 4},
-        )],
+        custom_test_opts=[
+            CustomTestOptions(
+                inputs=custom_inputs.multi_image_multi_aspect_ratio_inputs(
+                    formatter=lambda img_prompt: f"[INST] {img_prompt} [/INST]"
+                ),
+                limit_mm_per_prompt={"image": 4},
+            )
+        ],
     ),
     "llava_onevision": VLMTestInfo(
         models=["llava-hf/llava-onevision-qwen2-0.5b-ov-hf"],
         test_type=VLMTestType.CUSTOM_INPUTS,
-        prompt_formatter=lambda vid_prompt: f"<|im_start|>user\n{vid_prompt}<|im_end|>\n<|im_start|>assistant\n",   # noqa: E501
+        prompt_formatter=lambda vid_prompt: f"<|im_start|>user\n{vid_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
         num_video_frames=16,
         max_model_len=16384,
-        hf_model_kwargs=model_utils.llava_onevision_hf_model_kwargs("llava-hf/llava-onevision-qwen2-0.5b-ov-hf"),   # noqa: E501
+        hf_model_kwargs=model_utils.llava_onevision_hf_model_kwargs(
+            "llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
+        ),
         auto_cls=AutoModelForImageTextToText,
         vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output,
-        custom_test_opts=[CustomTestOptions(
-            inputs=custom_inputs.multi_video_multi_aspect_ratio_inputs(
-                formatter=lambda vid_prompt: f"<|im_start|>user\n{vid_prompt}<|im_end|>\n<|im_start|>assistant\n",   # noqa: E501
-            ),
-            limit_mm_per_prompt={"video": 4},
-        )],
+        custom_test_opts=[
+            CustomTestOptions(
+                inputs=custom_inputs.multi_video_multi_aspect_ratio_inputs(
+                    formatter=lambda vid_prompt: f"<|im_start|>user\n{vid_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
+                ),
+                limit_mm_per_prompt={"video": 4},
+            )
+        ],
     ),
     "llava_next_video": VLMTestInfo(
         models=["llava-hf/LLaVA-NeXT-Video-7B-hf"],
@@ -563,7 +581,9 @@ VLM_TEST_SETTINGS = {
         img_idx_to_prompt=lambda idx: "(<image>./</image>)\n",
         max_model_len=4096,
         max_num_seqs=2,
-        get_stop_token_ids=lambda tok: tok.convert_tokens_to_ids(['<|im_end|>', '<|endoftext|>']),  # noqa: E501
+        get_stop_token_ids=lambda tok: tok.convert_tokens_to_ids(
+            ["<|im_end|>", "<|endoftext|>"]
+        ),  # noqa: E501
         hf_output_post_proc=model_utils.minicpmv_trunc_hf_output,
         patch_hf_runner=model_utils.minicpmo_26_patch_hf_runner,
         # FIXME: https://huggingface.co/openbmb/MiniCPM-o-2_6/discussions/49
@@ -576,13 +596,15 @@ VLM_TEST_SETTINGS = {
         img_idx_to_prompt=lambda idx: "(<image>./</image>)\n",
         max_model_len=4096,
         max_num_seqs=2,
-        get_stop_token_ids=lambda tok: tok.convert_tokens_to_ids(['<|im_end|>', '<|endoftext|>']),  # noqa: E501
+        get_stop_token_ids=lambda tok: tok.convert_tokens_to_ids(
+            ["<|im_end|>", "<|endoftext|>"]
+        ),  # noqa: E501
         hf_output_post_proc=model_utils.minicpmv_trunc_hf_output,
         patch_hf_runner=model_utils.minicpmv_26_patch_hf_runner,
     ),
     "minimax_vl_01": VLMTestInfo(
         models=["MiniMaxAI/MiniMax-VL-01"],
-        prompt_formatter=lambda img_prompt: f"<beginning_of_sentence>user: {img_prompt} assistant:<end_of_sentence>", # noqa: E501
+        prompt_formatter=lambda img_prompt: f"<beginning_of_sentence>user: {img_prompt} assistant:<end_of_sentence>",  # noqa: E501
         img_idx_to_prompt=lambda _: "<image>",
         test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
         max_model_len=8192,
@@ -604,8 +626,8 @@ VLM_TEST_SETTINGS = {
     "ovis1_6-gemma2": VLMTestInfo(
         models=["AIDC-AI/Ovis1.6-Gemma2-9B"],
         test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
-        prompt_formatter=lambda img_prompt: f"<bos><start_of_turn>user\n{img_prompt}<end_of_turn>\n<start_of_turn>model\n", # noqa: E501
-        img_idx_to_prompt=lambda idx: "<image>\n", # noqa: E501
+        prompt_formatter=lambda img_prompt: f"<bos><start_of_turn>user\n{img_prompt}<end_of_turn>\n<start_of_turn>model\n",  # noqa: E501
+        img_idx_to_prompt=lambda idx: "<image>\n",  # noqa: E501
         max_model_len=4096,
         max_num_seqs=2,
         dtype="half",
@@ -617,8 +639,8 @@ VLM_TEST_SETTINGS = {
     "ovis2": VLMTestInfo(
         models=["AIDC-AI/Ovis2-1B"],
         test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
-        prompt_formatter=lambda img_prompt: f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501
-        img_idx_to_prompt=lambda idx: "<image>\n", # noqa: E501
+        prompt_formatter=lambda img_prompt: f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
+        img_idx_to_prompt=lambda idx: "<image>\n",  # noqa: E501
         max_model_len=4096,
         max_num_seqs=2,
         dtype="half",
@@ -628,13 +650,9 @@ VLM_TEST_SETTINGS = {
     ),
     "ovis2_5": VLMTestInfo(
         models=["AIDC-AI/Ovis2.5-2B"],
-        test_type=(
-            VLMTestType.IMAGE,
-            VLMTestType.MULTI_IMAGE,
-            VLMTestType.VIDEO
-        ),
-        prompt_formatter=lambda img_prompt: f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501
-        img_idx_to_prompt=lambda idx: "<image>\n", # noqa: E501
+        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE, VLMTestType.VIDEO),
+        prompt_formatter=lambda img_prompt: f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
+        img_idx_to_prompt=lambda idx: "<image>\n",  # noqa: E501
         video_idx_to_prompt=lambda idx: "<video>\n",
         max_model_len=4096,
         max_num_seqs=2,
@@ -646,7 +664,7 @@ VLM_TEST_SETTINGS = {
     "phi3v": VLMTestInfo(
         models=["microsoft/Phi-3.5-vision-instruct"],
         test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
-        prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|end|>\n<|assistant|>\n", # noqa: E501
+        prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|end|>\n<|assistant|>\n",  # noqa: E501
         img_idx_to_prompt=lambda idx: f"<|image_{idx}|>\n",
         max_model_len=4096,
         max_num_seqs=2,
@@ -681,15 +699,11 @@ VLM_TEST_SETTINGS = {
     ),
     "qwen2_vl": VLMTestInfo(
         models=["Qwen/Qwen2-VL-2B-Instruct"],
-        test_type=(
-            VLMTestType.IMAGE,
-            VLMTestType.MULTI_IMAGE,
-            VLMTestType.VIDEO
-        ),
-        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501
-        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>", # noqa: E501
-        video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>", # noqa: E501
-        multi_image_prompt="Picture 1: <vlm_image>\nPicture 2: <vlm_image>\nDescribe these two images with one paragraph respectively.",    # noqa: E501
+        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE, VLMTestType.VIDEO),
+        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
+        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",  # noqa: E501
+        video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>",  # noqa: E501
+        multi_image_prompt="Picture 1: <vlm_image>\nPicture 2: <vlm_image>\nDescribe these two images with one paragraph respectively.",  # noqa: E501
         max_model_len=4096,
         max_num_seqs=2,
         auto_cls=AutoModelForImageTextToText,
@@ -700,11 +714,13 @@ VLM_TEST_SETTINGS = {
     "skywork_r1v": VLMTestInfo(
         models=["Skywork/Skywork-R1V-38B"],
         test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
-        prompt_formatter=lambda img_prompt: f"<｜begin▁of▁sentence｜><｜User｜>\n{img_prompt}<｜Assistant｜><think>\n", # noqa: E501
-        single_image_prompts=IMAGE_ASSETS.prompts({
-            "stop_sign": "<image>\nWhat's the content in the center of the image?",  # noqa: E501
-            "cherry_blossom": "<image>\nWhat is the season?",
-        }),
+        prompt_formatter=lambda img_prompt: f"<｜begin▁of▁sentence｜><｜User｜>\n{img_prompt}<｜Assistant｜><think>\n",  # noqa: E501
+        single_image_prompts=IMAGE_ASSETS.prompts(
+            {
+                "stop_sign": "<image>\nWhat's the content in the center of the image?",  # noqa: E501
+                "cherry_blossom": "<image>\nWhat is the season?",
+            }
+        ),
         multi_image_prompt="<image>\n<image>\nDescribe the two images in short.",  # noqa: E501
         max_model_len=4096,
         use_tokenizer_eos=True,
@@ -737,9 +753,9 @@ VLM_TEST_SETTINGS = {
             VLMTestType.MULTI_IMAGE,
             VLMTestType.VIDEO,
         ),
-        prompt_formatter=lambda img_prompt: f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501
-        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>", # noqa: E501
-        video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>", # noqa: E501
+        prompt_formatter=lambda img_prompt: f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
+        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",  # noqa: E501
+        video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>",  # noqa: E501
         max_model_len=4096,
         max_num_seqs=2,
         auto_cls=AutoModelForImageTextToText,
@@ -752,11 +768,11 @@ VLM_TEST_SETTINGS = {
         prompt_formatter=lambda img_prompt: f"USER: {img_prompt}\nASSISTANT:",
         max_model_len=4096,
         auto_cls=AutoModelForImageTextToText,
-        vllm_output_post_proc = lambda vllm_output, model: vllm_output[:2],
-        hf_output_post_proc = lambda hf_output, model: hf_output[:2],
+        vllm_output_post_proc=lambda vllm_output, model: vllm_output[:2],
+        hf_output_post_proc=lambda hf_output, model: hf_output[:2],
         comparator=check_outputs_equal,
         marks=multi_gpu_marks(num_gpus=2),
-        **COMMON_BROADCAST_SETTINGS # type: ignore
+        **COMMON_BROADCAST_SETTINGS,  # type: ignore
     ),
     "llava-broadcast": VLMTestInfo(
         models=["llava-hf/llava-1.5-7b-hf"],
@@ -765,7 +781,7 @@ VLM_TEST_SETTINGS = {
         auto_cls=AutoModelForImageTextToText,
         vllm_output_post_proc=model_utils.llava_image_vllm_to_hf_output,
         marks=multi_gpu_marks(num_gpus=2),
-        **COMMON_BROADCAST_SETTINGS # type: ignore
+        **COMMON_BROADCAST_SETTINGS,  # type: ignore
     ),
     "llava_next-broadcast": VLMTestInfo(
         models=["llava-hf/llava-v1.6-mistral-7b-hf"],
@@ -774,12 +790,12 @@ VLM_TEST_SETTINGS = {
         auto_cls=AutoModelForImageTextToText,
         vllm_output_post_proc=model_utils.llava_image_vllm_to_hf_output,
         marks=multi_gpu_marks(num_gpus=2),
-        **COMMON_BROADCAST_SETTINGS # type: ignore
+        **COMMON_BROADCAST_SETTINGS,  # type: ignore
     ),
     ### Custom input edge-cases for specific models
     "intern_vl-diff-patches": VLMTestInfo(
         models=["OpenGVLab/InternVL2-2B"],
-        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>Assistant\n", # noqa: E501
+        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>Assistant\n",  # noqa: E501
         test_type=VLMTestType.CUSTOM_INPUTS,
         max_model_len=4096,
         use_tokenizer_eos=True,
@@ -788,7 +804,8 @@ VLM_TEST_SETTINGS = {
             CustomTestOptions(
                 inputs=inp,
                 limit_mm_per_prompt={"image": 2},
-            ) for inp in custom_inputs.different_patch_input_cases_internvl()
+            )
+            for inp in custom_inputs.different_patch_input_cases_internvl()
         ],
     ),
     "llava_onevision-multiple-images": VLMTestInfo(
@@ -797,14 +814,18 @@ VLM_TEST_SETTINGS = {
         max_model_len=16384,
         max_num_seqs=2,
         auto_cls=AutoModelForImageTextToText,
-        hf_model_kwargs=model_utils.llava_onevision_hf_model_kwargs("llava-hf/llava-onevision-qwen2-0.5b-ov-hf"),   # noqa: E501
+        hf_model_kwargs=model_utils.llava_onevision_hf_model_kwargs(
+            "llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
+        ),  # noqa: E501
         vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output,
-        custom_test_opts=[CustomTestOptions(
-            inputs=custom_inputs.multi_image_multi_aspect_ratio_inputs(
-                formatter=lambda vid_prompt: f"<|im_start|>user\n{vid_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
-            ),
-            limit_mm_per_prompt={"image": 4},
-        )],
+        custom_test_opts=[
+            CustomTestOptions(
+                inputs=custom_inputs.multi_image_multi_aspect_ratio_inputs(
+                    formatter=lambda vid_prompt: f"<|im_start|>user\n{vid_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
+                ),
+                limit_mm_per_prompt={"image": 4},
+            )
+        ],
     ),
     # regression test for https://github.com/vllm-project/vllm/issues/15122
     "qwen2_5_vl-windows-attention": VLMTestInfo(
@@ -814,13 +835,14 @@ VLM_TEST_SETTINGS = {
         max_num_seqs=2,
         auto_cls=AutoModelForImageTextToText,
         vllm_output_post_proc=model_utils.qwen2_vllm_to_hf_output,
-        custom_test_opts=[CustomTestOptions(
-            inputs=custom_inputs.windows_attention_image_qwen2_5_vl(),
-            limit_mm_per_prompt={"image": 1},
-        )],
+        custom_test_opts=[
+            CustomTestOptions(
+                inputs=custom_inputs.windows_attention_image_qwen2_5_vl(),
+                limit_mm_per_prompt={"image": 1},
+            )
+        ],
     ),
 }
-# yapf: enable
 
 
 def _mark_splits(
diff --git a/tests/models/multimodal/generation/vlm_utils/case_filtering.py b/tests/models/multimodal/generation/vlm_utils/case_filtering.py
index 89d5829fd0..77e478e53c 100644
--- a/tests/models/multimodal/generation/vlm_utils/case_filtering.py
+++ b/tests/models/multimodal/generation/vlm_utils/case_filtering.py
@@ -114,7 +114,6 @@ def get_parametrized_options(
                 raise ValueError("Test has type CUSTOM_INPUTS, but none given")
             iter_kwargs["custom_test_opts"] = test_info.custom_test_opts
 
-        # yapf: disable
         # Wrap all model cases in a pytest parameter & pass marks through
         return [
             pytest.param(
@@ -122,10 +121,10 @@ def get_parametrized_options(
                 ExpandableVLMTestArgs(
                     **{k: v for k, v in zip(iter_kwargs.keys(), case)}
                 ),
-                marks=test_info.marks if test_info.marks is not None else []
-            ) for case in list(itertools.product(*iter_kwargs.values()))
+                marks=test_info.marks if test_info.marks is not None else [],
+            )
+            for case in list(itertools.product(*iter_kwargs.values()))
         ]
-        # yapf: enable
 
     # Get a list per model type, where each entry contains a tuple of all of
     # that model type's cases, then flatten them into the top level so that
diff --git a/tests/models/multimodal/generation/vlm_utils/model_utils.py b/tests/models/multimodal/generation/vlm_utils/model_utils.py
index d217f28904..f924bea9f4 100644
--- a/tests/models/multimodal/generation/vlm_utils/model_utils.py
+++ b/tests/models/multimodal/generation/vlm_utils/model_utils.py
@@ -418,7 +418,6 @@ def h2ovl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
             self.image_size = self.vision_config.image_size
 
         def __call__(self, text: str, images: Union[Image, list[Image]], **kwargs):
-            # yapf: disable
             from vllm.model_executor.models.h2ovl import (
                 IMG_CONTEXT,
                 IMG_END,
@@ -426,7 +425,6 @@ def h2ovl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
                 image_to_pixel_values_h2ovl,
             )
 
-            # yapf: enable
             images = [images] if isinstance(images, Image) else images
             pixel_values = [
                 image_to_pixel_values_h2ovl(
diff --git a/tests/models/multimodal/generation/vlm_utils/types.py b/tests/models/multimodal/generation/vlm_utils/types.py
index a28e85f91a..bb34d1cc6d 100644
--- a/tests/models/multimodal/generation/vlm_utils/types.py
+++ b/tests/models/multimodal/generation/vlm_utils/types.py
@@ -33,24 +33,26 @@ TEST_IMG_PLACEHOLDER = "<vlm_image>"
 TEST_VIDEO_PLACEHOLDER = "<vlm_video>"
 TEST_AUDIO_PLACEHOLDER = "<lmm_audio>"
 
-# yapf: disable
-SINGLE_IMAGE_BASE_PROMPTS = IMAGE_ASSETS.prompts({
-    "stop_sign": f"{TEST_IMG_PLACEHOLDER}What's the content of the image?",
-    "cherry_blossom": f"{TEST_IMG_PLACEHOLDER}What is the season?",
-})
-SINGLE_AUDIO_BASE_PROMPT = AUDIO_ASSETS.prompts({
-    "mary_had_lamb": f"{TEST_AUDIO_PLACEHOLDER}Transcribe this audio into English.",    # noqa: E501
-    "winning_call": f"{TEST_AUDIO_PLACEHOLDER}What is happening in this audio clip?",     # noqa: E501
-})
+SINGLE_IMAGE_BASE_PROMPTS = IMAGE_ASSETS.prompts(
+    {
+        "stop_sign": f"{TEST_IMG_PLACEHOLDER}What's the content of the image?",
+        "cherry_blossom": f"{TEST_IMG_PLACEHOLDER}What is the season?",
+    }
+)
+SINGLE_AUDIO_BASE_PROMPT = AUDIO_ASSETS.prompts(
+    {
+        "mary_had_lamb": f"{TEST_AUDIO_PLACEHOLDER}Transcribe this audio into English.",  # noqa: E501
+        "winning_call": f"{TEST_AUDIO_PLACEHOLDER}What is happening in this audio clip?",  # noqa: E501
+    }
+)
 
 MULTI_IMAGE_BASE_PROMPT = f"Image-1: {TEST_IMG_PLACEHOLDER}Image-2: {TEST_IMG_PLACEHOLDER}Describe the two images in detail.\n"  # noqa: E501
 VIDEO_BASE_PROMPT = f"{TEST_VIDEO_PLACEHOLDER}Why is this video funny?"
 
 
-IMAGE_SIZE_FACTORS = [(), (1.0, ), (1.0, 1.0, 1.0), (0.25, 0.5, 1.0)]
-EMBEDDING_SIZE_FACTORS = [(), (1.0, ), (1.0, 1.0, 1.0)]
+IMAGE_SIZE_FACTORS = [(), (1.0,), (1.0, 1.0, 1.0), (0.25, 0.5, 1.0)]
+EMBEDDING_SIZE_FACTORS = [(), (1.0,), (1.0, 1.0, 1.0)]
 RunnerOutput = tuple[list[int], str, Optional[SampleLogprobs]]
-# yapf: enable
 
 
 class PromptWithMultiModalInput(NamedTuple):
diff --git a/tests/models/multimodal/processing/test_common.py b/tests/models/multimodal/processing/test_common.py
index 4f6a5649b4..d9d85f7e0c 100644
--- a/tests/models/multimodal/processing/test_common.py
+++ b/tests/models/multimodal/processing/test_common.py
@@ -322,80 +322,81 @@ def _test_processing_correctness_one(
         )
 
 
-# yapf: disable
-@pytest.mark.parametrize("model_id", [
-    "rhymes-ai/Aria",
-    "CohereForAI/aya-vision-8b",
-    "Salesforce/blip2-opt-2.7b",
-    "facebook/chameleon-7b",
-    "CohereLabs/command-a-vision-07-2025",
-    "deepseek-ai/deepseek-vl2-tiny",
-    "baidu/ERNIE-4.5-VL-28B-A3B-PT",
-    "adept/fuyu-8b",
-    "google/gemma-3-4b-it",
-    "google/gemma-3n-E2B-it",
-    "zai-org/glm-4v-9b",
-    "zai-org/GLM-4.1V-9B-Thinking",
-    "zai-org/GLM-4.5V",
-    "ibm-granite/granite-speech-3.3-2b",
-    "h2oai/h2ovl-mississippi-800m",
-    "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B",
-    "HuggingFaceM4/Idefics3-8B-Llama3",
-    "internlm/Intern-S1",
-    "OpenGVLab/InternVL2-1B",
-    "OpenGVLab/InternVL3-1B",
-    "OpenGVLab/InternVL3_5-1B",
-    "OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview",
-    "OpenGVLab/InternVL3_5-30B-A3B",
-    "Kwai-Keye/Keye-VL-8B-Preview",
-    "Kwai-Keye/Keye-VL-1_5-8B",
-    "moonshotai/Kimi-VL-A3B-Instruct",
-    "meta-llama/Llama-4-Scout-17B-16E-Instruct",
-    "llava-hf/llava-1.5-7b-hf",
-    "llava-hf/llava-v1.6-mistral-7b-hf",
-    "llava-hf/LLaVA-NeXT-Video-7B-hf",
-    "llava-hf/llava-onevision-qwen2-0.5b-ov-hf",
-    "TIGER-Lab/Mantis-8B-siglip-llama3",
-    "mispeech/midashenglm-7b",
-    "openbmb/MiniCPM-Llama3-V-2_5",
-    "openbmb/MiniCPM-o-2_6",
-    "openbmb/MiniCPM-V-2_6",
-    "MiniMaxAI/MiniMax-VL-01",
-    "allenai/Molmo-7B-D-0924",
-    "allenai/Molmo-7B-O-0924",
-    "nvidia/NVLM-D-72B",
-    "nvidia/Llama-3.1-Nemotron-Nano-VL-8B-V1",
-    "AIDC-AI/Ovis1.6-Gemma2-9B",
-    "AIDC-AI/Ovis1.6-Llama3.2-3B",
-    "AIDC-AI/Ovis2-1B",
-    "AIDC-AI/Ovis2.5-2B",
-    "google/paligemma-3b-mix-224",
-    "google/paligemma2-3b-ft-docci-448",
-    "microsoft/Phi-3.5-vision-instruct",
-    "microsoft/Phi-4-multimodal-instruct",
-    "mistralai/Pixtral-12B-2409",
-    "mistral-community/pixtral-12b",
-    "Qwen/Qwen-VL-Chat",
-    "Qwen/Qwen2-VL-2B-Instruct",
-    "Qwen/Qwen2.5-VL-3B-Instruct",
-    "Qwen/Qwen2-Audio-7B-Instruct",
-    "Qwen/Qwen2.5-Omni-3B",
-    "Qwen/Qwen3-VL-4B-Instruct",
-    "Qwen/Qwen3-VL-30B-A3B-Instruct",
-    "YannQi/R-4B",
-    "Skywork/Skywork-R1V-38B",
-    "HuggingFaceTB/SmolVLM2-2.2B-Instruct",
-    "stepfun-ai/step3",
-    "fixie-ai/ultravox-v0_5-llama-3_2-1b",
-    "openai/whisper-large-v3",
-    "omni-research/Tarsier-7b",
-    "omni-research/Tarsier2-Recap-7b",
-    "mistralai/Voxtral-Mini-3B-2507",
-])
+@pytest.mark.parametrize(
+    "model_id",
+    [
+        "rhymes-ai/Aria",
+        "CohereForAI/aya-vision-8b",
+        "Salesforce/blip2-opt-2.7b",
+        "facebook/chameleon-7b",
+        "CohereLabs/command-a-vision-07-2025",
+        "deepseek-ai/deepseek-vl2-tiny",
+        "baidu/ERNIE-4.5-VL-28B-A3B-PT",
+        "adept/fuyu-8b",
+        "google/gemma-3-4b-it",
+        "google/gemma-3n-E2B-it",
+        "zai-org/glm-4v-9b",
+        "zai-org/GLM-4.1V-9B-Thinking",
+        "zai-org/GLM-4.5V",
+        "ibm-granite/granite-speech-3.3-2b",
+        "h2oai/h2ovl-mississippi-800m",
+        "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B",
+        "HuggingFaceM4/Idefics3-8B-Llama3",
+        "internlm/Intern-S1",
+        "OpenGVLab/InternVL2-1B",
+        "OpenGVLab/InternVL3-1B",
+        "OpenGVLab/InternVL3_5-1B",
+        "OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview",
+        "OpenGVLab/InternVL3_5-30B-A3B",
+        "Kwai-Keye/Keye-VL-8B-Preview",
+        "Kwai-Keye/Keye-VL-1_5-8B",
+        "moonshotai/Kimi-VL-A3B-Instruct",
+        "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+        "llava-hf/llava-1.5-7b-hf",
+        "llava-hf/llava-v1.6-mistral-7b-hf",
+        "llava-hf/LLaVA-NeXT-Video-7B-hf",
+        "llava-hf/llava-onevision-qwen2-0.5b-ov-hf",
+        "TIGER-Lab/Mantis-8B-siglip-llama3",
+        "mispeech/midashenglm-7b",
+        "openbmb/MiniCPM-Llama3-V-2_5",
+        "openbmb/MiniCPM-o-2_6",
+        "openbmb/MiniCPM-V-2_6",
+        "MiniMaxAI/MiniMax-VL-01",
+        "allenai/Molmo-7B-D-0924",
+        "allenai/Molmo-7B-O-0924",
+        "nvidia/NVLM-D-72B",
+        "nvidia/Llama-3.1-Nemotron-Nano-VL-8B-V1",
+        "AIDC-AI/Ovis1.6-Gemma2-9B",
+        "AIDC-AI/Ovis1.6-Llama3.2-3B",
+        "AIDC-AI/Ovis2-1B",
+        "AIDC-AI/Ovis2.5-2B",
+        "google/paligemma-3b-mix-224",
+        "google/paligemma2-3b-ft-docci-448",
+        "microsoft/Phi-3.5-vision-instruct",
+        "microsoft/Phi-4-multimodal-instruct",
+        "mistralai/Pixtral-12B-2409",
+        "mistral-community/pixtral-12b",
+        "Qwen/Qwen-VL-Chat",
+        "Qwen/Qwen2-VL-2B-Instruct",
+        "Qwen/Qwen2.5-VL-3B-Instruct",
+        "Qwen/Qwen2-Audio-7B-Instruct",
+        "Qwen/Qwen2.5-Omni-3B",
+        "Qwen/Qwen3-VL-4B-Instruct",
+        "Qwen/Qwen3-VL-30B-A3B-Instruct",
+        "YannQi/R-4B",
+        "Skywork/Skywork-R1V-38B",
+        "HuggingFaceTB/SmolVLM2-2.2B-Instruct",
+        "stepfun-ai/step3",
+        "fixie-ai/ultravox-v0_5-llama-3_2-1b",
+        "openai/whisper-large-v3",
+        "omni-research/Tarsier-7b",
+        "omni-research/Tarsier2-Recap-7b",
+        "mistralai/Voxtral-Mini-3B-2507",
+    ],
+)
 @pytest.mark.parametrize("hit_rate", [0.3, 0.5, 1.0])
 @pytest.mark.parametrize("num_batches", [32])
 @pytest.mark.parametrize("simplify_rate", [1.0])
-# yapf: enable
 def test_processing_correctness(
     model_id: str,
     hit_rate: float,
diff --git a/tests/models/multimodal/processing/test_idefics3.py b/tests/models/multimodal/processing/test_idefics3.py
index 2028d13943..351b9d018e 100644
--- a/tests/models/multimodal/processing/test_idefics3.py
+++ b/tests/models/multimodal/processing/test_idefics3.py
@@ -12,7 +12,6 @@ from ...utils import build_model_context
 
 
 @pytest.mark.parametrize("model_id", ["HuggingFaceM4/Idefics3-8B-Llama3"])
-# yapf: disable
 @pytest.mark.parametrize(
     ("mm_processor_kwargs", "expected_toks_per_img"),
     [
@@ -20,7 +19,6 @@ from ...utils import build_model_context
         ({"size": {"longest_edge": 728}}, 169 * (2**2 + 1)),
     ],
 )
-# yapf: enable
 @pytest.mark.parametrize("num_imgs", [1, 2])
 @pytest.mark.parametrize("kwargs_on_init", [True, False])
 def test_processor_override(
diff --git a/tests/models/multimodal/processing/test_phi3v.py b/tests/models/multimodal/processing/test_phi3v.py
index 436f029f9f..8faff2611e 100644
--- a/tests/models/multimodal/processing/test_phi3v.py
+++ b/tests/models/multimodal/processing/test_phi3v.py
@@ -11,7 +11,6 @@ from ...utils import build_model_context
 
 
 @pytest.mark.parametrize("model_id", ["microsoft/Phi-3.5-vision-instruct"])
-# yapf: disable
 @pytest.mark.parametrize(
     ("mm_processor_kwargs", "expected_toks_per_img"),
     [
@@ -21,7 +20,6 @@ from ...utils import build_model_context
         ({}, 757),
     ],
 )
-# yapf: enable
 @pytest.mark.parametrize("num_imgs", [1, 2])
 @pytest.mark.parametrize("kwargs_on_init", [True, False])
 def test_processor_override(
diff --git a/tests/models/multimodal/processing/test_phi4mm.py b/tests/models/multimodal/processing/test_phi4mm.py
index b6759342ff..5391555c26 100644
--- a/tests/models/multimodal/processing/test_phi4mm.py
+++ b/tests/models/multimodal/processing/test_phi4mm.py
@@ -11,7 +11,6 @@ from ...utils import build_model_context
 
 
 @pytest.mark.parametrize("model_id", ["microsoft/Phi-4-multimodal-instruct"])
-# yapf: disable
 @pytest.mark.parametrize(
     ("mm_processor_kwargs", "expected_toks_per_img"),
     [
@@ -21,7 +20,6 @@ from ...utils import build_model_context
         ({}, 9585),
     ],
 )
-# yapf: enable
 @pytest.mark.parametrize("num_imgs", [1, 2])
 @pytest.mark.parametrize("kwargs_on_init", [True, False])
 def test_processor_override(
diff --git a/tests/models/multimodal/processing/test_qwen2_vl.py b/tests/models/multimodal/processing/test_qwen2_vl.py
index eddcd329ba..9f4cdb6789 100644
--- a/tests/models/multimodal/processing/test_qwen2_vl.py
+++ b/tests/models/multimodal/processing/test_qwen2_vl.py
@@ -10,7 +10,6 @@ from ...utils import build_model_context
 
 
 @pytest.mark.parametrize("model_id", ["Qwen/Qwen2-VL-2B-Instruct"])
-# yapf: disable
 @pytest.mark.parametrize(
     ("mm_processor_kwargs", "expected_toks_per_img", "expected_pixels_shape"),
     [
@@ -18,7 +17,6 @@ from ...utils import build_model_context
         ({"min_pixels": 64**2, "max_pixels": 512**2}, 330, (1320, 1176)),
     ],
 )
-# yapf: enable
 @pytest.mark.parametrize("num_imgs", [1, 2])
 @pytest.mark.parametrize("kwargs_on_init", [True, False])
 def test_processor_override(
diff --git a/tests/models/multimodal/processing/test_smolvlm.py b/tests/models/multimodal/processing/test_smolvlm.py
index 20018513d0..6f77d5516d 100644
--- a/tests/models/multimodal/processing/test_smolvlm.py
+++ b/tests/models/multimodal/processing/test_smolvlm.py
@@ -12,7 +12,6 @@ from ...utils import build_model_context
 
 
 @pytest.mark.parametrize("model_id", ["HuggingFaceTB/SmolVLM2-2.2B-Instruct"])
-# yapf: disable
 @pytest.mark.parametrize(
     ("mm_processor_kwargs", "expected_toks_per_img"),
     [
@@ -20,7 +19,6 @@ from ...utils import build_model_context
         ({"max_image_size": {"longest_edge": 768}}, 405),
     ],
 )
-# yapf: enable
 @pytest.mark.parametrize("num_imgs", [1, 2])
 @pytest.mark.parametrize("kwargs_on_init", [True, False])
 def test_processor_override(
diff --git a/tests/models/multimodal/processing/test_transformers.py b/tests/models/multimodal/processing/test_transformers.py
index c0e043ade7..e2a2186f47 100644
--- a/tests/models/multimodal/processing/test_transformers.py
+++ b/tests/models/multimodal/processing/test_transformers.py
@@ -7,9 +7,7 @@ from vllm.config import ModelConfig
 from vllm.multimodal import MULTIMODAL_REGISTRY
 
 
-# yapf: disable
-@pytest.mark.parametrize("model_id",
-                         ["llava-hf/llava-onevision-qwen2-0.5b-ov-hf"])
+@pytest.mark.parametrize("model_id", ["llava-hf/llava-onevision-qwen2-0.5b-ov-hf"])
 def test_multimodal_processor(model_id):
     model_config = ModelConfig(
         model=model_id,
@@ -18,9 +16,9 @@ def test_multimodal_processor(model_id):
 
     mm_processor = MULTIMODAL_REGISTRY.create_processor(model_config)
 
-    image_pil = ImageAsset('cherry_blossom').pil_image
+    image_pil = ImageAsset("cherry_blossom").pil_image
     mm_data = {"image": image_pil}
-    str_prompt = "<|im_start|>user <image>\nWhat is the content of this image?<|im_end|><|im_start|>assistant\n" # noqa: E501
+    str_prompt = "<|im_start|>user <image>\nWhat is the content of this image?<|im_end|><|im_start|>assistant\n"  # noqa: E501
     str_processed_inputs = mm_processor.apply(
         prompt=str_prompt,
         mm_data=mm_data,
@@ -28,8 +26,23 @@ def test_multimodal_processor(model_id):
     )
 
     ids_prompt = [
-        151644, 872, 220, 151646, 198, 3838, 374, 279, 2213, 315, 419, 2168,
-        30, 151645, 151644, 77091, 198
+        151644,
+        872,
+        220,
+        151646,
+        198,
+        3838,
+        374,
+        279,
+        2213,
+        315,
+        419,
+        2168,
+        30,
+        151645,
+        151644,
+        77091,
+        198,
     ]
     ids_processed_inputs = mm_processor.apply(
         prompt=ids_prompt,
@@ -37,5 +50,7 @@ def test_multimodal_processor(model_id):
         hf_processor_mm_kwargs={},
     )
 
-    assert (str_processed_inputs["prompt_token_ids"]
-            == ids_processed_inputs["prompt_token_ids"])
+    assert (
+        str_processed_inputs["prompt_token_ids"]
+        == ids_processed_inputs["prompt_token_ids"]
+    )
diff --git a/tests/models/registry.py b/tests/models/registry.py
index 292c5c222d..e1d9f1d1dd 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -169,430 +169,625 @@ class _HfExamplesInfo:
                 pytest.skip(msg)
 
 
-# yapf: disable
 _TEXT_GENERATION_EXAMPLE_MODELS = {
     # [Decoder-only]
-    "ApertusForCausalLM": _HfExamplesInfo("swiss-ai/Apertus-8B-2509",
-                                          min_transformers_version="4.56.0",
-                                          trust_remote_code=True),
-    "AquilaModel": _HfExamplesInfo("BAAI/AquilaChat-7B",
-                                   trust_remote_code=True),
-    "AquilaForCausalLM": _HfExamplesInfo("BAAI/AquilaChat2-7B",
-                                         trust_remote_code=True),
+    "ApertusForCausalLM": _HfExamplesInfo(
+        "swiss-ai/Apertus-8B-2509",
+        min_transformers_version="4.56.0",
+        trust_remote_code=True,
+    ),
+    "AquilaModel": _HfExamplesInfo("BAAI/AquilaChat-7B", trust_remote_code=True),
+    "AquilaForCausalLM": _HfExamplesInfo("BAAI/AquilaChat2-7B", trust_remote_code=True),
     "ArceeForCausalLM": _HfExamplesInfo("arcee-ai/AFM-4.5B-Base"),
-    "ArcticForCausalLM": _HfExamplesInfo("Snowflake/snowflake-arctic-instruct",
-                                         trust_remote_code=True),
-    "BaiChuanForCausalLM": _HfExamplesInfo("baichuan-inc/Baichuan-7B",
-                                         trust_remote_code=True),
-    "BaichuanForCausalLM": _HfExamplesInfo("baichuan-inc/Baichuan2-7B-chat",
-                                         trust_remote_code=True),
-    "BailingMoeForCausalLM": _HfExamplesInfo("inclusionAI/Ling-lite-1.5",
-                                         trust_remote_code=True),
-    "BailingMoeV2ForCausalLM": _HfExamplesInfo("inclusionAI/Ling-mini-2.0",
-                                         trust_remote_code=True),
-    "BambaForCausalLM": _HfExamplesInfo("ibm-ai-platform/Bamba-9B-v1",
-                                        min_transformers_version="4.55.3",
-                                        extras={"tiny": "hmellor/tiny-random-BambaForCausalLM"}),  # noqa: E501
-    "BloomForCausalLM": _HfExamplesInfo("bigscience/bloom-560m",
-                                        {"1b": "bigscience/bloomz-1b1"}),
-    "ChatGLMModel": _HfExamplesInfo("zai-org/chatglm3-6b",
-                                    trust_remote_code=True,
-                                    max_transformers_version="4.48"),
-    "ChatGLMForConditionalGeneration": _HfExamplesInfo("thu-coai/ShieldLM-6B-chatglm3",  # noqa: E501
-                                                       trust_remote_code=True),
-    "CohereForCausalLM": _HfExamplesInfo("CohereForAI/c4ai-command-r-v01",
-                                         trust_remote_code=True),
-    "Cohere2ForCausalLM": _HfExamplesInfo("CohereForAI/c4ai-command-r7b-12-2024", # noqa: E501
-                                         trust_remote_code=True),
-    "CwmForCausalLM": _HfExamplesInfo("facebook/cwm", # noqa: E501
-                                      trust_remote_code=True,
-                                      is_available_online=False),
+    "ArcticForCausalLM": _HfExamplesInfo(
+        "Snowflake/snowflake-arctic-instruct", trust_remote_code=True
+    ),
+    "BaiChuanForCausalLM": _HfExamplesInfo(
+        "baichuan-inc/Baichuan-7B", trust_remote_code=True
+    ),
+    "BaichuanForCausalLM": _HfExamplesInfo(
+        "baichuan-inc/Baichuan2-7B-chat", trust_remote_code=True
+    ),
+    "BailingMoeForCausalLM": _HfExamplesInfo(
+        "inclusionAI/Ling-lite-1.5", trust_remote_code=True
+    ),
+    "BailingMoeV2ForCausalLM": _HfExamplesInfo(
+        "inclusionAI/Ling-mini-2.0", trust_remote_code=True
+    ),
+    "BambaForCausalLM": _HfExamplesInfo(
+        "ibm-ai-platform/Bamba-9B-v1",
+        min_transformers_version="4.55.3",
+        extras={"tiny": "hmellor/tiny-random-BambaForCausalLM"},
+    ),
+    "BloomForCausalLM": _HfExamplesInfo(
+        "bigscience/bloom-560m", {"1b": "bigscience/bloomz-1b1"}
+    ),
+    "ChatGLMModel": _HfExamplesInfo(
+        "zai-org/chatglm3-6b", trust_remote_code=True, max_transformers_version="4.48"
+    ),
+    "ChatGLMForConditionalGeneration": _HfExamplesInfo(
+        "thu-coai/ShieldLM-6B-chatglm3",
+        trust_remote_code=True,
+    ),
+    "CohereForCausalLM": _HfExamplesInfo(
+        "CohereForAI/c4ai-command-r-v01", trust_remote_code=True
+    ),
+    "Cohere2ForCausalLM": _HfExamplesInfo(
+        "CohereForAI/c4ai-command-r7b-12-2024",
+        trust_remote_code=True,
+    ),
+    "CwmForCausalLM": _HfExamplesInfo(
+        "facebook/cwm",
+        trust_remote_code=True,
+        is_available_online=False,
+    ),
     "DbrxForCausalLM": _HfExamplesInfo("databricks/dbrx-instruct"),
-    "DeciLMForCausalLM": _HfExamplesInfo("nvidia/Llama-3_3-Nemotron-Super-49B-v1", # noqa: E501
-                                         trust_remote_code=True),
+    "DeciLMForCausalLM": _HfExamplesInfo(
+        "nvidia/Llama-3_3-Nemotron-Super-49B-v1",
+        trust_remote_code=True,
+    ),
     "DeepseekForCausalLM": _HfExamplesInfo("deepseek-ai/deepseek-llm-7b-chat"),
-    "DeepseekV2ForCausalLM": _HfExamplesInfo("deepseek-ai/DeepSeek-V2-Lite-Chat",  # noqa: E501
-                                         trust_remote_code=True),
-    "DeepseekV3ForCausalLM": _HfExamplesInfo("deepseek-ai/DeepSeek-V3",  # noqa: E501
-                                         trust_remote_code=True),
+    "DeepseekV2ForCausalLM": _HfExamplesInfo(
+        "deepseek-ai/DeepSeek-V2-Lite-Chat",
+        trust_remote_code=True,
+    ),
+    "DeepseekV3ForCausalLM": _HfExamplesInfo(
+        "deepseek-ai/DeepSeek-V3",
+        trust_remote_code=True,
+    ),
     "DeepseekV32ForCausalLM": _HfExamplesInfo("deepseek-ai/DeepSeek-V3.2-Exp"),
-    "Ernie4_5ForCausalLM": _HfExamplesInfo("baidu/ERNIE-4.5-0.3B-PT",
-                                            min_transformers_version="4.54"),
-    "Ernie4_5_MoeForCausalLM": _HfExamplesInfo("baidu/ERNIE-4.5-21B-A3B-PT",
-                                               min_transformers_version="4.54"),
-    "ExaoneForCausalLM": _HfExamplesInfo("LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct",
-                                         trust_remote_code=True),
-    "Exaone4ForCausalLM": _HfExamplesInfo("LGAI-EXAONE/EXAONE-4.0-32B",
-                                          min_transformers_version="4.54"),
-    "Fairseq2LlamaForCausalLM": _HfExamplesInfo("mgleize/fairseq2-dummy-Llama-3.2-1B"),  # noqa: E501
+    "Ernie4_5ForCausalLM": _HfExamplesInfo(
+        "baidu/ERNIE-4.5-0.3B-PT", min_transformers_version="4.54"
+    ),
+    "Ernie4_5_MoeForCausalLM": _HfExamplesInfo(
+        "baidu/ERNIE-4.5-21B-A3B-PT", min_transformers_version="4.54"
+    ),
+    "ExaoneForCausalLM": _HfExamplesInfo(
+        "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct", trust_remote_code=True
+    ),
+    "Exaone4ForCausalLM": _HfExamplesInfo(
+        "LGAI-EXAONE/EXAONE-4.0-32B", min_transformers_version="4.54"
+    ),
+    "Fairseq2LlamaForCausalLM": _HfExamplesInfo("mgleize/fairseq2-dummy-Llama-3.2-1B"),
     "FalconForCausalLM": _HfExamplesInfo("tiiuae/falcon-7b"),
-    "FalconH1ForCausalLM":_HfExamplesInfo("tiiuae/Falcon-H1-0.5B-Base"),
+    "FalconH1ForCausalLM": _HfExamplesInfo("tiiuae/Falcon-H1-0.5B-Base"),
     "GemmaForCausalLM": _HfExamplesInfo("google/gemma-1.1-2b-it"),
     "Gemma2ForCausalLM": _HfExamplesInfo("google/gemma-2-9b"),
     "Gemma3ForCausalLM": _HfExamplesInfo("google/gemma-3-1b-it"),
-    "Gemma3nForCausalLM": _HfExamplesInfo("google/gemma-3n-E2B-it",
-                                          min_transformers_version="4.53"),
+    "Gemma3nForCausalLM": _HfExamplesInfo(
+        "google/gemma-3n-E2B-it", min_transformers_version="4.53"
+    ),
     "GlmForCausalLM": _HfExamplesInfo("zai-org/glm-4-9b-chat-hf"),
     "Glm4ForCausalLM": _HfExamplesInfo("zai-org/GLM-4-9B-0414"),
-    "Glm4MoeForCausalLM": _HfExamplesInfo("zai-org/GLM-4.5",
-                                          min_transformers_version="4.54"),   # noqa: E501
-    "GPT2LMHeadModel": _HfExamplesInfo("openai-community/gpt2",
-                                       {"alias": "gpt2"}),
-    "GPTBigCodeForCausalLM": _HfExamplesInfo("bigcode/starcoder",
-                                             extras={"tiny": "bigcode/tiny_starcoder_py"},  # noqa: E501
-                                             min_transformers_version="4.55.1",
-                                             transformers_version_reason="HF model broken in 4.55.0"),  # noqa: E501
-    "GPTJForCausalLM": _HfExamplesInfo("Milos/slovak-gpt-j-405M",
-                                       {"6b": "EleutherAI/gpt-j-6b"}),
-    "GPTNeoXForCausalLM": _HfExamplesInfo("EleutherAI/pythia-70m",
-                                          {"1b": "EleutherAI/pythia-1.4b"}),
+    "Glm4MoeForCausalLM": _HfExamplesInfo(
+        "zai-org/GLM-4.5", min_transformers_version="4.54"
+    ),
+    "GPT2LMHeadModel": _HfExamplesInfo("openai-community/gpt2", {"alias": "gpt2"}),
+    "GPTBigCodeForCausalLM": _HfExamplesInfo(
+        "bigcode/starcoder",
+        extras={"tiny": "bigcode/tiny_starcoder_py"},
+        min_transformers_version="4.55.1",
+        transformers_version_reason="HF model broken in 4.55.0",
+    ),
+    "GPTJForCausalLM": _HfExamplesInfo(
+        "Milos/slovak-gpt-j-405M", {"6b": "EleutherAI/gpt-j-6b"}
+    ),
+    "GPTNeoXForCausalLM": _HfExamplesInfo(
+        "EleutherAI/pythia-70m", {"1b": "EleutherAI/pythia-1.4b"}
+    ),
     "GptOssForCausalLM": _HfExamplesInfo("lmsys/gpt-oss-20b-bf16"),
     "GraniteForCausalLM": _HfExamplesInfo("ibm/PowerLM-3b"),
     "GraniteMoeForCausalLM": _HfExamplesInfo("ibm/PowerMoE-3b"),
-    "GraniteMoeHybridForCausalLM": _HfExamplesInfo("ibm-granite/granite-4.0-tiny-preview", # noqa: E501
-                                                   min_transformers_version="4.55.3"),
-    "GraniteMoeSharedForCausalLM": _HfExamplesInfo("ibm-research/moe-7b-1b-active-shared-experts"),  # noqa: E501
-    "Grok1ModelForCausalLM": _HfExamplesInfo("hpcai-tech/grok-1",
-                                             trust_remote_code=True),
-    "HunYuanMoEV1ForCausalLM": _HfExamplesInfo("tencent/Hunyuan-A13B-Instruct",
-                                               trust_remote_code=True),
+    "GraniteMoeHybridForCausalLM": _HfExamplesInfo(
+        "ibm-granite/granite-4.0-tiny-preview",
+        min_transformers_version="4.55.3",
+    ),
+    "GraniteMoeSharedForCausalLM": _HfExamplesInfo(
+        "ibm-research/moe-7b-1b-active-shared-experts"
+    ),
+    "Grok1ModelForCausalLM": _HfExamplesInfo(
+        "hpcai-tech/grok-1", trust_remote_code=True
+    ),
+    "HunYuanMoEV1ForCausalLM": _HfExamplesInfo(
+        "tencent/Hunyuan-A13B-Instruct", trust_remote_code=True
+    ),
     # TODO: Remove is_available_online once their config.json is fixed
-    "HunYuanDenseV1ForCausalLM":_HfExamplesInfo("tencent/Hunyuan-7B-Instruct-0124",
-                                                trust_remote_code=True,
-                                                is_available_online=False),
-    "InternLMForCausalLM": _HfExamplesInfo("internlm/internlm-chat-7b",
-                                           trust_remote_code=True),
-    "InternLM2ForCausalLM": _HfExamplesInfo("internlm/internlm2-chat-7b",
-                                            trust_remote_code=True),
-    "InternLM2VEForCausalLM": _HfExamplesInfo("OpenGVLab/Mono-InternVL-2B",
-                                              trust_remote_code=True),
-    "InternLM3ForCausalLM": _HfExamplesInfo("internlm/internlm3-8b-instruct",
-                                            trust_remote_code=True),
+    "HunYuanDenseV1ForCausalLM": _HfExamplesInfo(
+        "tencent/Hunyuan-7B-Instruct-0124",
+        trust_remote_code=True,
+        is_available_online=False,
+    ),
+    "InternLMForCausalLM": _HfExamplesInfo(
+        "internlm/internlm-chat-7b", trust_remote_code=True
+    ),
+    "InternLM2ForCausalLM": _HfExamplesInfo(
+        "internlm/internlm2-chat-7b", trust_remote_code=True
+    ),
+    "InternLM2VEForCausalLM": _HfExamplesInfo(
+        "OpenGVLab/Mono-InternVL-2B", trust_remote_code=True
+    ),
+    "InternLM3ForCausalLM": _HfExamplesInfo(
+        "internlm/internlm3-8b-instruct", trust_remote_code=True
+    ),
     "JAISLMHeadModel": _HfExamplesInfo("inceptionai/jais-13b-chat"),
-    "JambaForCausalLM": _HfExamplesInfo("ai21labs/AI21-Jamba-1.5-Mini",
-                                        min_transformers_version="4.55.3",
-                                        extras={
-                                            "tiny": "ai21labs/Jamba-tiny-dev",
-                                            "random": "ai21labs/Jamba-tiny-random",  # noqa: E501
-                                        }),
-    "Lfm2ForCausalLM": _HfExamplesInfo("LiquidAI/LFM2-1.2B",
-                                       min_transformers_version="4.54"),
-    "LlamaForCausalLM": _HfExamplesInfo("meta-llama/Llama-3.2-1B-Instruct",
-                                        extras={"guard": "meta-llama/Llama-Guard-3-1B",  # noqa: E501
-                                                "hermes": "NousResearch/Hermes-3-Llama-3.1-8B", # noqa: E501
-                                                "fp8": "RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8"}),  # noqa: E501
-    "LLaMAForCausalLM": _HfExamplesInfo("decapoda-research/llama-7b-hf",
-                                        is_available_online=False),
-    "Llama4ForCausalLM": _HfExamplesInfo("meta-llama/Llama-4-Scout-17B-16E-Instruct", # noqa: E501
-                                         is_available_online=False),
-    "LongcatFlashForCausalLM": _HfExamplesInfo
-                ("meituan-longcat/LongCat-Flash-Chat", trust_remote_code=True),
+    "JambaForCausalLM": _HfExamplesInfo(
+        "ai21labs/AI21-Jamba-1.5-Mini",
+        min_transformers_version="4.55.3",
+        extras={
+            "tiny": "ai21labs/Jamba-tiny-dev",
+            "random": "ai21labs/Jamba-tiny-random",
+        },
+    ),
+    "Lfm2ForCausalLM": _HfExamplesInfo(
+        "LiquidAI/LFM2-1.2B", min_transformers_version="4.54"
+    ),
+    "LlamaForCausalLM": _HfExamplesInfo(
+        "meta-llama/Llama-3.2-1B-Instruct",
+        extras={
+            "guard": "meta-llama/Llama-Guard-3-1B",
+            "hermes": "NousResearch/Hermes-3-Llama-3.1-8B",
+            "fp8": "RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8",
+        },
+    ),
+    "LLaMAForCausalLM": _HfExamplesInfo(
+        "decapoda-research/llama-7b-hf", is_available_online=False
+    ),
+    "Llama4ForCausalLM": _HfExamplesInfo(
+        "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+        is_available_online=False,
+    ),
+    "LongcatFlashForCausalLM": _HfExamplesInfo(
+        "meituan-longcat/LongCat-Flash-Chat", trust_remote_code=True
+    ),
     "MambaForCausalLM": _HfExamplesInfo("state-spaces/mamba-130m-hf"),
-    "Mamba2ForCausalLM": _HfExamplesInfo("mistralai/Mamba-Codestral-7B-v0.1",
-                                         min_transformers_version="4.55.3",
-                                         extras={
-                                            "random": "yujiepan/mamba2-codestral-v0.1-tiny-random", # noqa: E501
-                                         }),
-    "FalconMambaForCausalLM": _HfExamplesInfo("tiiuae/falcon-mamba-7b-instruct"),  # noqa: E501
-    "MiniCPMForCausalLM": _HfExamplesInfo("openbmb/MiniCPM-2B-sft-bf16",
-                                         trust_remote_code=True),
-    "MiniCPM3ForCausalLM": _HfExamplesInfo("openbmb/MiniCPM3-4B",
-                                         trust_remote_code=True),
+    "Mamba2ForCausalLM": _HfExamplesInfo(
+        "mistralai/Mamba-Codestral-7B-v0.1",
+        min_transformers_version="4.55.3",
+        extras={
+            "random": "yujiepan/mamba2-codestral-v0.1-tiny-random",
+        },
+    ),
+    "FalconMambaForCausalLM": _HfExamplesInfo("tiiuae/falcon-mamba-7b-instruct"),
+    "MiniCPMForCausalLM": _HfExamplesInfo(
+        "openbmb/MiniCPM-2B-sft-bf16", trust_remote_code=True
+    ),
+    "MiniCPM3ForCausalLM": _HfExamplesInfo(
+        "openbmb/MiniCPM3-4B", trust_remote_code=True
+    ),
     "MiniMaxForCausalLM": _HfExamplesInfo("MiniMaxAI/MiniMax-Text-01-hf"),
-    "MiniMaxText01ForCausalLM": _HfExamplesInfo("MiniMaxAI/MiniMax-Text-01",
-                                                trust_remote_code=True,
-                                                revision="a59aa9cbc53b9fb8742ca4e9e1531b9802b6fdc3"),  # noqa: E501
-    "MiniMaxM1ForCausalLM": _HfExamplesInfo("MiniMaxAI/MiniMax-M1-40k",
-                                            trust_remote_code=True),
+    "MiniMaxText01ForCausalLM": _HfExamplesInfo(
+        "MiniMaxAI/MiniMax-Text-01",
+        trust_remote_code=True,
+        revision="a59aa9cbc53b9fb8742ca4e9e1531b9802b6fdc3",
+    ),
+    "MiniMaxM1ForCausalLM": _HfExamplesInfo(
+        "MiniMaxAI/MiniMax-M1-40k", trust_remote_code=True
+    ),
     "MistralForCausalLM": _HfExamplesInfo("mistralai/Mistral-7B-Instruct-v0.1"),
-    "MixtralForCausalLM": _HfExamplesInfo("mistralai/Mixtral-8x7B-Instruct-v0.1",  # noqa: E501
-                                          {"tiny": "TitanML/tiny-mixtral"}),  # noqa: E501
+    "MixtralForCausalLM": _HfExamplesInfo(
+        "mistralai/Mixtral-8x7B-Instruct-v0.1",
+        {"tiny": "TitanML/tiny-mixtral"},
+    ),
     "MptForCausalLM": _HfExamplesInfo("mpt", is_available_online=False),
     "MPTForCausalLM": _HfExamplesInfo("mosaicml/mpt-7b"),
     "NemotronForCausalLM": _HfExamplesInfo("nvidia/Minitron-8B-Base"),
-    "NemotronHForCausalLM": _HfExamplesInfo("nvidia/Nemotron-H-8B-Base-8K",
-                                            trust_remote_code=True),
+    "NemotronHForCausalLM": _HfExamplesInfo(
+        "nvidia/Nemotron-H-8B-Base-8K", trust_remote_code=True
+    ),
     "OlmoForCausalLM": _HfExamplesInfo("allenai/OLMo-1B-hf"),
     "Olmo2ForCausalLM": _HfExamplesInfo("allenai/OLMo-2-0425-1B"),
     "Olmo3ForCausalLM": _HfExamplesInfo("shanearora/2025-sep-a-base-model"),
     "OlmoeForCausalLM": _HfExamplesInfo("allenai/OLMoE-1B-7B-0924-Instruct"),
-    "OPTForCausalLM": _HfExamplesInfo("facebook/opt-125m",
-                                      {"1b": "facebook/opt-iml-max-1.3b"}),
-    "OrionForCausalLM": _HfExamplesInfo("OrionStarAI/Orion-14B-Chat",
-                                        trust_remote_code=True),
+    "OPTForCausalLM": _HfExamplesInfo(
+        "facebook/opt-125m", {"1b": "facebook/opt-iml-max-1.3b"}
+    ),
+    "OrionForCausalLM": _HfExamplesInfo(
+        "OrionStarAI/Orion-14B-Chat", trust_remote_code=True
+    ),
     "PersimmonForCausalLM": _HfExamplesInfo("adept/persimmon-8b-chat"),
     "PhiForCausalLM": _HfExamplesInfo("microsoft/phi-2"),
     "Phi3ForCausalLM": _HfExamplesInfo("microsoft/Phi-3-mini-4k-instruct"),
-    "PhiMoEForCausalLM": _HfExamplesInfo("microsoft/Phi-3.5-MoE-instruct",
-                                         trust_remote_code=True),
-    "Plamo2ForCausalLM": _HfExamplesInfo("pfnet/plamo-2-1b",
-                                         max_transformers_version="4.55.4",
-                                         transformers_version_reason="HF model uses remote code that is not compatible with latest Transformers",  # noqa: E501
-                                         trust_remote_code=True),
-    "QWenLMHeadModel": _HfExamplesInfo("Qwen/Qwen-7B-Chat",
-                                       max_transformers_version="4.53",
-                                       transformers_version_reason="HF model uses remote code that is not compatible with latest Transformers",  # noqa: E501
-                                       trust_remote_code=True),
-    "Qwen2ForCausalLM": _HfExamplesInfo("Qwen/Qwen2-0.5B-Instruct",
-                                        extras={"2.5": "Qwen/Qwen2.5-0.5B-Instruct"}), # noqa: E501
+    "PhiMoEForCausalLM": _HfExamplesInfo(
+        "microsoft/Phi-3.5-MoE-instruct", trust_remote_code=True
+    ),
+    "Plamo2ForCausalLM": _HfExamplesInfo(
+        "pfnet/plamo-2-1b",
+        max_transformers_version="4.55.4",
+        transformers_version_reason="HF model uses remote code that is not compatible with latest Transformers",  # noqa: E501
+        trust_remote_code=True,
+    ),
+    "QWenLMHeadModel": _HfExamplesInfo(
+        "Qwen/Qwen-7B-Chat",
+        max_transformers_version="4.53",
+        transformers_version_reason="HF model uses remote code that is not compatible with latest Transformers",  # noqa: E501
+        trust_remote_code=True,
+    ),
+    "Qwen2ForCausalLM": _HfExamplesInfo(
+        "Qwen/Qwen2-0.5B-Instruct", extras={"2.5": "Qwen/Qwen2.5-0.5B-Instruct"}
+    ),
     "Qwen2MoeForCausalLM": _HfExamplesInfo("Qwen/Qwen1.5-MoE-A2.7B-Chat"),
     "Qwen3ForCausalLM": _HfExamplesInfo("Qwen/Qwen3-8B"),
     "Qwen3MoeForCausalLM": _HfExamplesInfo("Qwen/Qwen3-30B-A3B"),
-    "Qwen3NextForCausalLM": _HfExamplesInfo("Qwen/Qwen3-Next-80B-A3B-Instruct",
-                                            extras={"tiny-random": "tiny-random/qwen3-next-moe"}, # noqa: E501
-                                            min_transformers_version="4.56.3"),
+    "Qwen3NextForCausalLM": _HfExamplesInfo(
+        "Qwen/Qwen3-Next-80B-A3B-Instruct",
+        extras={"tiny-random": "tiny-random/qwen3-next-moe"},
+        min_transformers_version="4.56.3",
+    ),
     "RWForCausalLM": _HfExamplesInfo("tiiuae/falcon-40b"),
-    "SeedOssForCausalLM": _HfExamplesInfo("ByteDance-Seed/Seed-OSS-36B-Instruct", # noqa: E501
-                                          trust_remote_code=True,
-                                          is_available_online=False),
+    "SeedOssForCausalLM": _HfExamplesInfo(
+        "ByteDance-Seed/Seed-OSS-36B-Instruct",
+        trust_remote_code=True,
+        is_available_online=False,
+    ),
     "SmolLM3ForCausalLM": _HfExamplesInfo("HuggingFaceTB/SmolLM3-3B"),
-    "StableLMEpochForCausalLM": _HfExamplesInfo("stabilityai/stablelm-zephyr-3b"),  # noqa: E501
+    "StableLMEpochForCausalLM": _HfExamplesInfo("stabilityai/stablelm-zephyr-3b"),
     "StableLmForCausalLM": _HfExamplesInfo("stabilityai/stablelm-3b-4e1t"),
     "Starcoder2ForCausalLM": _HfExamplesInfo("bigcode/starcoder2-3b"),
-    "Step3TextForCausalLM": _HfExamplesInfo("stepfun-ai/step3",
-                                            trust_remote_code=True),
-    "SolarForCausalLM": _HfExamplesInfo("upstage/solar-pro-preview-instruct",
-                                        trust_remote_code=True),
-    "TeleChat2ForCausalLM": _HfExamplesInfo("Tele-AI/TeleChat2-3B",
-                                            trust_remote_code=True),
-    "TeleFLMForCausalLM": _HfExamplesInfo("CofeAI/FLM-2-52B-Instruct-2407",
-                                            trust_remote_code=True),
-    "XverseForCausalLM": _HfExamplesInfo("xverse/XVERSE-7B-Chat",
-                                         tokenizer="meta-llama/Llama-2-7b",
-                                         trust_remote_code=True),
+    "Step3TextForCausalLM": _HfExamplesInfo("stepfun-ai/step3", trust_remote_code=True),
+    "SolarForCausalLM": _HfExamplesInfo(
+        "upstage/solar-pro-preview-instruct", trust_remote_code=True
+    ),
+    "TeleChat2ForCausalLM": _HfExamplesInfo(
+        "Tele-AI/TeleChat2-3B", trust_remote_code=True
+    ),
+    "TeleFLMForCausalLM": _HfExamplesInfo(
+        "CofeAI/FLM-2-52B-Instruct-2407", trust_remote_code=True
+    ),
+    "XverseForCausalLM": _HfExamplesInfo(
+        "xverse/XVERSE-7B-Chat",
+        tokenizer="meta-llama/Llama-2-7b",
+        trust_remote_code=True,
+    ),
     "Zamba2ForCausalLM": _HfExamplesInfo("Zyphra/Zamba2-7B-instruct"),
-    "MiMoForCausalLM": _HfExamplesInfo("XiaomiMiMo/MiMo-7B-RL",
-                                        trust_remote_code=True),
+    "MiMoForCausalLM": _HfExamplesInfo("XiaomiMiMo/MiMo-7B-RL", trust_remote_code=True),
     "Dots1ForCausalLM": _HfExamplesInfo("rednote-hilab/dots.llm1.inst"),
 }
 
 _EMBEDDING_EXAMPLE_MODELS = {
     # [Text-only]
     "BertModel": _HfExamplesInfo("BAAI/bge-base-en-v1.5"),
-    "Gemma2Model": _HfExamplesInfo("BAAI/bge-multilingual-gemma2"),  # noqa: E501
+    "Gemma2Model": _HfExamplesInfo("BAAI/bge-multilingual-gemma2"),
     "Gemma3TextModel": _HfExamplesInfo("google/embeddinggemma-300m"),
     "GritLM": _HfExamplesInfo("parasail-ai/GritLM-7B-vllm"),
-    "GteModel": _HfExamplesInfo("Snowflake/snowflake-arctic-embed-m-v2.0",
-                                               trust_remote_code=True),
-    "GteNewModel": _HfExamplesInfo("Alibaba-NLP/gte-base-en-v1.5",
-                                   trust_remote_code=True,
-                                   hf_overrides={"architectures": ["GteNewModel"]}),  # noqa: E501
-    "InternLM2ForRewardModel": _HfExamplesInfo("internlm/internlm2-1_8b-reward",
-                                               trust_remote_code=True),
-    "JambaForSequenceClassification": _HfExamplesInfo("ai21labs/Jamba-tiny-reward-dev"),  # noqa: E501
+    "GteModel": _HfExamplesInfo(
+        "Snowflake/snowflake-arctic-embed-m-v2.0", trust_remote_code=True
+    ),
+    "GteNewModel": _HfExamplesInfo(
+        "Alibaba-NLP/gte-base-en-v1.5",
+        trust_remote_code=True,
+        hf_overrides={"architectures": ["GteNewModel"]},
+    ),
+    "InternLM2ForRewardModel": _HfExamplesInfo(
+        "internlm/internlm2-1_8b-reward", trust_remote_code=True
+    ),
+    "JambaForSequenceClassification": _HfExamplesInfo("ai21labs/Jamba-tiny-reward-dev"),
     "LlamaModel": _HfExamplesInfo("llama", is_available_online=False),
     "MistralModel": _HfExamplesInfo("intfloat/e5-mistral-7b-instruct"),
-    "ModernBertModel": _HfExamplesInfo("Alibaba-NLP/gte-modernbert-base",
-                                trust_remote_code=True),
-    "NomicBertModel": _HfExamplesInfo("nomic-ai/nomic-embed-text-v2-moe",
-                                               trust_remote_code=True),  # noqa: E501
+    "ModernBertModel": _HfExamplesInfo(
+        "Alibaba-NLP/gte-modernbert-base", trust_remote_code=True
+    ),
+    "NomicBertModel": _HfExamplesInfo(
+        "nomic-ai/nomic-embed-text-v2-moe", trust_remote_code=True
+    ),
     "Qwen2Model": _HfExamplesInfo("ssmits/Qwen2-7B-Instruct-embed-base"),
-    "Qwen2ForRewardModel": _HfExamplesInfo("Qwen/Qwen2.5-Math-RM-72B",
-                                           max_transformers_version="4.53",
-                                           transformers_version_reason="HF model uses remote code that is not compatible with latest Transformers"),  # noqa: E501
-    "Qwen2ForProcessRewardModel": _HfExamplesInfo("Qwen/Qwen2.5-Math-PRM-7B",
-                                                  max_transformers_version="4.53",
-                                                  transformers_version_reason="HF model uses remote code that is not compatible with latest Transformers"),  # noqa: E501
-    "RobertaModel": _HfExamplesInfo("sentence-transformers/stsb-roberta-base-v2"),  # noqa: E501
-    "RobertaForMaskedLM": _HfExamplesInfo("sentence-transformers/all-roberta-large-v1"),  # noqa: E501
-    "XLMRobertaModel": _HfExamplesInfo("intfloat/multilingual-e5-small"),  # noqa: E501
+    "Qwen2ForRewardModel": _HfExamplesInfo(
+        "Qwen/Qwen2.5-Math-RM-72B",
+        max_transformers_version="4.53",
+        transformers_version_reason="HF model uses remote code that is not compatible with latest Transformers",  # noqa: E501
+    ),
+    "Qwen2ForProcessRewardModel": _HfExamplesInfo(
+        "Qwen/Qwen2.5-Math-PRM-7B",
+        max_transformers_version="4.53",
+        transformers_version_reason="HF model uses remote code that is not compatible with latest Transformers",  # noqa: E501
+    ),
+    "RobertaModel": _HfExamplesInfo("sentence-transformers/stsb-roberta-base-v2"),
+    "RobertaForMaskedLM": _HfExamplesInfo("sentence-transformers/all-roberta-large-v1"),
+    "XLMRobertaModel": _HfExamplesInfo("intfloat/multilingual-e5-small"),
     # [Multimodal]
     "CLIPModel": _HfExamplesInfo("openai/clip-vit-base-patch32"),
     "LlavaNextForConditionalGeneration": _HfExamplesInfo("royokong/e5-v"),
-    "Phi3VForCausalLM": _HfExamplesInfo("TIGER-Lab/VLM2Vec-Full",
-                                         trust_remote_code=True),
-    "Qwen2VLForConditionalGeneration": _HfExamplesInfo("MrLight/dse-qwen2-2b-mrl-v1"), # noqa: E501
-    "PrithviGeoSpatialMAE": _HfExamplesInfo("ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11", # noqa: E501
-                                            dtype=torch.float16,
-                                            enforce_eager=True,
-                                            skip_tokenizer_init=True,
-                                            # This is to avoid the model
-                                            # going OOM in CI
-                                            max_num_seqs=32,
-                                            ),
-    "Terratorch": _HfExamplesInfo("ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11", # noqa: E501
-                                  dtype=torch.float16,
-                                  enforce_eager=True,
-                                  skip_tokenizer_init=True,
-                                  # This is to avoid the model going OOM in CI
-                                  max_num_seqs=32,
-                                  ),
+    "Phi3VForCausalLM": _HfExamplesInfo(
+        "TIGER-Lab/VLM2Vec-Full", trust_remote_code=True
+    ),
+    "Qwen2VLForConditionalGeneration": _HfExamplesInfo("MrLight/dse-qwen2-2b-mrl-v1"),
+    "PrithviGeoSpatialMAE": _HfExamplesInfo(
+        "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11",
+        dtype=torch.float16,
+        enforce_eager=True,
+        skip_tokenizer_init=True,
+        # This is to avoid the model
+        # going OOM in CI
+        max_num_seqs=32,
+    ),
+    "Terratorch": _HfExamplesInfo(
+        "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11",
+        dtype=torch.float16,
+        enforce_eager=True,
+        skip_tokenizer_init=True,
+        # This is to avoid the model going OOM in CI
+        max_num_seqs=32,
+    ),
 }
 
 _SEQUENCE_CLASSIFICATION_EXAMPLE_MODELS = {
     # [Decoder-only]
-    "GPT2ForSequenceClassification": _HfExamplesInfo("nie3e/sentiment-polish-gpt2-small"),  # noqa: E501
-
+    "GPT2ForSequenceClassification": _HfExamplesInfo(
+        "nie3e/sentiment-polish-gpt2-small"
+    ),
     # [Cross-encoder]
-    "BertForSequenceClassification": _HfExamplesInfo("cross-encoder/ms-marco-MiniLM-L-6-v2"),  # noqa: E501
+    "BertForSequenceClassification": _HfExamplesInfo(
+        "cross-encoder/ms-marco-MiniLM-L-6-v2"
+    ),
     "BertForTokenClassification": _HfExamplesInfo("boltuix/NeuroBERT-NER"),
-    "GteNewForSequenceClassification": _HfExamplesInfo("Alibaba-NLP/gte-multilingual-reranker-base",  # noqa: E501
-                                                       trust_remote_code=True,
-                                                       hf_overrides={
-                                                           "architectures": ["GteNewForSequenceClassification"]}),# noqa: E501
-    "ModernBertForSequenceClassification": _HfExamplesInfo("Alibaba-NLP/gte-reranker-modernbert-base"), # noqa: E501
-    "RobertaForSequenceClassification": _HfExamplesInfo("cross-encoder/quora-roberta-base"),  # noqa: E501
-    "XLMRobertaForSequenceClassification": _HfExamplesInfo("BAAI/bge-reranker-v2-m3"),  # noqa: E501
+    "GteNewForSequenceClassification": _HfExamplesInfo(
+        "Alibaba-NLP/gte-multilingual-reranker-base",
+        trust_remote_code=True,
+        hf_overrides={"architectures": ["GteNewForSequenceClassification"]},
+    ),
+    "ModernBertForSequenceClassification": _HfExamplesInfo(
+        "Alibaba-NLP/gte-reranker-modernbert-base"
+    ),
+    "RobertaForSequenceClassification": _HfExamplesInfo(
+        "cross-encoder/quora-roberta-base"
+    ),
+    "XLMRobertaForSequenceClassification": _HfExamplesInfo("BAAI/bge-reranker-v2-m3"),
 }
 
 _AUTOMATIC_CONVERTED_MODELS = {
     # Use as_seq_cls_model for automatic conversion
-    "GemmaForSequenceClassification": _HfExamplesInfo("BAAI/bge-reranker-v2-gemma",  # noqa: E501
-                                                      hf_overrides={"architectures": ["GemmaForSequenceClassification"], # noqa: E501
-                                                                    "classifier_from_token": ["Yes"],  # noqa: E501
-                                                                    "method": "no_post_processing"}),  # noqa: E501
-    "LlamaForSequenceClassification": _HfExamplesInfo("Skywork/Skywork-Reward-V2-Llama-3.2-1B"),  # noqa: E501
-    "Qwen2ForSequenceClassification": _HfExamplesInfo("jason9693/Qwen2.5-1.5B-apeach"),  # noqa: E501
-    "Qwen3ForSequenceClassification": _HfExamplesInfo("tomaarsen/Qwen3-Reranker-0.6B-seq-cls"),  # noqa: E501
+    "GemmaForSequenceClassification": _HfExamplesInfo(
+        "BAAI/bge-reranker-v2-gemma",
+        hf_overrides={
+            "architectures": ["GemmaForSequenceClassification"],
+            "classifier_from_token": ["Yes"],
+            "method": "no_post_processing",
+        },
+    ),
+    "LlamaForSequenceClassification": _HfExamplesInfo(
+        "Skywork/Skywork-Reward-V2-Llama-3.2-1B"
+    ),
+    "Qwen2ForSequenceClassification": _HfExamplesInfo("jason9693/Qwen2.5-1.5B-apeach"),
+    "Qwen3ForSequenceClassification": _HfExamplesInfo(
+        "tomaarsen/Qwen3-Reranker-0.6B-seq-cls"
+    ),
 }
 
 _MULTIMODAL_EXAMPLE_MODELS = {
     # [Decoder-only]
     "AriaForConditionalGeneration": _HfExamplesInfo("rhymes-ai/Aria"),
-    "AyaVisionForConditionalGeneration": _HfExamplesInfo("CohereForAI/aya-vision-8b"), # noqa: E501
-    "Blip2ForConditionalGeneration": _HfExamplesInfo("Salesforce/blip2-opt-2.7b",  # noqa: E501
-                                                     extras={"6b": "Salesforce/blip2-opt-6.7b"}),  # noqa: E501
-    "ChameleonForConditionalGeneration": _HfExamplesInfo("facebook/chameleon-7b"),  # noqa: E501
-    "Cohere2VisionForConditionalGeneration": _HfExamplesInfo("CohereLabs/command-a-vision-07-2025"), # noqa: E501
-    "DeepseekVLV2ForCausalLM": _HfExamplesInfo("deepseek-ai/deepseek-vl2-tiny",  # noqa: E501
-                                                extras={"fork": "Isotr0py/deepseek-vl2-tiny"},  # noqa: E501
-                                                max_transformers_version="4.48",  # noqa: E501
-                                                transformers_version_reason="HF model is not compatible.",  # noqa: E501
-                                                hf_overrides={"architectures": ["DeepseekVLV2ForCausalLM"]}),  # noqa: E501
-    "DotsOCRForCausalLM": _HfExamplesInfo("rednote-hilab/dots.ocr",
-                                          trust_remote_code=True),
+    "AyaVisionForConditionalGeneration": _HfExamplesInfo("CohereForAI/aya-vision-8b"),
+    "Blip2ForConditionalGeneration": _HfExamplesInfo(
+        "Salesforce/blip2-opt-2.7b",
+        extras={"6b": "Salesforce/blip2-opt-6.7b"},
+    ),
+    "ChameleonForConditionalGeneration": _HfExamplesInfo("facebook/chameleon-7b"),
+    "Cohere2VisionForConditionalGeneration": _HfExamplesInfo(
+        "CohereLabs/command-a-vision-07-2025"
+    ),
+    "DeepseekVLV2ForCausalLM": _HfExamplesInfo(
+        "deepseek-ai/deepseek-vl2-tiny",
+        extras={"fork": "Isotr0py/deepseek-vl2-tiny"},
+        max_transformers_version="4.48",
+        transformers_version_reason="HF model is not compatible.",
+        hf_overrides={"architectures": ["DeepseekVLV2ForCausalLM"]},
+    ),
+    "DotsOCRForCausalLM": _HfExamplesInfo(
+        "rednote-hilab/dots.ocr", trust_remote_code=True
+    ),
     "Emu3ForConditionalGeneration": _HfExamplesInfo("BAAI/Emu3-Chat-hf"),
-    "Ernie4_5_VLMoeForConditionalGeneration": _HfExamplesInfo("baidu/ERNIE-4.5-VL-28B-A3B-PT",  # noqa: E501
-                                                              trust_remote_code=True),
+    "Ernie4_5_VLMoeForConditionalGeneration": _HfExamplesInfo(
+        "baidu/ERNIE-4.5-VL-28B-A3B-PT",
+        trust_remote_code=True,
+    ),
     "FuyuForCausalLM": _HfExamplesInfo("adept/fuyu-8b"),
     "Gemma3ForConditionalGeneration": _HfExamplesInfo("google/gemma-3-4b-it"),
-    "Gemma3nForConditionalGeneration": _HfExamplesInfo("google/gemma-3n-E2B-it",    # noqa: E501
-                                        min_transformers_version="4.53"),
-    "GraniteSpeechForConditionalGeneration": _HfExamplesInfo("ibm-granite/granite-speech-3.3-2b"),  # noqa: E501
-    "GLM4VForCausalLM": _HfExamplesInfo("zai-org/glm-4v-9b",
-                                        trust_remote_code=True,
-                                        hf_overrides={"architectures": ["GLM4VForCausalLM"]}),  # noqa: E501
-    "Glm4vForConditionalGeneration": _HfExamplesInfo("zai-org/GLM-4.1V-9B-Thinking"),  # noqa: E501
-    "Glm4vMoeForConditionalGeneration": _HfExamplesInfo("zai-org/GLM-4.5V",
-                                                        min_transformers_version="4.56"),  # noqa: E501
-    "H2OVLChatModel": _HfExamplesInfo("h2oai/h2ovl-mississippi-800m",
-                                      trust_remote_code=True,
-                                      extras={"2b": "h2oai/h2ovl-mississippi-2b"},  # noqa: E501
-                                      max_transformers_version="4.48",  # noqa: E501
-                                      transformers_version_reason="HF model is not compatible."),  # noqa: E501
-    "HCXVisionForCausalLM": _HfExamplesInfo("naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B",  # noqa: E501
-                                            trust_remote_code=True),
-    "Idefics3ForConditionalGeneration": _HfExamplesInfo("HuggingFaceM4/Idefics3-8B-Llama3",  # noqa: E501
-                                                        {"tiny": "HuggingFaceTB/SmolVLM-256M-Instruct"},    # noqa: E501
-                                                        min_transformers_version="4.56",
-                                                        transformers_version_reason="HF model broken in 4.55"),  # noqa: E501
-    "InternS1ForConditionalGeneration": _HfExamplesInfo("internlm/Intern-S1",
-                                                        trust_remote_code=True),  # noqa: E501
-    "InternVLChatModel": _HfExamplesInfo("OpenGVLab/InternVL2-1B",
-                                         extras={"2B": "OpenGVLab/InternVL2-2B",
-                                                 "3.0": "OpenGVLab/InternVL3-1B",   # noqa: E501
-                                                 "3.5-qwen3": "OpenGVLab/InternVL3_5-1B",   # noqa: E501
-                                                 "3.5-qwen3moe": "OpenGVLab/InternVL3_5-30B-A3B",   # noqa: E501
-                                                 "3.5-gptoss": "OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview"},  # noqa: E501
-                                         trust_remote_code=True),
-    "InternVLForConditionalGeneration": _HfExamplesInfo("OpenGVLab/InternVL3-1B-hf"),    # noqa: E501
-    "KeyeForConditionalGeneration": _HfExamplesInfo("Kwai-Keye/Keye-VL-8B-Preview", # noqa: E501
-                                                    trust_remote_code=True),
-    "KeyeVL1_5ForConditionalGeneration": _HfExamplesInfo("Kwai-Keye/Keye-VL-1_5-8B", # noqa: E501
-                                                         trust_remote_code=True),
-    "KimiVLForConditionalGeneration": _HfExamplesInfo("moonshotai/Kimi-VL-A3B-Instruct",  # noqa: E501
-                                                      extras={"thinking": "moonshotai/Kimi-VL-A3B-Thinking"},  # noqa: E501
-                                                      trust_remote_code=True),
-    "Llama4ForConditionalGeneration": _HfExamplesInfo("meta-llama/Llama-4-Scout-17B-16E-Instruct",   # noqa: E501
-                                                      max_model_len=10240,
-                                                      extras={"llama-guard-4": "meta-llama/Llama-Guard-4-12B"},  # noqa: E501
-                                                      ),
-    "LlavaForConditionalGeneration": _HfExamplesInfo("llava-hf/llava-1.5-7b-hf",
-                                                     extras={"mistral": "mistral-community/pixtral-12b", # noqa: E501
-                                                             "mistral-fp8": "nm-testing/pixtral-12b-FP8-dynamic"}),  # noqa: E501
-    "LlavaNextForConditionalGeneration": _HfExamplesInfo("llava-hf/llava-v1.6-mistral-7b-hf"),  # noqa: E501
-    "LlavaNextVideoForConditionalGeneration": _HfExamplesInfo("llava-hf/LLaVA-NeXT-Video-7B-hf"),  # noqa: E501
-    "LlavaOnevisionForConditionalGeneration": _HfExamplesInfo("llava-hf/llava-onevision-qwen2-0.5b-ov-hf"),  # noqa: E501
-    "MantisForConditionalGeneration": _HfExamplesInfo("TIGER-Lab/Mantis-8B-siglip-llama3",  # noqa: E501
-                                                      max_transformers_version="4.48",  # noqa: E501
-                                                      transformers_version_reason="HF model is not compatible.",  # noqa: E501
-                                                      hf_overrides={"architectures": ["MantisForConditionalGeneration"]}),  # noqa: E501
-    "MiDashengLMModel": _HfExamplesInfo("mispeech/midashenglm-7b",
-                            trust_remote_code=True),
-    "MiniCPMO": _HfExamplesInfo("openbmb/MiniCPM-o-2_6",
-                                trust_remote_code=True),
-    "MiniCPMV": _HfExamplesInfo("openbmb/MiniCPM-Llama3-V-2_5",
-                                extras={"2.6": "openbmb/MiniCPM-V-2_6", "4.0": "openbmb/MiniCPM-V-4", "4.5": "openbmb/MiniCPM-V-4_5"},  # noqa: E501
-                                trust_remote_code=True),
-    "MiniMaxVL01ForConditionalGeneration": _HfExamplesInfo("MiniMaxAI/MiniMax-VL-01", # noqa: E501
-                                              trust_remote_code=True,
-                                              v0_only=True),
-    "Mistral3ForConditionalGeneration": _HfExamplesInfo("mistralai/Mistral-Small-3.1-24B-Instruct-2503",  # noqa: E501
-                                                        extras={"fp8": "nm-testing/Mistral-Small-3.1-24B-Instruct-2503-FP8-dynamic"}),  # noqa: E501
-    "MolmoForCausalLM": _HfExamplesInfo("allenai/Molmo-7B-D-0924",
-                                        max_transformers_version="4.48",
-                                        transformers_version_reason="Incorrectly-detected `tensorflow` import.",  # noqa: E501
-                                        extras={"olmo": "allenai/Molmo-7B-O-0924"},  # noqa: E501
-                                        trust_remote_code=True),
-    "NVLM_D": _HfExamplesInfo("nvidia/NVLM-D-72B",
-                              trust_remote_code=True),
-    "Llama_Nemotron_Nano_VL" : _HfExamplesInfo("nvidia/Llama-3.1-Nemotron-Nano-VL-8B-V1", # noqa: E501
-                                                     trust_remote_code=True),
-    "NemotronH_Nano_VL_V2": _HfExamplesInfo("nano_vl_dummy",
-                                          is_available_online=False,
-                                          trust_remote_code=True),
-    "Ovis": _HfExamplesInfo("AIDC-AI/Ovis2-1B", trust_remote_code=True,
-                            max_transformers_version="4.53",
-                            transformers_version_reason="HF model is not compatible",  # noqa: E501
-                            extras={"1.6-llama": "AIDC-AI/Ovis1.6-Llama3.2-3B",
-                                    "1.6-gemma": "AIDC-AI/Ovis1.6-Gemma2-9B"}),  # noqa: E501
-    "Ovis2_5": _HfExamplesInfo("AIDC-AI/Ovis2.5-2B",
-                               trust_remote_code=True),
-    "PaliGemmaForConditionalGeneration": _HfExamplesInfo("google/paligemma-3b-mix-224",  # noqa: E501
-                                                         extras={"v2": "google/paligemma2-3b-ft-docci-448"}),  # noqa: E501
-    "Phi3VForCausalLM": _HfExamplesInfo("microsoft/Phi-3-vision-128k-instruct",
-                                        trust_remote_code=True,
-                                        max_transformers_version="4.48",
-                                        transformers_version_reason="Use of deprecated imports which have been removed.",  # noqa: E501
-                                        extras={"phi3.5": "microsoft/Phi-3.5-vision-instruct"}),  # noqa: E501
-    "Phi4MMForCausalLM": _HfExamplesInfo("microsoft/Phi-4-multimodal-instruct",
-                                        trust_remote_code=True),
-    "Phi4MultimodalForCausalLM": _HfExamplesInfo("microsoft/Phi-4-multimodal-instruct",  # noqa: E501
-                                                 revision="refs/pr/70"),
-    "PixtralForConditionalGeneration": _HfExamplesInfo("mistralai/Pixtral-12B-2409",  # noqa: E501
-                                                       tokenizer_mode="mistral"),
-    "QwenVLForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen-VL",
-                                                      extras={"chat": "Qwen/Qwen-VL-Chat"},  # noqa: E501
-                                                      trust_remote_code=True,
-                                                      hf_overrides={"architectures": ["QwenVLForConditionalGeneration"]}),  # noqa: E501
-    "Qwen2AudioForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2-Audio-7B-Instruct"),  # noqa: E501
-    "Qwen2VLForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2-VL-2B-Instruct"),  # noqa: E501
-    "Qwen2_5_VLForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2.5-VL-3B-Instruct", # noqa: E501
-                                                          max_model_len=4096),
+    "Gemma3nForConditionalGeneration": _HfExamplesInfo(
+        "google/gemma-3n-E2B-it",
+        min_transformers_version="4.53",
+    ),
+    "GraniteSpeechForConditionalGeneration": _HfExamplesInfo(
+        "ibm-granite/granite-speech-3.3-2b"
+    ),
+    "GLM4VForCausalLM": _HfExamplesInfo(
+        "zai-org/glm-4v-9b",
+        trust_remote_code=True,
+        hf_overrides={"architectures": ["GLM4VForCausalLM"]},
+    ),
+    "Glm4vForConditionalGeneration": _HfExamplesInfo("zai-org/GLM-4.1V-9B-Thinking"),
+    "Glm4vMoeForConditionalGeneration": _HfExamplesInfo(
+        "zai-org/GLM-4.5V", min_transformers_version="4.56"
+    ),
+    "H2OVLChatModel": _HfExamplesInfo(
+        "h2oai/h2ovl-mississippi-800m",
+        trust_remote_code=True,
+        extras={"2b": "h2oai/h2ovl-mississippi-2b"},
+        max_transformers_version="4.48",
+        transformers_version_reason="HF model is not compatible.",
+    ),
+    "HCXVisionForCausalLM": _HfExamplesInfo(
+        "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B",
+        trust_remote_code=True,
+    ),
+    "Idefics3ForConditionalGeneration": _HfExamplesInfo(
+        "HuggingFaceM4/Idefics3-8B-Llama3",
+        {"tiny": "HuggingFaceTB/SmolVLM-256M-Instruct"},
+        min_transformers_version="4.56",
+        transformers_version_reason="HF model broken in 4.55",
+    ),
+    "InternS1ForConditionalGeneration": _HfExamplesInfo(
+        "internlm/Intern-S1", trust_remote_code=True
+    ),
+    "InternVLChatModel": _HfExamplesInfo(
+        "OpenGVLab/InternVL2-1B",
+        extras={
+            "2B": "OpenGVLab/InternVL2-2B",
+            "3.0": "OpenGVLab/InternVL3-1B",
+            "3.5-qwen3": "OpenGVLab/InternVL3_5-1B",
+            "3.5-qwen3moe": "OpenGVLab/InternVL3_5-30B-A3B",
+            "3.5-gptoss": "OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview",
+        },
+        trust_remote_code=True,
+    ),
+    "InternVLForConditionalGeneration": _HfExamplesInfo("OpenGVLab/InternVL3-1B-hf"),
+    "KeyeForConditionalGeneration": _HfExamplesInfo(
+        "Kwai-Keye/Keye-VL-8B-Preview",
+        trust_remote_code=True,
+    ),
+    "KeyeVL1_5ForConditionalGeneration": _HfExamplesInfo(
+        "Kwai-Keye/Keye-VL-1_5-8B",
+        trust_remote_code=True,
+    ),
+    "KimiVLForConditionalGeneration": _HfExamplesInfo(
+        "moonshotai/Kimi-VL-A3B-Instruct",
+        extras={"thinking": "moonshotai/Kimi-VL-A3B-Thinking"},
+        trust_remote_code=True,
+    ),
+    "Llama4ForConditionalGeneration": _HfExamplesInfo(
+        "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+        max_model_len=10240,
+        extras={"llama-guard-4": "meta-llama/Llama-Guard-4-12B"},
+    ),
+    "LlavaForConditionalGeneration": _HfExamplesInfo(
+        "llava-hf/llava-1.5-7b-hf",
+        extras={
+            "mistral": "mistral-community/pixtral-12b",
+            "mistral-fp8": "nm-testing/pixtral-12b-FP8-dynamic",
+        },
+    ),
+    "LlavaNextForConditionalGeneration": _HfExamplesInfo(
+        "llava-hf/llava-v1.6-mistral-7b-hf"
+    ),
+    "LlavaNextVideoForConditionalGeneration": _HfExamplesInfo(
+        "llava-hf/LLaVA-NeXT-Video-7B-hf"
+    ),
+    "LlavaOnevisionForConditionalGeneration": _HfExamplesInfo(
+        "llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
+    ),
+    "MantisForConditionalGeneration": _HfExamplesInfo(
+        "TIGER-Lab/Mantis-8B-siglip-llama3",
+        max_transformers_version="4.48",
+        transformers_version_reason="HF model is not compatible.",
+        hf_overrides={"architectures": ["MantisForConditionalGeneration"]},
+    ),
+    "MiDashengLMModel": _HfExamplesInfo(
+        "mispeech/midashenglm-7b", trust_remote_code=True
+    ),
+    "MiniCPMO": _HfExamplesInfo("openbmb/MiniCPM-o-2_6", trust_remote_code=True),
+    "MiniCPMV": _HfExamplesInfo(
+        "openbmb/MiniCPM-Llama3-V-2_5",
+        extras={
+            "2.6": "openbmb/MiniCPM-V-2_6",
+            "4.0": "openbmb/MiniCPM-V-4",
+            "4.5": "openbmb/MiniCPM-V-4_5",
+        },
+        trust_remote_code=True,
+    ),
+    "MiniMaxVL01ForConditionalGeneration": _HfExamplesInfo(
+        "MiniMaxAI/MiniMax-VL-01",
+        trust_remote_code=True,
+        v0_only=True,
+    ),
+    "Mistral3ForConditionalGeneration": _HfExamplesInfo(
+        "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
+        extras={"fp8": "nm-testing/Mistral-Small-3.1-24B-Instruct-2503-FP8-dynamic"},
+    ),
+    "MolmoForCausalLM": _HfExamplesInfo(
+        "allenai/Molmo-7B-D-0924",
+        max_transformers_version="4.48",
+        transformers_version_reason="Incorrectly-detected `tensorflow` import.",
+        extras={"olmo": "allenai/Molmo-7B-O-0924"},
+        trust_remote_code=True,
+    ),
+    "NVLM_D": _HfExamplesInfo("nvidia/NVLM-D-72B", trust_remote_code=True),
+    "Llama_Nemotron_Nano_VL": _HfExamplesInfo(
+        "nvidia/Llama-3.1-Nemotron-Nano-VL-8B-V1",
+        trust_remote_code=True,
+    ),
+    "NemotronH_Nano_VL_V2": _HfExamplesInfo(
+        "nano_vl_dummy", is_available_online=False, trust_remote_code=True
+    ),
+    "Ovis": _HfExamplesInfo(
+        "AIDC-AI/Ovis2-1B",
+        trust_remote_code=True,
+        max_transformers_version="4.53",
+        transformers_version_reason="HF model is not compatible",
+        extras={
+            "1.6-llama": "AIDC-AI/Ovis1.6-Llama3.2-3B",
+            "1.6-gemma": "AIDC-AI/Ovis1.6-Gemma2-9B",
+        },
+    ),
+    "Ovis2_5": _HfExamplesInfo("AIDC-AI/Ovis2.5-2B", trust_remote_code=True),
+    "PaliGemmaForConditionalGeneration": _HfExamplesInfo(
+        "google/paligemma-3b-mix-224",
+        extras={"v2": "google/paligemma2-3b-ft-docci-448"},
+    ),
+    "Phi3VForCausalLM": _HfExamplesInfo(
+        "microsoft/Phi-3-vision-128k-instruct",
+        trust_remote_code=True,
+        max_transformers_version="4.48",
+        transformers_version_reason="Use of deprecated imports which have been removed.",  # noqa: E501
+        extras={"phi3.5": "microsoft/Phi-3.5-vision-instruct"},
+    ),
+    "Phi4MMForCausalLM": _HfExamplesInfo(
+        "microsoft/Phi-4-multimodal-instruct", trust_remote_code=True
+    ),
+    "Phi4MultimodalForCausalLM": _HfExamplesInfo(
+        "microsoft/Phi-4-multimodal-instruct",
+        revision="refs/pr/70",
+    ),
+    "PixtralForConditionalGeneration": _HfExamplesInfo(
+        "mistralai/Pixtral-12B-2409",
+        tokenizer_mode="mistral",
+    ),
+    "QwenVLForConditionalGeneration": _HfExamplesInfo(
+        "Qwen/Qwen-VL",
+        extras={"chat": "Qwen/Qwen-VL-Chat"},
+        trust_remote_code=True,
+        hf_overrides={"architectures": ["QwenVLForConditionalGeneration"]},
+    ),
+    "Qwen2AudioForConditionalGeneration": _HfExamplesInfo(
+        "Qwen/Qwen2-Audio-7B-Instruct"
+    ),
+    "Qwen2VLForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2-VL-2B-Instruct"),
+    "Qwen2_5_VLForConditionalGeneration": _HfExamplesInfo(
+        "Qwen/Qwen2.5-VL-3B-Instruct",
+        max_model_len=4096,
+    ),
     "Qwen2_5OmniModel": _HfExamplesInfo("Qwen/Qwen2.5-Omni-3B"),
-    "Qwen2_5OmniForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2.5-Omni-7B-AWQ"),  # noqa: E501
-    "Qwen3VLForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen3-VL-4B-Instruct", # noqa: E501
-                                                        max_model_len=4096,
-                                                        min_transformers_version="4.57",
-                                                        is_available_online=False),
-    "Qwen3VLMoeForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen3-VL-30B-A3B-Instruct", # noqa: E501
-                                                          max_model_len=4096,
-                                                          min_transformers_version="4.57",
-                                                          is_available_online=False),
-    "RForConditionalGeneration": _HfExamplesInfo("YannQi/R-4B",
-                                                 trust_remote_code=True),
-    "SkyworkR1VChatModel": _HfExamplesInfo("Skywork/Skywork-R1V-38B",
-                                           trust_remote_code=True),
-    "SmolVLMForConditionalGeneration": _HfExamplesInfo("HuggingFaceTB/SmolVLM2-2.2B-Instruct",  # noqa: E501
-                                                       min_transformers_version="4.56",
-                                                       transformers_version_reason="HF model broken in 4.55"),  # noqa: E501
-    "Step3VLForConditionalGeneration": _HfExamplesInfo("stepfun-ai/step3",
-                                                        trust_remote_code=True),
-    "UltravoxModel": _HfExamplesInfo("fixie-ai/ultravox-v0_5-llama-3_2-1b",  # noqa: E501
-                                     trust_remote_code=True),
-    "TarsierForConditionalGeneration": _HfExamplesInfo("omni-research/Tarsier-7b"),  # noqa: E501
-    "Tarsier2ForConditionalGeneration": _HfExamplesInfo("omni-research/Tarsier2-Recap-7b",  # noqa: E501
-                                                        hf_overrides={"architectures": ["Tarsier2ForConditionalGeneration"]}),  # noqa: E501
+    "Qwen2_5OmniForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2.5-Omni-7B-AWQ"),
+    "Qwen3VLForConditionalGeneration": _HfExamplesInfo(
+        "Qwen/Qwen3-VL-4B-Instruct",
+        max_model_len=4096,
+        min_transformers_version="4.57",
+        is_available_online=False,
+    ),
+    "Qwen3VLMoeForConditionalGeneration": _HfExamplesInfo(
+        "Qwen/Qwen3-VL-30B-A3B-Instruct",
+        max_model_len=4096,
+        min_transformers_version="4.57",
+        is_available_online=False,
+    ),
+    "RForConditionalGeneration": _HfExamplesInfo("YannQi/R-4B", trust_remote_code=True),
+    "SkyworkR1VChatModel": _HfExamplesInfo(
+        "Skywork/Skywork-R1V-38B", trust_remote_code=True
+    ),
+    "SmolVLMForConditionalGeneration": _HfExamplesInfo(
+        "HuggingFaceTB/SmolVLM2-2.2B-Instruct",
+        min_transformers_version="4.56",
+        transformers_version_reason="HF model broken in 4.55",
+    ),
+    "Step3VLForConditionalGeneration": _HfExamplesInfo(
+        "stepfun-ai/step3", trust_remote_code=True
+    ),
+    "UltravoxModel": _HfExamplesInfo(
+        "fixie-ai/ultravox-v0_5-llama-3_2-1b",
+        trust_remote_code=True,
+    ),
+    "TarsierForConditionalGeneration": _HfExamplesInfo("omni-research/Tarsier-7b"),
+    "Tarsier2ForConditionalGeneration": _HfExamplesInfo(
+        "omni-research/Tarsier2-Recap-7b",
+        hf_overrides={"architectures": ["Tarsier2ForConditionalGeneration"]},
+    ),
     "VoxtralForConditionalGeneration": _HfExamplesInfo(
         "mistralai/Voxtral-Mini-3B-2507",
         min_transformers_version="4.54",
@@ -600,80 +795,120 @@ _MULTIMODAL_EXAMPLE_MODELS = {
         is_available_online=False,
     ),
     # [Encoder-decoder]
-    "WhisperForConditionalGeneration": _HfExamplesInfo("openai/whisper-large-v3"),  # noqa: E501
+    "WhisperForConditionalGeneration": _HfExamplesInfo("openai/whisper-large-v3"),
     # [Cross-encoder]
-    "JinaVLForRanking": _HfExamplesInfo("jinaai/jina-reranker-m0"),   # noqa: E501
+    "JinaVLForRanking": _HfExamplesInfo("jinaai/jina-reranker-m0"),
 }
 
 
 _SPECULATIVE_DECODING_EXAMPLE_MODELS = {
-    "MedusaModel": _HfExamplesInfo("JackFram/llama-68m",
-                                   speculative_model="abhigoyal/vllm-medusa-llama-68m-random"),  # noqa: E501
+    "MedusaModel": _HfExamplesInfo(
+        "JackFram/llama-68m", speculative_model="abhigoyal/vllm-medusa-llama-68m-random"
+    ),
     # Temporarily disabled.
     # TODO(woosuk): Re-enable this once the MLP Speculator is supported in V1.
-    # "MLPSpeculatorPreTrainedModel": _HfExamplesInfo("JackFram/llama-160m",
-    #                                                 speculative_model="ibm-ai-platform/llama-160m-accelerator"),  # noqa: E501
-    "DeepSeekMTPModel": _HfExamplesInfo("luccafong/deepseek_mtp_main_random",
-                                        speculative_model="luccafong/deepseek_mtp_draft_random",  # noqa: E501
-                                        trust_remote_code=True),
-    "EagleDeepSeekMTPModel": _HfExamplesInfo("eagle618/deepseek-v3-random",
-                                        speculative_model="eagle618/eagle-deepseek-v3-random",  # noqa: E501
-                                        trust_remote_code=True),
-    "EagleLlamaForCausalLM": _HfExamplesInfo("meta-llama/Meta-Llama-3-8B-Instruct", # noqa: E501
-                                             trust_remote_code=True,
-                                             speculative_model="yuhuili/EAGLE-LLaMA3-Instruct-8B",
-                                             tokenizer="meta-llama/Meta-Llama-3-8B-Instruct"), # noqa: E501
-    "Eagle3LlamaForCausalLM": _HfExamplesInfo("meta-llama/Llama-3.1-8B-Instruct",  # noqa: E501
-                                            trust_remote_code=True,
-                                            speculative_model="yuhuili/EAGLE3-LLaMA3.1-Instruct-8B", # noqa: E501
-                                            tokenizer="meta-llama/Llama-3.1-8B-Instruct",
-                                            use_original_num_layers=True,
-                                            max_model_len=10240),
-    "LlamaForCausalLMEagle3": _HfExamplesInfo("Qwen/Qwen3-8B",  # noqa: E501
-                                            trust_remote_code=True,
-                                            speculative_model="AngelSlim/Qwen3-8B_eagle3",   # noqa: E501
-                                            tokenizer="Qwen/Qwen3-8B",
-                                            use_original_num_layers=True),
+    # "MLPSpeculatorPreTrainedModel": _HfExamplesInfo(
+    #     "JackFram/llama-160m",
+    #     speculative_model="ibm-ai-platform/llama-160m-accelerator"
+    # ),
+    "DeepSeekMTPModel": _HfExamplesInfo(
+        "luccafong/deepseek_mtp_main_random",
+        speculative_model="luccafong/deepseek_mtp_draft_random",
+        trust_remote_code=True,
+    ),
+    "EagleDeepSeekMTPModel": _HfExamplesInfo(
+        "eagle618/deepseek-v3-random",
+        speculative_model="eagle618/eagle-deepseek-v3-random",
+        trust_remote_code=True,
+    ),
+    "EagleLlamaForCausalLM": _HfExamplesInfo(
+        "meta-llama/Meta-Llama-3-8B-Instruct",
+        trust_remote_code=True,
+        speculative_model="yuhuili/EAGLE-LLaMA3-Instruct-8B",
+        tokenizer="meta-llama/Meta-Llama-3-8B-Instruct",
+    ),
+    "Eagle3LlamaForCausalLM": _HfExamplesInfo(
+        "meta-llama/Llama-3.1-8B-Instruct",
+        trust_remote_code=True,
+        speculative_model="yuhuili/EAGLE3-LLaMA3.1-Instruct-8B",
+        tokenizer="meta-llama/Llama-3.1-8B-Instruct",
+        use_original_num_layers=True,
+        max_model_len=10240,
+    ),
+    "LlamaForCausalLMEagle3": _HfExamplesInfo(
+        "Qwen/Qwen3-8B",
+        trust_remote_code=True,
+        speculative_model="AngelSlim/Qwen3-8B_eagle3",
+        tokenizer="Qwen/Qwen3-8B",
+        use_original_num_layers=True,
+    ),
     "EagleLlama4ForCausalLM": _HfExamplesInfo(
         "morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct",
         trust_remote_code=True,
         speculative_model="morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct",
-        tokenizer="meta-llama/Llama-4-Scout-17B-16E-Instruct"),  # noqa: E501
-    "EagleMiniCPMForCausalLM": _HfExamplesInfo("openbmb/MiniCPM-1B-sft-bf16",
-                                            trust_remote_code=True,
-                                            is_available_online=False,
-                                            speculative_model="openbmb/MiniCPM-2B-sft-bf16",
-                                            tokenizer="openbmb/MiniCPM-2B-sft-bf16"),
-    "ErnieMTPModel": _HfExamplesInfo("baidu/ERNIE-4.5-21B-A3B-PT",
-                                    trust_remote_code=True,
-                                    speculative_model="baidu/ERNIE-4.5-21B-A3B-PT"),
-    "Glm4MoeMTPModel": _HfExamplesInfo("zai-org/GLM-4.5",
-                                        speculative_model="zai-org/GLM-4.5",
-                                        min_transformers_version="4.56",
-                                        is_available_online=False),
+        tokenizer="meta-llama/Llama-4-Scout-17B-16E-Instruct",
+    ),
+    "EagleMiniCPMForCausalLM": _HfExamplesInfo(
+        "openbmb/MiniCPM-1B-sft-bf16",
+        trust_remote_code=True,
+        is_available_online=False,
+        speculative_model="openbmb/MiniCPM-2B-sft-bf16",
+        tokenizer="openbmb/MiniCPM-2B-sft-bf16",
+    ),
+    "ErnieMTPModel": _HfExamplesInfo(
+        "baidu/ERNIE-4.5-21B-A3B-PT",
+        trust_remote_code=True,
+        speculative_model="baidu/ERNIE-4.5-21B-A3B-PT",
+    ),
+    "Glm4MoeMTPModel": _HfExamplesInfo(
+        "zai-org/GLM-4.5",
+        speculative_model="zai-org/GLM-4.5",
+        min_transformers_version="4.56",
+        is_available_online=False,
+    ),
     "LongCatFlashMTPModel": _HfExamplesInfo(
         "meituan-longcat/LongCat-Flash-Chat",
         trust_remote_code=True,
-        speculative_model="meituan-longcat/LongCat-Flash-Chat"),
-    "MiMoMTPModel": _HfExamplesInfo("XiaomiMiMo/MiMo-7B-RL",
-                                    trust_remote_code=True,
-                                    speculative_model="XiaomiMiMo/MiMo-7B-RL"),
+        speculative_model="meituan-longcat/LongCat-Flash-Chat",
+    ),
+    "MiMoMTPModel": _HfExamplesInfo(
+        "XiaomiMiMo/MiMo-7B-RL",
+        trust_remote_code=True,
+        speculative_model="XiaomiMiMo/MiMo-7B-RL",
+    ),
     "Eagle3Qwen2_5vlForCausalLM": _HfExamplesInfo(
         "Qwen/Qwen2.5-VL-7B-Instruct",
-        speculative_model="Rayzl/qwen2.5-vl-7b-eagle3-sgl"),
-    "Qwen3NextMTP": _HfExamplesInfo("Qwen/Qwen3-Next-80B-A3B-Instruct",
-                                     min_transformers_version="4.56.3"),
+        speculative_model="Rayzl/qwen2.5-vl-7b-eagle3-sgl",
+    ),
+    "Qwen3NextMTP": _HfExamplesInfo(
+        "Qwen/Qwen3-Next-80B-A3B-Instruct", min_transformers_version="4.56.3"
+    ),
 }
 
 _TRANSFORMERS_BACKEND_MODELS = {
-    "TransformersEmbeddingModel": _HfExamplesInfo("BAAI/bge-base-en-v1.5", min_transformers_version="4.57.0.dev0"),  # noqa: E501
-    "TransformersForSequenceClassification": _HfExamplesInfo("papluca/xlm-roberta-base-language-detection", min_transformers_version="4.57.0.dev0"),  # noqa: E501
-    "TransformersForCausalLM": _HfExamplesInfo("hmellor/Ilama-3.2-1B", trust_remote_code=True),  # noqa: E501
+    "TransformersEmbeddingModel": _HfExamplesInfo(
+        "BAAI/bge-base-en-v1.5", min_transformers_version="4.57.0.dev0"
+    ),
+    "TransformersForSequenceClassification": _HfExamplesInfo(
+        "papluca/xlm-roberta-base-language-detection",
+        min_transformers_version="4.57.0.dev0",
+    ),
+    "TransformersForCausalLM": _HfExamplesInfo(
+        "hmellor/Ilama-3.2-1B", trust_remote_code=True
+    ),
     "TransformersForMultimodalLM": _HfExamplesInfo("BAAI/Emu3-Chat-hf"),
-    "TransformersMoEForCausalLM": _HfExamplesInfo("allenai/OLMoE-1B-7B-0924", min_transformers_version="4.57.0.dev0"),  # noqa: E501
-    "TransformersMoEForMultimodalLM": _HfExamplesInfo("Qwen/Qwen3-VL-30B-A3B-Instruct", min_transformers_version="4.57.0.dev0"),  # noqa: E501
-    "TransformersMoEEmbeddingModel": _HfExamplesInfo("Qwen/Qwen3-30B-A3B", min_transformers_version="4.57.0.dev0"),  # noqa: E501
-    "TransformersMoEForSequenceClassification": _HfExamplesInfo("Qwen/Qwen3-30B-A3B", min_transformers_version="4.57.0.dev0"),  # noqa: E501
+    "TransformersMoEForCausalLM": _HfExamplesInfo(
+        "allenai/OLMoE-1B-7B-0924", min_transformers_version="4.57.0.dev0"
+    ),
+    "TransformersMoEForMultimodalLM": _HfExamplesInfo(
+        "Qwen/Qwen3-VL-30B-A3B-Instruct", min_transformers_version="4.57.0.dev0"
+    ),
+    "TransformersMoEEmbeddingModel": _HfExamplesInfo(
+        "Qwen/Qwen3-30B-A3B", min_transformers_version="4.57.0.dev0"
+    ),
+    "TransformersMoEForSequenceClassification": _HfExamplesInfo(
+        "Qwen/Qwen3-30B-A3B", min_transformers_version="4.57.0.dev0"
+    ),
 }
 
 _EXAMPLE_MODELS = {
@@ -699,8 +934,9 @@ class HfExampleModels:
         try:
             return self.hf_models[model_arch]
         except KeyError:
-            raise ValueError(f"No example model defined for {model_arch}; "
-                             f"please update this file.") from None
+            raise ValueError(
+                f"No example model defined for {model_arch}; please update this file."
+            ) from None
 
     def find_hf_info(self, model_id: str) -> _HfExamplesInfo:
         for info in self.hf_models.values():
@@ -712,8 +948,9 @@ class HfExampleModels:
             if any(extra == model_id for extra in info.extras.values()):
                 return info
 
-        raise ValueError(f"No example model defined for {model_id}; "
-                         f"please update this file.")
+        raise ValueError(
+            f"No example model defined for {model_id}; please update this file."
+        )
 
 
 HF_EXAMPLE_MODELS = HfExampleModels(_EXAMPLE_MODELS)
diff --git a/tests/multimodal/test_cache.py b/tests/multimodal/test_cache.py
index 49493a2e04..fe983990b9 100644
--- a/tests/multimodal/test_cache.py
+++ b/tests/multimodal/test_cache.py
@@ -71,25 +71,27 @@ def _dummy_items(
     )
 
 
-# yapf: disable
 @pytest.mark.parametrize(
     ("item", "expected_size"),
     [
         (_dummy_item("a", {"a1": 100}), 100),
         (_dummy_item("a", {"a1": 100, "a2": 110}), 210),
         (_dummy_items({"a": {"a1": 100, "a2": 110}, "b": {"b1": 120, "b2": 130}}), 460),  # noqa: E501
-        (_dummy_items({"a": {"a1": 100, "a2": 110}, "b": {"b1": 120, "b2": 130}}).get_data(), 460),  # noqa: E501
+        (
+            _dummy_items(
+                {"a": {"a1": 100, "a2": 110}, "b": {"b1": 120, "b2": 130}}
+            ).get_data(),
+            460,
+        ),
     ],
 )
-# yapf: enable
 def test_cache_item_size(item, expected_size):
     cache = MultiModalCache.get_lru_cache(2048, type(item))
 
     cache[""] = item
     assert cache.currsize == expected_size
 
-    prompt_update = PromptInsertion("dummy", "target", "insertion") \
-        .resolve(0)
+    prompt_update = PromptInsertion("dummy", "target", "insertion").resolve(0)
 
     cache[""] = MultiModalProcessorCacheItem(item, [prompt_update])
     assert cache.currsize == expected_size
@@ -106,9 +108,9 @@ def _create_vllm_config(
     return VllmConfig(
         model_config=ModelConfig(
             model="llava-hf/llava-onevision-qwen2-0.5b-ov-hf",
-            mm_processor_cache_gb=mm_processor_cache_gb),
-        parallel_config=ParallelConfig(
-            data_parallel_size=1 if enable_ipc else 2),
+            mm_processor_cache_gb=mm_processor_cache_gb,
+        ),
+        parallel_config=ParallelConfig(data_parallel_size=1 if enable_ipc else 2),
     )
 
 
@@ -124,11 +126,9 @@ def _compare_caches(
     seed: int = 0,
 ):
     cache_0_p0 = processor_cache_from_config(config_0, MULTIMODAL_REGISTRY)
-    cache_0_p1 = engine_receiver_cache_from_config(config_0,
-                                                   MULTIMODAL_REGISTRY)
+    cache_0_p1 = engine_receiver_cache_from_config(config_0, MULTIMODAL_REGISTRY)
     cache_1_p0 = processor_cache_from_config(config_1, MULTIMODAL_REGISTRY)
-    cache_1_p1 = engine_receiver_cache_from_config(config_1,
-                                                   MULTIMODAL_REGISTRY)
+    cache_1_p1 = engine_receiver_cache_from_config(config_1, MULTIMODAL_REGISTRY)
 
     cache_size_gb = max(
         config_0.model_config.multimodal_config.mm_processor_cache_gb,
@@ -142,8 +142,7 @@ def _compare_caches(
         for _ in range(int(item_capacity / hit_rate))
     ]
     all_hashes = [
-        MultiModalHasher.hash_kwargs(item=item.get_data())
-        for item in all_items
+        MultiModalHasher.hash_kwargs(item=item.get_data()) for item in all_items
     ]
 
     # Should not be used since there is nothing to convert to text
@@ -162,7 +161,8 @@ def _compare_caches(
             for _ in range(is_cached_calls_per_iter):
                 cache_0_p0.is_cached(selected_hashes)
             cache_0_p0_out = [
-                item for item, _ in cache_0_p0.get_and_update(
+                item
+                for item, _ in cache_0_p0.get_and_update(
                     [(item, prompt_update.content) for item in selected_items],
                     selected_hashes,
                 )
@@ -174,7 +174,8 @@ def _compare_caches(
             for _ in range(is_cached_calls_per_iter):
                 cache_1_p0.is_cached(selected_hashes)
             cache_1_p0_out = [
-                item for item, _ in cache_1_p0.get_and_update(
+                item
+                for item, _ in cache_1_p0.get_and_update(
                     [(item, prompt_update.content) for item in selected_items],
                     selected_hashes,
                 )
@@ -183,14 +184,12 @@ def _compare_caches(
         if cache_0_p1 is None:
             cache_0_p1_out = cache_0_p0_out
         else:
-            cache_0_p1_out = cache_0_p1.get_and_update(cache_0_p0_out,
-                                                       selected_hashes)
+            cache_0_p1_out = cache_0_p1.get_and_update(cache_0_p0_out, selected_hashes)
 
         if cache_1_p1 is None:
             cache_1_p1_out = cache_1_p0_out
         else:
-            cache_1_p1_out = cache_1_p1.get_and_update(cache_1_p0_out,
-                                                       selected_hashes)
+            cache_1_p1_out = cache_1_p1.get_and_update(cache_1_p0_out, selected_hashes)
 
         assert cache_0_p1_out == cache_1_p1_out, f"Failed at {it=}"
 
diff --git a/tests/multimodal/test_processing.py b/tests/multimodal/test_processing.py
index 961a2c86b2..87733f20c4 100644
--- a/tests/multimodal/test_processing.py
+++ b/tests/multimodal/test_processing.py
@@ -9,9 +9,6 @@ import pytest
 
 from vllm.config import ModelConfig
 from vllm.multimodal import MULTIMODAL_REGISTRY
-
-# yapf conflicts with isort for this block
-# yapf: disable
 from vllm.multimodal.processing import (
     InputProcessingContext,
     PlaceholderFeaturesInfo,
@@ -24,8 +21,6 @@ from vllm.multimodal.processing import (
     iter_token_matches,
     replace_token_matches,
 )
-
-# yapf: enable
 from vllm.multimodal.profiling import MultiModalProfiler
 from vllm.transformers_utils.tokenizer import AnyTokenizer
 
@@ -34,7 +29,6 @@ from .utils import random_image
 pytestmark = pytest.mark.cpu_test
 
 
-# yapf: disable
 @pytest.mark.parametrize(
     ("token_ids", "match_ids", "expected"),
     [
@@ -44,34 +38,34 @@ pytestmark = pytest.mark.cpu_test
             [32000, 32000, 32000],
             [32000],
             [
-                { "start_idx": 0, "end_idx": 1 },
-                { "start_idx": 1, "end_idx": 2 },
-                { "start_idx": 2, "end_idx": 3 },
+                {"start_idx": 0, "end_idx": 1},
+                {"start_idx": 1, "end_idx": 2},
+                {"start_idx": 2, "end_idx": 3},
             ],
         ),
         (
             [32000, 32000, 32000],
             [32000, 32000],
-            [{ "start_idx": 0, "end_idx": 2 }],
+            [{"start_idx": 0, "end_idx": 2}],
         ),
         (
             [32000, 32000, 32000],
             [32000, 32000, 32000],
-            [{ "start_idx": 0, "end_idx": 3 }],
+            [{"start_idx": 0, "end_idx": 3}],
         ),
         (
             [9833, 28747, 32000, 32000, 32000, 9833, 28747, 32000, 32000, 918],
             [28747, 32000],
             [
-                { "start_idx": 1, "end_idx": 3 },
-                { "start_idx": 6, "end_idx": 8 },
+                {"start_idx": 1, "end_idx": 3},
+                {"start_idx": 6, "end_idx": 8},
             ],
         ),
         (
             [9833, 28747, 32000, 32000, 32000, 9833, 28747, 32000, 32000, 918],
             [28747, 32000, 32000, 32000],
             [
-                { "start_idx": 1, "end_idx": 5 },
+                {"start_idx": 1, "end_idx": 5},
             ],
         ),
         (
@@ -82,14 +76,13 @@ pytestmark = pytest.mark.cpu_test
     ],
 )
 @pytest.mark.parametrize("start_idx", [0, 4, 8])
-# yapf: enable
 def test_iter_token_matches(token_ids, match_ids, expected, start_idx):
-    result = list(iter_token_matches(token_ids, match_ids,
-                                     start_idx=start_idx))
+    result = list(iter_token_matches(token_ids, match_ids, start_idx=start_idx))
 
     # Manually constructed results
-    assert [item._asdict() for item in result
-            ] == [item for item in expected if item["start_idx"] >= start_idx]
+    assert [item._asdict() for item in result] == [
+        item for item in expected if item["start_idx"] >= start_idx
+    ]
 
     # Invariants
     match_lens = [end - start for start, end in result]
@@ -97,7 +90,6 @@ def test_iter_token_matches(token_ids, match_ids, expected, start_idx):
     assert all(match_len == len(match_ids) for match_len in match_lens)
 
 
-# yapf: disable
 @pytest.mark.parametrize(
     ("token_ids", "match_ids", "new_ids", "expected"),
     [
@@ -141,7 +133,6 @@ def test_iter_token_matches(token_ids, match_ids, expected, start_idx):
         ),
     ],
 )
-# yapf: enable
 def test_replace_token_matches(token_ids, match_ids, new_ids, expected):
     result = replace_token_matches(token_ids, match_ids, new_ids)
 
@@ -149,7 +140,6 @@ def test_replace_token_matches(token_ids, match_ids, new_ids, expected):
     assert result == expected
 
 
-# yapf: disable
 @pytest.mark.parametrize(
     ("prompt", "target_by_key", "expected_by_key"),
     [
@@ -166,11 +156,11 @@ def test_replace_token_matches(token_ids, match_ids, new_ids, expected):
                 "pattern_1": [],
                 "pattern_2": [],
                 "pattern_3": [
-                    { "start_idx": 0, "end_idx": 0 },
+                    {"start_idx": 0, "end_idx": 0},
                 ],
                 "pattern_4": [],
                 "pattern_5": [
-                    { "start_idx": 0, "end_idx": 0 },
+                    {"start_idx": 0, "end_idx": 0},
                 ],
             },
         ),
@@ -186,26 +176,26 @@ def test_replace_token_matches(token_ids, match_ids, new_ids, expected):
             },
             {
                 "pattern_1": [
-                    { "start_idx": 0, "end_idx": 1 },
-                    { "start_idx": 1, "end_idx": 2 },
-                    { "start_idx": 2, "end_idx": 3 },
-                    { "start_idx": 3, "end_idx": 4 },
+                    {"start_idx": 0, "end_idx": 1},
+                    {"start_idx": 1, "end_idx": 2},
+                    {"start_idx": 2, "end_idx": 3},
+                    {"start_idx": 3, "end_idx": 4},
                 ],
                 "pattern_2": [
-                    { "start_idx": 0, "end_idx": 2 },
-                    { "start_idx": 2, "end_idx": 4 },
+                    {"start_idx": 0, "end_idx": 2},
+                    {"start_idx": 2, "end_idx": 4},
                 ],
                 "pattern_3": [
-                    { "start_idx": 0, "end_idx": 3 },
+                    {"start_idx": 0, "end_idx": 3},
                 ],
                 "pattern_4": [
-                    { "start_idx": 0, "end_idx": 0 },
+                    {"start_idx": 0, "end_idx": 0},
                 ],
                 "pattern_5": [
-                    { "start_idx": 1, "end_idx": 1 },
+                    {"start_idx": 1, "end_idx": 1},
                 ],
                 "pattern_6": [
-                    { "start_idx": 4, "end_idx": 4 },
+                    {"start_idx": 4, "end_idx": 4},
                 ],
             },
         ),
@@ -221,26 +211,25 @@ def test_replace_token_matches(token_ids, match_ids, new_ids, expected):
             },
             {
                 "pattern_1": [
-                    { "start_idx": 1, "end_idx": 3 },
-                    { "start_idx": 6, "end_idx": 8 },
+                    {"start_idx": 1, "end_idx": 3},
+                    {"start_idx": 6, "end_idx": 8},
                 ],
                 "pattern_2": [
-                    { "start_idx": 1, "end_idx": 5 },
+                    {"start_idx": 1, "end_idx": 5},
                 ],
                 "pattern_3": [],
                 "pattern_4": [
-                    { "start_idx": 0, "end_idx": 0 },
+                    {"start_idx": 0, "end_idx": 0},
                 ],
                 "pattern_5": [],
                 "pattern_6": [
-                    { "start_idx": 10, "end_idx": 10 },
+                    {"start_idx": 10, "end_idx": 10},
                 ],
             },
         ),
     ],
 )
 @pytest.mark.parametrize("update_type", [PromptInsertion, PromptReplacement])
-# yapf: enable
 def test_find_token_matches(
     prompt,
     target_by_key,
@@ -272,7 +261,6 @@ def test_find_token_matches(
     } == expected_by_key
 
 
-# yapf: disable
 @pytest.mark.parametrize(
     ("prompt", "target_by_key", "expected_by_key"),
     [
@@ -288,16 +276,16 @@ def test_find_token_matches(
                 "pattern_5": PromptIndexTargets.end(),
             },
             {
-                "pattern_1": [{ "start_idx": 0, "end_idx": 0 }],
+                "pattern_1": [{"start_idx": 0, "end_idx": 0}],
                 "pattern_2": [],
                 "pattern_3": [
-                    { "start_idx": 0, "end_idx": 0 },
+                    {"start_idx": 0, "end_idx": 0},
                 ],
                 "pattern_4": [],
                 "pattern_5": [
-                    { "start_idx": 0, "end_idx": 0 },
+                    {"start_idx": 0, "end_idx": 0},
                 ],
-            }
+            },
         ),
         (
             "<image><image><image><image>",
@@ -311,26 +299,26 @@ def test_find_token_matches(
             },
             {
                 "pattern_1": [
-                    { "start_idx": 0, "end_idx": 7 },
-                    { "start_idx": 7, "end_idx": 14 },
-                    { "start_idx": 14, "end_idx": 21 },
-                    { "start_idx": 21, "end_idx": 28 },
+                    {"start_idx": 0, "end_idx": 7},
+                    {"start_idx": 7, "end_idx": 14},
+                    {"start_idx": 14, "end_idx": 21},
+                    {"start_idx": 21, "end_idx": 28},
                 ],
                 "pattern_2": [
-                    { "start_idx": 0, "end_idx": 14 },
-                    { "start_idx": 14, "end_idx": 28 },
+                    {"start_idx": 0, "end_idx": 14},
+                    {"start_idx": 14, "end_idx": 28},
                 ],
                 "pattern_3": [
-                    { "start_idx": 0, "end_idx": 21 },
+                    {"start_idx": 0, "end_idx": 21},
                 ],
                 "pattern_4": [
-                    { "start_idx": 0, "end_idx": 0 },
+                    {"start_idx": 0, "end_idx": 0},
                 ],
                 "pattern_5": [
-                    { "start_idx": 7, "end_idx": 7 },
+                    {"start_idx": 7, "end_idx": 7},
                 ],
                 "pattern_6": [
-                    { "start_idx": 28, "end_idx": 28 },
+                    {"start_idx": 28, "end_idx": 28},
                 ],
             },
         ),
@@ -346,21 +334,21 @@ def test_find_token_matches(
             },
             {
                 "pattern_1": [
-                    { "start_idx": 0, "end_idx": 13 },
-                    { "start_idx": 27, "end_idx": 40 },
+                    {"start_idx": 0, "end_idx": 13},
+                    {"start_idx": 27, "end_idx": 40},
                 ],
                 "pattern_2": [
-                    { "start_idx": 0, "end_idx": 27 },
+                    {"start_idx": 0, "end_idx": 27},
                 ],
                 "pattern_3": [],
                 "pattern_4": [
-                    { "start_idx": 0, "end_idx": 0 },
+                    {"start_idx": 0, "end_idx": 0},
                 ],
                 "pattern_5": [
-                    { "start_idx": 13, "end_idx": 13 },
+                    {"start_idx": 13, "end_idx": 13},
                 ],
                 "pattern_6": [
-                    { "start_idx": 48, "end_idx": 48 },
+                    {"start_idx": 48, "end_idx": 48},
                 ],
             },
         ),
@@ -374,22 +362,21 @@ def test_find_token_matches(
             },
             {
                 "pattern_1": [
-                    { "start_idx": 0, "end_idx": 9 },
-                    { "start_idx": 16, "end_idx": 25 },
+                    {"start_idx": 0, "end_idx": 9},
+                    {"start_idx": 16, "end_idx": 25},
                 ],
                 "pattern_2": [
-                    { "start_idx": 0, "end_idx": 16 },
-                    { "start_idx": 16, "end_idx": 32 },
+                    {"start_idx": 0, "end_idx": 16},
+                    {"start_idx": 16, "end_idx": 32},
                 ],
                 "pattern_3": [
-                    { "start_idx": 0, "end_idx": 25 },
+                    {"start_idx": 0, "end_idx": 25},
                 ],
             },
         ),
     ],
 )
 @pytest.mark.parametrize("update_type", [PromptInsertion, PromptReplacement])
-# yapf: enable
 def test_find_text_matches(
     prompt,
     target_by_key,
@@ -421,7 +408,6 @@ def test_find_text_matches(
     } == expected_by_key
 
 
-# yapf: disable
 @pytest.mark.parametrize(
     ("prompt", "target_by_key", "repl_by_key", "expected_by_update_type_mm_count"),  # noqa: E501
     [
@@ -549,9 +535,8 @@ def test_find_text_matches(
                 },
             },
         ),
-    ]
+    ],
 )
-# yapf: enable
 def test_find_update_text(
     prompt,
     target_by_key,
@@ -562,13 +547,15 @@ def test_find_update_text(
     mock_tokenizer = cast(AnyTokenizer, object())
 
     for (
-            update_type,
-            expected_by_mm_count,
+        update_type,
+        expected_by_mm_count,
     ) in expected_by_update_type_mm_count.items():
         for mm_count, expected in expected_by_mm_count.items():
             mm_prompt_updates = {
-                key: [[update_type(key, target, repl_by_key[key]).resolve(i)]
-                      for i in range(mm_count)]
+                key: [
+                    [update_type(key, target, repl_by_key[key]).resolve(i)]
+                    for i in range(mm_count)
+                ]
                 for key, target in target_by_key.items()
             }
 
@@ -589,7 +576,6 @@ def test_find_update_text(
             assert new_prompt == expected
 
 
-# yapf: disable
 @pytest.mark.parametrize(
     ("prompt", "target_by_key", "repl_by_key", "expected_by_update_type_mm_count"),  # noqa: E501
     [
@@ -615,8 +601,43 @@ def test_find_update_text(
             {
                 PromptInsertion: {
                     0: [1, 9833, 28747, 32000, 9833, 28747, 32000, 32000, 918],
-                    1: [1, 9833, 28747, 32000, 32000, 32000, 9833, 28747, 32000, 32000, 918, 1550, 918, 1550],  # noqa: E501
-                    2: [1, 9833, 28747, 32000, 32000, 32000, 32000, 32000, 9833, 28747, 32000, 32000, 918, 1550, 918, 1550, 1550, 918, 1550],  # noqa: E501
+                    1: [
+                        1,
+                        9833,
+                        28747,
+                        32000,
+                        32000,
+                        32000,
+                        9833,
+                        28747,
+                        32000,
+                        32000,
+                        918,
+                        1550,
+                        918,
+                        1550,
+                    ],
+                    2: [
+                        1,
+                        9833,
+                        28747,
+                        32000,
+                        32000,
+                        32000,
+                        32000,
+                        32000,
+                        9833,
+                        28747,
+                        32000,
+                        32000,
+                        918,
+                        1550,
+                        918,
+                        1550,
+                        1550,
+                        918,
+                        1550,
+                    ],
                 },
                 PromptReplacement: {
                     0: [1, 9833, 28747, 32000, 9833, 28747, 32000, 32000, 918],
@@ -719,9 +740,8 @@ def test_find_update_text(
                 },
             },
         ),
-    ]
+    ],
 )
-# yapf: enable
 def test_find_update_tokens(
     prompt,
     target_by_key,
@@ -732,13 +752,15 @@ def test_find_update_tokens(
     mock_tokenizer = cast(AnyTokenizer, object())
 
     for (
-            update_type,
-            expected_by_mm_count,
+        update_type,
+        expected_by_mm_count,
     ) in expected_by_update_type_mm_count.items():
         for mm_count, expected in expected_by_mm_count.items():
             mm_prompt_updates = {
-                key: [[update_type(key, target, repl_by_key[key]).resolve(i)]
-                      for i in range(mm_count)]
+                key: [
+                    [update_type(key, target, repl_by_key[key]).resolve(i)]
+                    for i in range(mm_count)
+                ]
                 for key, target in target_by_key.items()
             }
 
@@ -759,7 +781,6 @@ def test_find_update_tokens(
             assert new_prompt == expected
 
 
-# yapf: disable
 @pytest.mark.parametrize(
     "repl_by_key",
     [
@@ -796,8 +817,7 @@ def test_find_update_tokens(
                         is_embed=None,
                     ),
                 ],
-            }
-
+            },
         ),
         (
             [1, 32000, 32000, 9833, 28747, 32000, 32000, 1550, 918, 1550],
@@ -828,7 +848,7 @@ def test_find_update_tokens(
                     ),
                 ],
                 # No match for pattern_4 as it has lower priority than pattern_1
-            }
+            },
         ),
         (
             [1, 32000, 32000, 32000, 32000, 32000, 1550, 918, 1550],
@@ -867,12 +887,11 @@ def test_find_update_tokens(
                         is_embed=None,
                     ),
                 ],
-            }
+            },
         ),
-    ]
+    ],
 )
 @pytest.mark.parametrize("update_type", [PromptInsertion, PromptReplacement])
-# yapf: enable
 def test_find_mm_placeholders(
     repl_by_key,
     prompt,
@@ -899,8 +918,15 @@ def test_find_mm_placeholders(
 @pytest.mark.parametrize("model_id", ["llava-hf/llava-v1.6-mistral-7b-hf"])
 @pytest.mark.parametrize(
     ("limit", "num_supported", "is_valid"),
-    [(0, 0, True), (0, 1, True), (1, 0, False), (1, 1, True), (1, 2, True),
-     (2, 1, False), (2, 2, True)],
+    [
+        (0, 0, True),
+        (0, 1, True),
+        (1, 0, False),
+        (1, 1, True),
+        (1, 2, True),
+        (2, 1, False),
+        (2, 2, True),
+    ],
 )
 def test_limit_mm_per_prompt_dummy(model_id, limit, num_supported, is_valid):
     limit_mm_per_prompt = {"image": limit}
@@ -930,8 +956,15 @@ def test_limit_mm_per_prompt_dummy(model_id, limit, num_supported, is_valid):
 @pytest.mark.parametrize("model_id", ["llava-hf/llava-v1.6-mistral-7b-hf"])
 @pytest.mark.parametrize(
     ("num_images", "limit", "is_valid"),
-    [(0, 0, True), (0, 1, True), (1, 0, False), (1, 1, True), (1, 2, True),
-     (2, 1, False), (2, 2, True)],
+    [
+        (0, 0, True),
+        (0, 1, True),
+        (1, 0, False),
+        (1, 1, True),
+        (1, 2, True),
+        (2, 1, False),
+        (2, 2, True),
+    ],
 )
 def test_limit_mm_per_prompt_apply(model_id, num_images, limit, is_valid):
     limit_mm_per_prompt = {"image": limit}
@@ -966,7 +999,6 @@ def test_limit_mm_per_prompt_apply(model_id, num_images, limit, is_valid):
 
 
 class DummyProcessor:
-
     def __init__(self, a: int = 0, b: int = 0) -> None:
         super().__init__()
 
@@ -982,7 +1014,6 @@ class DummyProcessor:
         return dict(a=a, c=c)
 
 
-# yapf: disable
 @pytest.mark.parametrize("model_id", ["Qwen/Qwen2-VL-2B-Instruct"])  # Dummy
 @pytest.mark.parametrize(
     ("config_kwargs", "inference_kwargs", "expected_kwargs"),
@@ -996,7 +1027,6 @@ class DummyProcessor:
         ({"b": 1, "c": 1}, {}, {"a": 0, "b": 1}),
     ],
 )
-# yapf: enable
 def test_hf_processor_init_kwargs(
     model_id,
     config_kwargs,
@@ -1020,7 +1050,6 @@ def test_hf_processor_init_kwargs(
         assert getattr(processor, k) == v
 
 
-# yapf: disable
 @pytest.mark.parametrize("model_id", ["Qwen/Qwen2-VL-2B-Instruct"])  # Dummy
 @pytest.mark.parametrize(
     ("config_kwargs", "inference_kwargs", "expected_kwargs"),
@@ -1034,7 +1063,6 @@ def test_hf_processor_init_kwargs(
         ({"b": 1, "c": 1}, {}, {"a": 0, "c": 1}),
     ],
 )
-# yapf: enable
 def test_hf_processor_call_kwargs(
     model_id,
     config_kwargs,
diff --git a/tests/multimodal/test_utils.py b/tests/multimodal/test_utils.py
index 81b332059d..ea795fcbbd 100644
--- a/tests/multimodal/test_utils.py
+++ b/tests/multimodal/test_utils.py
@@ -233,7 +233,6 @@ async def test_fetch_video_http_with_dynamic_loader(
         assert metadata_sync["video_backend"] == "opencv_dynamic"
 
 
-# yapf: disable
 @pytest.mark.parametrize(
     "case",
     [
@@ -264,7 +263,6 @@ async def test_fetch_video_http_with_dynamic_loader(
                 ("image", 0),
             ],
         ),
-
         # Two modalities
         ## Internally sorted
         dict(
@@ -276,7 +274,7 @@ async def test_fetch_video_http_with_dynamic_loader(
                 "audio": [
                     PlaceholderRange(offset=0, length=2),
                     PlaceholderRange(offset=2, length=3),
-                ]
+                ],
             },
             expected_modality_idxs=[
                 ("audio", 0),
@@ -295,7 +293,7 @@ async def test_fetch_video_http_with_dynamic_loader(
                 "audio": [
                     PlaceholderRange(offset=5, length=2),
                     PlaceholderRange(offset=11, length=4),
-                ]
+                ],
             },
             expected_modality_idxs=[
                 ("image", 0),
@@ -314,7 +312,7 @@ async def test_fetch_video_http_with_dynamic_loader(
                 "audio": [
                     PlaceholderRange(offset=11, length=4),
                     PlaceholderRange(offset=5, length=2),
-                ]
+                ],
             },
             expected_modality_idxs=[
                 ("image", 1),
@@ -323,7 +321,6 @@ async def test_fetch_video_http_with_dynamic_loader(
                 ("audio", 0),
             ],
         ),
-
         # Three modalities
         ## Internally sorted
         dict(
@@ -339,7 +336,7 @@ async def test_fetch_video_http_with_dynamic_loader(
                     PlaceholderRange(offset=3, length=4),
                     PlaceholderRange(offset=7, length=5),
                     PlaceholderRange(offset=12, length=6),
-                ]
+                ],
             },
             expected_modality_idxs=[
                 ("audio", 0),
@@ -363,7 +360,7 @@ async def test_fetch_video_http_with_dynamic_loader(
                 ],
                 "video": [
                     PlaceholderRange(offset=8, length=5),
-                ]
+                ],
             },
             expected_modality_idxs=[
                 ("image", 0),
@@ -386,7 +383,7 @@ async def test_fetch_video_http_with_dynamic_loader(
                 ],
                 "video": [
                     PlaceholderRange(offset=8, length=5),
-                ]
+                ],
             },
             expected_modality_idxs=[
                 ("image", 0),
@@ -398,7 +395,6 @@ async def test_fetch_video_http_with_dynamic_loader(
         ),
     ],
 )
-# yapf: enable
 def test_argsort_mm_positions(case):
     mm_positions = case["mm_positions"]
     expected_modality_idxs = case["expected_modality_idxs"]
@@ -413,13 +409,16 @@ def test_argsort_mm_positions(case):
 @pytest.mark.parametrize("num_frames", [-1, 32, 1800])
 async def test_allowed_media_domains(video_url: str, num_frames: int):
     connector = MediaConnector(
-        media_io_kwargs={"video": {
-            "num_frames": num_frames,
-        }},
+        media_io_kwargs={
+            "video": {
+                "num_frames": num_frames,
+            }
+        },
         allowed_media_domains=[
             "www.bogotobogo.com",
             "github.com",
-        ])
+        ],
+    )
 
     video_sync, metadata_sync = connector.fetch_video(video_url)
     video_async, metadata_async = await connector.fetch_video_async(video_url)
diff --git a/tests/test_inputs.py b/tests/test_inputs.py
index 41b9665bd7..50a273016a 100644
--- a/tests/test_inputs.py
+++ b/tests/test_inputs.py
@@ -59,48 +59,52 @@ def test_parse_raw_single_batch_string_slice(inputs_slice: slice):
     )
 
 
-# yapf: disable
-@pytest.mark.parametrize('mm_processor_kwargs,expected_mm_kwargs', [
-    (None, [{}, {}]),
-    ({}, [{}, {}]),
-    ({"foo": 100}, [{"foo": 100}, {"foo": 100}]),
-    ([{"foo": 100}, {"bar": 200}], [{"foo": 100}, {"bar": 200}]),
-])
-# yapf: enable
+@pytest.mark.parametrize(
+    "mm_processor_kwargs,expected_mm_kwargs",
+    [
+        (None, [{}, {}]),
+        ({}, [{}, {}]),
+        ({"foo": 100}, [{"foo": 100}, {"foo": 100}]),
+        ([{"foo": 100}, {"bar": 200}], [{"foo": 100}, {"bar": 200}]),
+    ],
+)
 def test_zip_enc_dec_prompts(mm_processor_kwargs, expected_mm_kwargs):
     """Test mm_processor_kwargs init for zipping enc/dec prompts."""
-    encoder_prompts = ['An encoder prompt', 'Another encoder prompt']
-    decoder_prompts = ['A decoder prompt', 'Another decoder prompt']
-    zipped_prompts = zip_enc_dec_prompts(encoder_prompts, decoder_prompts,
-                                         mm_processor_kwargs)
+    encoder_prompts = ["An encoder prompt", "Another encoder prompt"]
+    decoder_prompts = ["A decoder prompt", "Another decoder prompt"]
+    zipped_prompts = zip_enc_dec_prompts(
+        encoder_prompts, decoder_prompts, mm_processor_kwargs
+    )
     assert len(zipped_prompts) == len(encoder_prompts) == len(decoder_prompts)
-    for enc, dec, exp_kwargs, zipped in zip(encoder_prompts, decoder_prompts,
-                                            expected_mm_kwargs,
-                                            zipped_prompts):
+    for enc, dec, exp_kwargs, zipped in zip(
+        encoder_prompts, decoder_prompts, expected_mm_kwargs, zipped_prompts
+    ):
         assert isinstance(zipped, dict)
         assert len(zipped.keys()) == 3
-        assert zipped['encoder_prompt'] == enc
-        assert zipped['decoder_prompt'] == dec
-        assert zipped['mm_processor_kwargs'] == exp_kwargs
+        assert zipped["encoder_prompt"] == enc
+        assert zipped["decoder_prompt"] == dec
+        assert zipped["mm_processor_kwargs"] == exp_kwargs
 
 
-@pytest.mark.parametrize("model_id", [
-    "facebook/opt-125m",
-])
-@pytest.mark.parametrize("prompt", [
-    {
-        "prompt": "",
-        "multi_modal_data": {
-            "dummy": []
+@pytest.mark.parametrize(
+    "model_id",
+    [
+        "facebook/opt-125m",
+    ],
+)
+@pytest.mark.parametrize(
+    "prompt",
+    [
+        {
+            "prompt": "",
+            "multi_modal_data": {"dummy": []},
         },
-    },
-    {
-        "prompt_token_ids": [],
-        "multi_modal_data": {
-            "dummy": []
+        {
+            "prompt_token_ids": [],
+            "multi_modal_data": {"dummy": []},
         },
-    },
-])
+    ],
+)
 def test_preprocessor_text_no_mm_inputs(model_id, prompt):
     model_config = ModelConfig(model=model_id)
     tokenizer = init_tokenizer_from_configs(model_config)
@@ -110,15 +114,19 @@ def test_preprocessor_text_no_mm_inputs(model_id, prompt):
         input_preprocessor.preprocess(prompt)
 
 
-@pytest.mark.parametrize("model_id", [
-    "facebook/chameleon-7b",
-])
-@pytest.mark.parametrize("prompt", [
-    "",
-    {
-        "prompt_token_ids": []
-    },
-])
+@pytest.mark.parametrize(
+    "model_id",
+    [
+        "facebook/chameleon-7b",
+    ],
+)
+@pytest.mark.parametrize(
+    "prompt",
+    [
+        "",
+        {"prompt_token_ids": []},
+    ],
+)
 def test_preprocessor_always_mm_code_path(model_id, prompt):
     model_config = ModelConfig(model=model_id)
     tokenizer = init_tokenizer_from_configs(model_config)
diff --git a/tests/tpu/test_moe_pallas.py b/tests/tpu/test_moe_pallas.py
index a0f3e9d2c7..e3236d20bf 100644
--- a/tests/tpu/test_moe_pallas.py
+++ b/tests/tpu/test_moe_pallas.py
@@ -9,14 +9,10 @@ import pytest
 import torch
 import torch_xla
 
-# yapf conflicts with isort for this block
-# yapf: disable
 from vllm.model_executor.layers.fused_moe.moe_pallas import fused_moe as pallas_moe
 from vllm.model_executor.layers.fused_moe.moe_torch_iterative import (
     fused_moe as torch_moe,
 )
-
-# yapf: enable
 from vllm.platforms import current_platform
 
 if not current_platform.is_tpu():
diff --git a/tests/utils_/test_utils.py b/tests/utils_/test_utils.py
index db94845a81..71c82feac3 100644
--- a/tests/utils_/test_utils.py
+++ b/tests/utils_/test_utils.py
@@ -388,7 +388,6 @@ def test_duplicate_dict_args(caplog_vllm, parser):
     assert "-O.level" in caplog_vllm.text
 
 
-# yapf: enable
 @pytest.mark.parametrize(
     "callable,kw_name,requires_kw_only,allow_var_kwargs,is_supported",
     [
@@ -408,7 +407,6 @@ def test_duplicate_dict_args(caplog_vllm, parser):
         (lambda foo, **kwargs: None, "foo", True, True, False),
     ],
 )
-# yapf: disable
 def test_supports_kw(
     callable, kw_name, requires_kw_only, allow_var_kwargs, is_supported
 ):
@@ -681,7 +679,6 @@ def test_lru_cache():
     assert 6 in cache
 
 
-# yapf: disable
 @pytest.mark.parametrize(
     ("src_dtype", "tgt_dtype", "expected_result"),
     [
@@ -715,12 +712,10 @@ def test_lru_cache():
         (torch.complex64, torch.complex32, False),
     ],
 )
-# yapf: enable
 def test_is_lossless_cast(src_dtype, tgt_dtype, expected_result):
     assert is_lossless_cast(src_dtype, tgt_dtype) == expected_result
 
 
-# yapf: disable
 @pytest.mark.parametrize(
     ("dtypes", "expected_result"),
     [
@@ -730,7 +725,6 @@ def test_is_lossless_cast(src_dtype, tgt_dtype, expected_result):
         ([torch.bool, torch.int8, torch.float16, torch.complex32], torch.complex32),  # noqa: E501
     ],
 )
-# yapf: enable
 def test_common_broadcastable_dtype(dtypes, expected_result):
     assert common_broadcastable_dtype(dtypes) == expected_result
 
@@ -775,7 +769,6 @@ def test_placeholder_module_error_handling():
         _ = placeholder_attr.module
 
 
-# yapf: disable
 @pytest.mark.parametrize(
     "obj,key1,key2",
     [
@@ -785,8 +778,8 @@ def test_placeholder_module_error_handling():
         ({1: "a", 2: "b"}, 1, 3),
         # Tests for both keys do not exist
         ({1: "a", 2: "b"}, 3, 4),
-    ])
-# yapf: enable
+    ],
+)
 def test_swap_dict_values(obj, key1, key2):
     original_obj = obj.copy()
     swap_dict_values(obj, key1, key2)
@@ -800,26 +793,30 @@ def test_swap_dict_values(obj, key1, key2):
         assert key1 not in obj
 
 
-def test_model_specification(parser_with_config, cli_config_file,
-                             cli_config_file_with_model):
+def test_model_specification(
+    parser_with_config, cli_config_file, cli_config_file_with_model
+):
     # Test model in CLI takes precedence over config
     args = parser_with_config.parse_args(
-        ['serve', 'cli-model', '--config', cli_config_file_with_model])
-    assert args.model_tag == 'cli-model'
-    assert args.served_model_name == 'mymodel'
+        ["serve", "cli-model", "--config", cli_config_file_with_model]
+    )
+    assert args.model_tag == "cli-model"
+    assert args.served_model_name == "mymodel"
 
     # Test model from config file works
-    args = parser_with_config.parse_args([
-        'serve',
-        '--config',
-        cli_config_file_with_model,
-    ])
-    assert args.model == 'config-model'
-    assert args.served_model_name == 'mymodel'
+    args = parser_with_config.parse_args(
+        [
+            "serve",
+            "--config",
+            cli_config_file_with_model,
+        ]
+    )
+    assert args.model == "config-model"
+    assert args.served_model_name == "mymodel"
 
     # Test no model specified anywhere raises error
     with pytest.raises(ValueError, match="No model specified!"):
-        parser_with_config.parse_args(['serve', '--config', cli_config_file])
+        parser_with_config.parse_args(["serve", "--config", cli_config_file])
 
     # Test using --model option raises error
     # with pytest.raises(
@@ -833,47 +830,52 @@ def test_model_specification(parser_with_config, cli_config_file,
     # Test using --model option back-compatibility
     # (when back-compatibility ends, the above test should be uncommented
     # and the below test should be removed)
-    args = parser_with_config.parse_args([
-        'serve',
-        '--tensor-parallel-size',
-        '2',
-        '--model',
-        'my-model',
-        '--trust-remote-code',
-        '--port',
-        '8001',
-    ])
+    args = parser_with_config.parse_args(
+        [
+            "serve",
+            "--tensor-parallel-size",
+            "2",
+            "--model",
+            "my-model",
+            "--trust-remote-code",
+            "--port",
+            "8001",
+        ]
+    )
     assert args.model is None
     assert args.tensor_parallel_size == 2
     assert args.trust_remote_code is True
     assert args.port == 8001
 
-    args = parser_with_config.parse_args([
-        'serve',
-        '--tensor-parallel-size=2',
-        '--model=my-model',
-        '--trust-remote-code',
-        '--port=8001',
-    ])
+    args = parser_with_config.parse_args(
+        [
+            "serve",
+            "--tensor-parallel-size=2",
+            "--model=my-model",
+            "--trust-remote-code",
+            "--port=8001",
+        ]
+    )
     assert args.model is None
     assert args.tensor_parallel_size == 2
     assert args.trust_remote_code is True
     assert args.port == 8001
 
     # Test other config values are preserved
-    args = parser_with_config.parse_args([
-        'serve',
-        'cli-model',
-        '--config',
-        cli_config_file_with_model,
-    ])
+    args = parser_with_config.parse_args(
+        [
+            "serve",
+            "cli-model",
+            "--config",
+            cli_config_file_with_model,
+        ]
+    )
     assert args.tensor_parallel_size == 2
     assert args.trust_remote_code is True
     assert args.port == 12312
 
 
-@pytest.mark.parametrize("input", [(), ("abc", ), (None, ),
-                                   (None, bool, [1, 2, 3])])
+@pytest.mark.parametrize("input", [(), ("abc",), (None,), (None, bool, [1, 2, 3])])
 def test_sha256(input: tuple):
     digest = sha256(input)
     assert digest is not None
@@ -887,7 +889,7 @@ def test_sha256(input: tuple):
     assert digest == sha256(input)
 
     # hashing different input, returns different value
-    assert digest != sha256(input + (1, ))
+    assert digest != sha256(input + (1,))
 
 
 @pytest.mark.parametrize(
@@ -897,7 +899,8 @@ def test_sha256(input: tuple):
         ("tcp://127.0.0.1:5555", ("tcp", "127.0.0.1", "5555")),
         ("tcp://[::1]:5555", ("tcp", "::1", "5555")),  # IPv6 address
         ("inproc://some_identifier", ("inproc", "some_identifier", "")),
-    ])
+    ],
+)
 def test_split_zmq_path(path, expected):
     assert split_zmq_path(path) == expected
 
@@ -909,7 +912,8 @@ def test_split_zmq_path(path, expected):
         "tcp://127.0.0.1",  # Missing port
         "tcp://[::1]",  # Missing port for IPv6
         "tcp://:5555",  # Missing host
-    ])
+    ],
+)
 def test_split_zmq_path_invalid(invalid_path):
     with pytest.raises(ValueError):
         split_zmq_path(invalid_path)
@@ -931,8 +935,9 @@ def test_make_zmq_socket_ipv6():
     zsock: zmq.Socket = make_zmq_socket(ctx, ipv6_path, socket_type)
 
     # Verify that the IPV6 option is set
-    assert zsock.getsockopt(
-        zmq.IPV6) == 1, "IPV6 option should be enabled for IPv6 addresses"
+    assert zsock.getsockopt(zmq.IPV6) == 1, (
+        "IPV6 option should be enabled for IPv6 addresses"
+    )
 
     # Clean up
     zsock.close()
@@ -1019,15 +1024,14 @@ def test_convert_ids_list_to_tokens():
     tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct")
     token_ids = tokenizer.encode("Hello, world!")
     # token_ids = [9707, 11, 1879, 0]
-    assert tokenizer.convert_ids_to_tokens(token_ids) == [
-        'Hello', ',', 'Ġworld', '!'
-    ]
+    assert tokenizer.convert_ids_to_tokens(token_ids) == ["Hello", ",", "Ġworld", "!"]
     tokens = convert_ids_list_to_tokens(tokenizer, token_ids)
-    assert tokens == ['Hello', ',', ' world', '!']
+    assert tokens == ["Hello", ",", " world", "!"]
 
 
 def test_current_stream_multithread():
     import threading
+
     if not torch.cuda.is_available():
         pytest.skip("CUDA not available")
 
@@ -1046,13 +1050,18 @@ def test_current_stream_multithread():
     child_thread.start()
 
     try:
-        assert thread_stream_ready.wait(
-            timeout=5), "Child thread failed to enter stream context in time"
+        assert thread_stream_ready.wait(timeout=5), (
+            "Child thread failed to enter stream context in time"
+        )
 
         main_current_stream = current_stream()
 
-        assert main_current_stream != child_stream, "Main thread's current_stream was contaminated by child thread"
-        assert main_current_stream == main_default_stream, "Main thread's current_stream is not the default stream"
+        assert main_current_stream != child_stream, (
+            "Main thread's current_stream was contaminated by child thread"
+        )
+        assert main_current_stream == main_default_stream, (
+            "Main thread's current_stream is not the default stream"
+        )
 
         # Notify child thread it can exit
         thread_can_exit.set()
@@ -1070,7 +1079,7 @@ def test_load_config_file(tmp_path):
         "enable-logging": True,
         "list-arg": ["item1", "item2"],
         "port": 12323,
-        "tensor-parallel-size": 4
+        "tensor-parallel-size": 4,
     }
 
     # Write the configuration data to a temporary YAML file
diff --git a/tests/v1/core/test_kv_cache_utils.py b/tests/v1/core/test_kv_cache_utils.py
index 53c71609cf..aed00a60ae 100644
--- a/tests/v1/core/test_kv_cache_utils.py
+++ b/tests/v1/core/test_kv_cache_utils.py
@@ -16,9 +16,6 @@ from vllm.multimodal.inputs import (
 from vllm.sampling_params import SamplingParams
 from vllm.utils import GiB_bytes, sha256, sha256_cbor
 from vllm.v1.core.kv_cache_manager import KVCacheManager
-
-# disable yapf here as it formats differently than isort such that both fail
-# yapf: disable
 from vllm.v1.core.kv_cache_utils import (
     BlockHash,
     FreeKVCacheBlockQueue,
@@ -48,8 +45,6 @@ from vllm.v1.kv_cache_interface import (
 from vllm.v1.metrics.stats import PrefixCacheStats
 from vllm.v1.request import Request
 
-# yapf: enable
-
 pytestmark = pytest.mark.cpu_test
 
 
diff --git a/tests/v1/logits_processors/test_correctness.py b/tests/v1/logits_processors/test_correctness.py
index 5baa11af02..34997b7e7a 100644
--- a/tests/v1/logits_processors/test_correctness.py
+++ b/tests/v1/logits_processors/test_correctness.py
@@ -22,8 +22,6 @@ from vllm.config import VllmConfig
 from vllm.platforms import current_platform
 from vllm.sampling_params import SamplingParams
 from vllm.utils import is_pin_memory_available
-
-# yapf: disable
 from vllm.v1.sample.logits_processor import (
     BatchUpdate,
     BatchUpdateBuilder,
@@ -34,8 +32,6 @@ from vllm.v1.sample.logits_processor import (
     MoveDirectionality,
     build_logitsprocs,
 )
-
-# yapf: enable
 from vllm.v1.sample.metadata import SamplingMetadata
 
 PIN_MEMORY_AVAILABLE = is_pin_memory_available()
diff --git a/tests/v1/logits_processors/test_custom_offline.py b/tests/v1/logits_processors/test_custom_offline.py
index 6ee474270c..b83129858b 100644
--- a/tests/v1/logits_processors/test_custom_offline.py
+++ b/tests/v1/logits_processors/test_custom_offline.py
@@ -7,8 +7,6 @@ from typing import Union
 import pytest
 
 from tests.utils import create_new_process_for_each_test
-
-# yapf: disable
 from tests.v1.logits_processors.utils import (
     DUMMY_LOGITPROC_ARG,
     DUMMY_LOGITPROC_FQCN,
@@ -24,8 +22,6 @@ from tests.v1.logits_processors.utils import (
     prompts,
 )
 from tests.v1.logits_processors.utils import entry_points as fake_entry_points
-
-# yapf: enable
 from vllm import LLM, SamplingParams
 from vllm.v1.sample.logits_processor import (
     STR_POOLING_REJECTS_LOGITSPROCS,
diff --git a/tests/v1/logits_processors/test_custom_online.py b/tests/v1/logits_processors/test_custom_online.py
index dbec35a508..9c5b4ff0ba 100644
--- a/tests/v1/logits_processors/test_custom_online.py
+++ b/tests/v1/logits_processors/test_custom_online.py
@@ -11,8 +11,6 @@ import pytest
 import pytest_asyncio
 
 from tests.utils import RemoteOpenAIServerCustom, create_new_process_for_each_test
-
-# yapf: disable
 from tests.v1.logits_processors.utils import (
     DUMMY_LOGITPROC_ARG,
     DUMMY_LOGITPROC_FQCN,
@@ -25,8 +23,6 @@ from tests.v1.logits_processors.utils import (
 )
 from tests.v1.logits_processors.utils import entry_points as fake_entry_points
 
-# yapf: enable
-
 
 def _server_with_logitproc_entrypoint(
     env_dict: Optional[dict[str, str]],
diff --git a/vllm/distributed/kv_transfer/kv_connector/factory.py b/vllm/distributed/kv_transfer/kv_connector/factory.py
index 329263afba..395a4e20e0 100644
--- a/vllm/distributed/kv_transfer/kv_connector/factory.py
+++ b/vllm/distributed/kv_transfer/kv_connector/factory.py
@@ -4,7 +4,6 @@
 import importlib
 from typing import TYPE_CHECKING, Callable
 
-# yapf: disable
 import vllm.envs as envs
 from vllm.distributed.kv_transfer.kv_connector.base import (
     KVConnectorBase,
@@ -13,8 +12,6 @@ from vllm.distributed.kv_transfer.kv_connector.base import (
 from vllm.distributed.kv_transfer.kv_connector.v1 import KVConnectorRole
 from vllm.logger import init_logger
 
-# yapf: enable
-
 if TYPE_CHECKING:
     from vllm.config import VllmConfig
     from vllm.config.kv_transfer import KVTransferConfig
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index e971ef2737..d7ba70381d 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-# yapf: disable
 import argparse
 import copy
 import dataclasses
@@ -88,8 +87,6 @@ from vllm.transformers_utils.utils import check_gguf_file
 from vllm.utils import FlexibleArgumentParser, GiB_bytes, get_ip, is_in_ray_actor
 from vllm.v1.sample.logits_processor import LogitsProcessor
 
-# yapf: enable
-
 if TYPE_CHECKING:
     from vllm.executor.executor_base import ExecutorBase
     from vllm.model_executor.layers.quantization import QuantizationMethods
diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index cfba1ae6e5..dfa1977c37 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -17,9 +17,6 @@ import jinja2.nodes
 import jinja2.parser
 import jinja2.sandbox
 import transformers.utils.chat_template_utils as hf_chat_utils
-
-# yapf conflicts with isort for this block
-# yapf: disable
 from openai.types.chat import (
     ChatCompletionAssistantMessageParam,
     ChatCompletionContentPartImageParam,
@@ -40,8 +37,6 @@ from openai.types.responses import ResponseInputImageParam
 from openai_harmony import Message as OpenAIHarmonyMessage
 from PIL import Image
 from pydantic import BaseModel, ConfigDict, TypeAdapter
-
-# yapf: enable
 from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast, ProcessorMixin
 
 # pydantic needs the TypedDict from typing_extensions
@@ -52,11 +47,7 @@ from vllm.logger import init_logger
 from vllm.model_executor.models import SupportsMultiModal
 from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalDataDict, MultiModalUUIDDict
 from vllm.multimodal.utils import MediaConnector
-
-# yapf: disable
 from vllm.transformers_utils.chat_templates import get_chat_template_fallback_path
-
-# yapf: enable
 from vllm.transformers_utils.processor import cached_get_processor
 from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer
 from vllm.utils import random_uuid, supports_kw
@@ -317,11 +308,7 @@ def _is_var_or_elems_access(
     ):
         return _is_var_or_elems_access(node.node, varname, key)
 
-    # yapf: disable
-    return (
-        _is_attr_access(node, varname, key) if key
-        else _is_var_access(node, varname)
-    ) # yapf: enable
+    return _is_attr_access(node, varname, key) if key else _is_var_access(node, varname)
 
 
 def _iter_nodes_assign_var_or_elems(root: jinja2.nodes.Node, varname: str):
diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py
index 944de70c1d..0702e40441 100644
--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
@@ -39,9 +39,6 @@ from vllm.entrypoints.chat_utils import (
     parse_chat_messages,
     resolve_chat_template_content_format,
 )
-
-# yapf conflicts with isort for this block
-# yapf: disable
 from vllm.entrypoints.score_utils import (
     ScoreContentPartParam,
     ScoreMultiModalParam,
@@ -50,8 +47,6 @@ from vllm.entrypoints.score_utils import (
     compress_token_type_ids,
     get_score_prompt,
 )
-
-# yapf: enable
 from vllm.entrypoints.utils import _validate_truncation_size, log_non_default_args
 from vllm.inputs import (
     DataPrompt,
diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
index 70717b761d..889326dee7 100644
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -49,9 +49,6 @@ from vllm.entrypoints.chat_utils import (
 from vllm.entrypoints.launcher import serve_http
 from vllm.entrypoints.logger import RequestLogger
 from vllm.entrypoints.openai.cli_args import make_arg_parser, validate_parsed_serve_args
-
-# yapf conflicts with isort for this block
-# yapf: disable
 from vllm.entrypoints.openai.protocol import (
     ChatCompletionRequest,
     ChatCompletionResponse,
@@ -84,8 +81,6 @@ from vllm.entrypoints.openai.protocol import (
     TranslationResponse,
     UnloadLoRAAdapterRequest,
 )
-
-# yapf: enable
 from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
 from vllm.entrypoints.openai.serving_classification import ServingClassification
 from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index a4c3f53782..221b88d796 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -11,8 +11,6 @@ from typing import Annotated, Any, ClassVar, Generic, Literal, Optional, TypeVar
 import regex as re
 import torch
 from fastapi import HTTPException, UploadFile
-
-# yapf: disable
 from openai.types.chat.chat_completion_audio import (
     ChatCompletionAudio as OpenAIChatCompletionAudio,
 )
@@ -46,8 +44,6 @@ from openai.types.responses import ResponseCreatedEvent as OpenAIResponseCreated
 from openai.types.responses import (
     ResponseInProgressEvent as OpenAIResponseInProgressEvent,
 )
-
-# yapf: enable
 from openai.types.responses.response_reasoning_item import (
     Content as ResponseReasoningTextContent,
 )
diff --git a/vllm/entrypoints/openai/run_batch.py b/vllm/entrypoints/openai/run_batch.py
index 466757468a..030ce3ce08 100644
--- a/vllm/entrypoints/openai/run_batch.py
+++ b/vllm/entrypoints/openai/run_batch.py
@@ -18,8 +18,6 @@ from vllm.config import VllmConfig
 from vllm.engine.arg_utils import AsyncEngineArgs, optional_type
 from vllm.engine.protocol import EngineClient
 from vllm.entrypoints.logger import RequestLogger
-
-# yapf: disable
 from vllm.entrypoints.openai.protocol import (
     BatchRequestInput,
     BatchRequestOutput,
@@ -30,8 +28,6 @@ from vllm.entrypoints.openai.protocol import (
     RerankResponse,
     ScoreResponse,
 )
-
-# yapf: enable
 from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
 from vllm.entrypoints.openai.serving_embedding import OpenAIServingEmbedding
 from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index ec1787d385..12dd474936 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -1733,13 +1733,15 @@ class OpenAIServingChat(OpenAIServing):
         is a tool call with arguments.
         """
 
-        # yapf: disable
         return bool(
             # if there is a delta message that includes tool calls which
             # include a function that has arguments
             output.finish_reason is not None
-            and self.enable_auto_tools and self.tool_parser and delta_message
-            and delta_message.tool_calls and delta_message.tool_calls[0]
+            and self.enable_auto_tools
+            and self.tool_parser
+            and delta_message
+            and delta_message.tool_calls
+            and delta_message.tool_calls[0]
             and delta_message.tool_calls[0].function
             and delta_message.tool_calls[0].function.arguments is not None
         )
diff --git a/vllm/entrypoints/openai/serving_classification.py b/vllm/entrypoints/openai/serving_classification.py
index b37eb5c68a..25e167e9bb 100644
--- a/vllm/entrypoints/openai/serving_classification.py
+++ b/vllm/entrypoints/openai/serving_classification.py
@@ -18,8 +18,6 @@ from vllm.entrypoints.openai.protocol import (
     ErrorResponse,
     UsageInfo,
 )
-
-# yapf: enable
 from vllm.entrypoints.openai.serving_engine import (
     ClassificationServeContext,
     OpenAIServing,
diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py
index 8cd4606579..ce0a6c0e23 100644
--- a/vllm/entrypoints/openai/serving_completion.py
+++ b/vllm/entrypoints/openai/serving_completion.py
@@ -13,9 +13,6 @@ from fastapi import Request
 from vllm.config import ModelConfig
 from vllm.engine.protocol import EngineClient
 from vllm.entrypoints.logger import RequestLogger
-
-# yapf conflicts with isort for this block
-# yapf: disable
 from vllm.entrypoints.openai.protocol import (
     CompletionLogProbs,
     CompletionRequest,
@@ -29,8 +26,6 @@ from vllm.entrypoints.openai.protocol import (
     UsageInfo,
 )
 from vllm.entrypoints.openai.serving_engine import OpenAIServing, clamp_prompt_logprobs
-
-# yapf: enable
 from vllm.entrypoints.openai.serving_models import OpenAIServingModels
 from vllm.entrypoints.renderer import RenderConfig
 from vllm.entrypoints.utils import get_max_tokens
diff --git a/vllm/entrypoints/openai/serving_embedding.py b/vllm/entrypoints/openai/serving_embedding.py
index 93c1f2548c..5517ab2802 100644
--- a/vllm/entrypoints/openai/serving_embedding.py
+++ b/vllm/entrypoints/openai/serving_embedding.py
@@ -14,9 +14,6 @@ from vllm.config import ModelConfig
 from vllm.engine.protocol import EngineClient
 from vllm.entrypoints.chat_utils import ChatTemplateContentFormatOption
 from vllm.entrypoints.logger import RequestLogger
-
-# yapf conflicts with isort for this docstring
-# yapf: disable
 from vllm.entrypoints.openai.protocol import (
     EmbeddingChatRequest,
     EmbeddingCompletionRequest,
@@ -32,8 +29,6 @@ from vllm.entrypoints.openai.serving_engine import (
     ServeContext,
     TextTokensPrompt,
 )
-
-# yapf: enable
 from vllm.entrypoints.openai.serving_models import OpenAIServingModels
 from vllm.entrypoints.renderer import RenderConfig
 from vllm.inputs.data import TokensPrompt as EngineTokensPrompt
diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py
index 27b9cac970..596ae3fcdc 100644
--- a/vllm/entrypoints/openai/serving_engine.py
+++ b/vllm/entrypoints/openai/serving_engine.py
@@ -28,9 +28,6 @@ else:
 import vllm.envs as envs
 from vllm.config import ModelConfig
 from vllm.engine.protocol import EngineClient
-
-# yapf conflicts with isort for this block
-# yapf: disable
 from vllm.entrypoints.chat_utils import (
     ChatCompletionMessageParam,
     ChatTemplateContentFormatOption,
@@ -72,8 +69,6 @@ from vllm.entrypoints.openai.protocol import (
 from vllm.entrypoints.openai.serving_models import OpenAIServingModels
 from vllm.entrypoints.openai.tool_parsers import ToolParser
 from vllm.entrypoints.renderer import BaseRenderer, CompletionRenderer, RenderConfig
-
-# yapf: enable
 from vllm.inputs.data import PromptType
 from vllm.inputs.data import TokensPrompt as EngineTokensPrompt
 from vllm.inputs.parse import PromptComponents, get_prompt_components
diff --git a/vllm/entrypoints/openai/serving_pooling.py b/vllm/entrypoints/openai/serving_pooling.py
index 457d606432..390b388e30 100644
--- a/vllm/entrypoints/openai/serving_pooling.py
+++ b/vllm/entrypoints/openai/serving_pooling.py
@@ -17,8 +17,6 @@ from vllm.config import VllmConfig
 from vllm.engine.protocol import EngineClient
 from vllm.entrypoints.chat_utils import ChatTemplateContentFormatOption
 from vllm.entrypoints.logger import RequestLogger
-
-# yapf: disable
 from vllm.entrypoints.openai.protocol import (
     ErrorResponse,
     IOProcessorRequest,
@@ -30,8 +28,6 @@ from vllm.entrypoints.openai.protocol import (
     PoolingResponseData,
     UsageInfo,
 )
-
-# yapf: enable
 from vllm.entrypoints.openai.serving_engine import OpenAIServing
 from vllm.entrypoints.openai.serving_models import OpenAIServingModels
 from vllm.entrypoints.renderer import RenderConfig
diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py
index 385ef41411..1b25fd4eb2 100644
--- a/vllm/entrypoints/openai/serving_responses.py
+++ b/vllm/entrypoints/openai/serving_responses.py
@@ -14,9 +14,6 @@ from typing import Callable, Final, Optional, Union
 
 import jinja2
 from fastapi import Request
-
-# yapf conflicts with isort for this block
-# yapf: disable
 from openai.types.responses import (
     ResponseCodeInterpreterCallCodeDeltaEvent,
     ResponseCodeInterpreterCallCodeDoneEvent,
@@ -46,8 +43,6 @@ from openai.types.responses import (
     response_text_delta_event,
 )
 from openai.types.responses.response_output_text import Logprob, LogprobTopLogprob
-
-# yapf: enable
 from openai.types.responses.response_reasoning_item import (
     Content as ResponseReasoningTextContent,
 )
@@ -78,9 +73,6 @@ from vllm.entrypoints.harmony_utils import (
     render_for_completion,
 )
 from vllm.entrypoints.logger import RequestLogger
-
-# yapf conflicts with isort for this block
-# yapf: disable
 from vllm.entrypoints.openai.protocol import (
     DeltaMessage,
     ErrorResponse,
@@ -97,8 +89,6 @@ from vllm.entrypoints.openai.protocol import (
     ResponseUsage,
     StreamingResponsesResponse,
 )
-
-# yapf: enable
 from vllm.entrypoints.openai.serving_engine import OpenAIServing
 from vllm.entrypoints.openai.serving_models import OpenAIServingModels
 from vllm.entrypoints.tool_server import ToolServer
diff --git a/vllm/entrypoints/openai/serving_score.py b/vllm/entrypoints/openai/serving_score.py
index 393fd2c2e0..234a314218 100644
--- a/vllm/entrypoints/openai/serving_score.py
+++ b/vllm/entrypoints/openai/serving_score.py
@@ -24,9 +24,6 @@ from vllm.entrypoints.openai.protocol import (
 )
 from vllm.entrypoints.openai.serving_engine import OpenAIServing
 from vllm.entrypoints.openai.serving_models import OpenAIServingModels
-
-# yapf conflicts with isort for this block
-# yapf: disable
 from vllm.entrypoints.score_utils import (
     ScoreContentPartParam,
     ScoreMultiModalParam,
@@ -35,8 +32,6 @@ from vllm.entrypoints.score_utils import (
     compress_token_type_ids,
     get_score_prompt,
 )
-
-# yapf: enable
 from vllm.entrypoints.utils import _validate_truncation_size
 from vllm.inputs.data import TokensPrompt
 from vllm.logger import init_logger
diff --git a/vllm/entrypoints/openai/serving_tokenization.py b/vllm/entrypoints/openai/serving_tokenization.py
index fd6c6be9ee..7b192dcd6c 100644
--- a/vllm/entrypoints/openai/serving_tokenization.py
+++ b/vllm/entrypoints/openai/serving_tokenization.py
@@ -10,9 +10,6 @@ from vllm.config import ModelConfig
 from vllm.engine.protocol import EngineClient
 from vllm.entrypoints.chat_utils import ChatTemplateContentFormatOption
 from vllm.entrypoints.logger import RequestLogger
-
-# yapf conflicts with isort for this block
-# yapf: disable
 from vllm.entrypoints.openai.protocol import (
     DetokenizeRequest,
     DetokenizeResponse,
@@ -22,8 +19,6 @@ from vllm.entrypoints.openai.protocol import (
     TokenizeResponse,
     TokenizerInfoResponse,
 )
-
-# yapf: enable
 from vllm.entrypoints.openai.serving_engine import OpenAIServing
 from vllm.entrypoints.openai.serving_models import OpenAIServingModels
 from vllm.entrypoints.renderer import RenderConfig
diff --git a/vllm/executor/ray_distributed_executor.py b/vllm/executor/ray_distributed_executor.py
index c4783edda7..be124f7643 100644
--- a/vllm/executor/ray_distributed_executor.py
+++ b/vllm/executor/ray_distributed_executor.py
@@ -11,7 +11,7 @@ import cloudpickle
 import msgspec
 
 import vllm.envs as envs
-from vllm.executor.executor_base import DistributedExecutorBase  # yapf: disable
+from vllm.executor.executor_base import DistributedExecutorBase
 from vllm.executor.msgspec_utils import encode_hook
 from vllm.executor.ray_utils import RayWorkerWrapper, initialize_ray_cluster, ray
 from vllm.logger import init_logger
diff --git a/vllm/lora/layers/base_linear.py b/vllm/lora/layers/base_linear.py
index 0b5a4a73a9..d2f017c19c 100644
--- a/vllm/lora/layers/base_linear.py
+++ b/vllm/lora/layers/base_linear.py
@@ -8,8 +8,6 @@ from transformers import PretrainedConfig
 
 from vllm.config.lora import LoRAConfig
 from vllm.distributed.utils import divide
-
-# yapf: disable
 from vllm.model_executor.layers.linear import (
     ColumnParallelLinear,
     LinearBase,
@@ -23,7 +21,6 @@ from .utils import _get_lora_device
 
 
 class BaseLinearLayerWithLoRA(BaseLayerWithLoRA):
-
     def __init__(self, base_layer: LinearBase):
         super().__init__()
         self.base_layer = base_layer
@@ -50,16 +47,20 @@ class BaseLinearLayerWithLoRA(BaseLayerWithLoRA):
             lora_b_out_size = self.output_size
 
         elif isinstance(self.base_layer, ColumnParallelLinear):
-            lora_a_out_size = (lora_config.max_lora_rank if
-                               not lora_config.fully_sharded_loras else divide(
-                                   lora_config.max_lora_rank, self.tp_size))
+            lora_a_out_size = (
+                lora_config.max_lora_rank
+                if not lora_config.fully_sharded_loras
+                else divide(lora_config.max_lora_rank, self.tp_size)
+            )
             lora_b_out_size = self.output_size
 
         elif isinstance(self.base_layer, RowParallelLinear):
             lora_a_out_size = lora_config.max_lora_rank
-            lora_b_out_size = (self.output_size if
-                               not lora_config.fully_sharded_loras else divide(
-                                   self.output_size, self.tp_size))
+            lora_b_out_size = (
+                self.output_size
+                if not lora_config.fully_sharded_loras
+                else divide(self.output_size, self.tp_size)
+            )
         else:
             raise NotImplementedError
 
@@ -71,7 +72,9 @@ class BaseLinearLayerWithLoRA(BaseLayerWithLoRA):
                 self.input_size,
                 dtype=lora_config.lora_dtype,
                 device=self.device,
-            ) for _ in range(self.n_slices))
+            )
+            for _ in range(self.n_slices)
+        )
         self.lora_b_stacked = tuple(
             torch.zeros(
                 max_loras,
@@ -80,7 +83,9 @@ class BaseLinearLayerWithLoRA(BaseLayerWithLoRA):
                 lora_config.max_lora_rank,
                 dtype=lora_config.lora_dtype,
                 device=self.device,
-            ) for _ in range(self.n_slices))
+            )
+            for _ in range(self.n_slices)
+        )
         if lora_config.bias_enabled:
             lora_bias_out_size = lora_b_out_size
             self.lora_bias_stacked = tuple(
@@ -90,8 +95,10 @@ class BaseLinearLayerWithLoRA(BaseLayerWithLoRA):
                     lora_bias_out_size,
                     dtype=lora_config.lora_dtype,
                     device=self.device,
-                ) for _ in range(self.n_slices))
-        self.output_slices = (self.lora_b_stacked[0].shape[2], )
+                )
+                for _ in range(self.n_slices)
+            )
+        self.output_slices = (self.lora_b_stacked[0].shape[2],)
 
     def reset_lora(self, index: int):
         for s_index in range(self.n_slices):
@@ -99,8 +106,9 @@ class BaseLinearLayerWithLoRA(BaseLayerWithLoRA):
             self.lora_b_stacked[s_index][index] = 0
             if self.lora_config.bias_enabled:
                 # Make mypy happy
-                self.lora_bias_stacked = cast(tuple[torch.Tensor, ...],
-                                              self.lora_bias_stacked)
+                self.lora_bias_stacked = cast(
+                    tuple[torch.Tensor, ...], self.lora_bias_stacked
+                )
                 self.lora_bias_stacked[s_index][index] = 0
 
     def set_lora(
@@ -115,8 +123,9 @@ class BaseLinearLayerWithLoRA(BaseLayerWithLoRA):
         # MergedColumnParallelLinearWithLoRA, all other linear LoRA layers
         # store weights in a tuple of size 1. These two layers will
         # override this function.
-        assert (len(self.lora_a_stacked) == len(self.lora_b_stacked) ==
-                self.n_slices == 1)
+        assert (
+            len(self.lora_a_stacked) == len(self.lora_b_stacked) == self.n_slices == 1
+        )
 
         self.reset_lora(index)
         if self.tp_size > 1:
@@ -125,23 +134,24 @@ class BaseLinearLayerWithLoRA(BaseLayerWithLoRA):
             if lora_bias is not None:
                 lora_bias = self.slice_bias(lora_bias)
 
-        self.lora_a_stacked[0][index,
-                               0, :lora_a.shape[0], :lora_a.shape[1]].copy_(
-                                   lora_a, non_blocking=True)
-        self.lora_b_stacked[0][index,
-                               0, :lora_b.shape[0], :lora_b.shape[1]].copy_(
-                                   lora_b, non_blocking=True)
+        self.lora_a_stacked[0][index, 0, : lora_a.shape[0], : lora_a.shape[1]].copy_(
+            lora_a, non_blocking=True
+        )
+        self.lora_b_stacked[0][index, 0, : lora_b.shape[0], : lora_b.shape[1]].copy_(
+            lora_b, non_blocking=True
+        )
         if lora_bias is not None:
-
-            self.lora_bias_stacked = cast(tuple[torch.Tensor, ...],
-                                          self.lora_bias_stacked)
+            self.lora_bias_stacked = cast(
+                tuple[torch.Tensor, ...], self.lora_bias_stacked
+            )
             assert len(self.lora_bias_stacked)
-            self.lora_bias_stacked[0][index, 0, :lora_bias.shape[0]].copy_(
-                lora_bias, non_blocking=True)
+            self.lora_bias_stacked[0][index, 0, : lora_bias.shape[0]].copy_(
+                lora_bias, non_blocking=True
+            )
 
-    def apply(self,
-              x: torch.Tensor,
-              bias: Optional[torch.Tensor] = None) -> torch.Tensor:
+    def apply(
+        self, x: torch.Tensor, bias: Optional[torch.Tensor] = None
+    ) -> torch.Tensor:
         output = self.base_layer.quant_method.apply(self.base_layer, x, bias)
 
         # In transformers backend, x and output have extra batch dimension like
@@ -151,10 +161,15 @@ class BaseLinearLayerWithLoRA(BaseLayerWithLoRA):
             output = output.flatten(0, 1)
             x = x.flatten(0, 1)
 
-        lora_output: Optional[
-            torch.Tensor] = self.punica_wrapper.add_lora_linear(
-                output, x, self.lora_a_stacked, self.lora_b_stacked,
-                self.lora_bias_stacked, 1.0, self.output_slices)
+        lora_output: Optional[torch.Tensor] = self.punica_wrapper.add_lora_linear(
+            output,
+            x,
+            self.lora_a_stacked,
+            self.lora_b_stacked,
+            self.lora_bias_stacked,
+            1.0,
+            self.output_slices,
+        )
         if not current_platform.can_update_inplace():
             output = lora_output
 
@@ -162,7 +177,6 @@ class BaseLinearLayerWithLoRA(BaseLayerWithLoRA):
 
     @property
     def weight(self) -> torch.Tensor:
-
         # unquantizedLinear
         if hasattr(self.base_layer, "weight"):
             return self.base_layer.weight
diff --git a/vllm/lora/layers/row_parallel_linear.py b/vllm/lora/layers/row_parallel_linear.py
index 4e6b65ca97..738371f22a 100644
--- a/vllm/lora/layers/row_parallel_linear.py
+++ b/vllm/lora/layers/row_parallel_linear.py
@@ -12,8 +12,6 @@ from vllm.distributed import (
     split_tensor_along_last_dim,
     tensor_model_parallel_all_reduce,
 )
-
-# yapf: disable
 from vllm.model_executor.layers.linear import RowParallelLinear
 from vllm.platforms import current_platform
 
@@ -22,7 +20,6 @@ from .utils import _fully_sharded_can_replace, _not_fully_sharded_can_replace
 
 
 class RowParallelLinearWithLoRA(BaseLinearLayerWithLoRA):
-
     def __init__(self, base_layer: RowParallelLinear) -> None:
         super().__init__(base_layer)
 
@@ -33,11 +30,10 @@ class RowParallelLinearWithLoRA(BaseLinearLayerWithLoRA):
         self.n_slices = 1
 
     def slice_lora_a(self, lora_a: torch.Tensor) -> torch.Tensor:
-
         shard_size = self.input_size
         start_idx = self.tp_rank * shard_size
         end_idx = (self.tp_rank + 1) * shard_size
-        lora_a = lora_a[:,start_idx:end_idx]
+        lora_a = lora_a[:, start_idx:end_idx]
         return lora_a
 
     def slice_lora_b(self, lora_b: torch.Tensor) -> torch.Tensor:
@@ -66,7 +62,8 @@ class RowParallelLinearWithLoRA(BaseLinearLayerWithLoRA):
         else:
             # TODO: simplify code below
             splitted_input = split_tensor_along_last_dim(
-                input_, num_partitions=self.tp_size)
+                input_, num_partitions=self.tp_size
+            )
             input_parallel = splitted_input[self.tp_rank].contiguous()
 
         # Matrix multiply.
@@ -77,8 +74,11 @@ class RowParallelLinearWithLoRA(BaseLinearLayerWithLoRA):
             output_ = output_parallel
 
         if not self.base_layer.skip_bias_add:
-            output = (output_ + self.base_layer.bias
-                      if self.base_layer.bias is not None else output_)
+            output = (
+                output_ + self.base_layer.bias
+                if self.base_layer.bias is not None
+                else output_
+            )
             output_bias = None
         else:
             output = output_
@@ -101,11 +101,11 @@ class RowParallelLinearWithLoRA(BaseLinearLayerWithLoRA):
         return type(source_layer) is RowParallelLinear
 
 
-
 # The following layer is based on the tensor parallelism strategy given in
 # Y. Sheng et al., S-LoRA: Serving Thousands of Concurrent LoRA Adapters. 2023,
 # https://arxiv.org/abs/2311.03285.
 
+
 class RowParallelLinearWithShardedLoRA(RowParallelLinearWithLoRA):
     """
     Differs from RowParallelLinearWithLoRA by slicing the
@@ -120,28 +120,26 @@ class RowParallelLinearWithShardedLoRA(RowParallelLinearWithLoRA):
         shard_size = self.lora_b_stacked[0].shape[2]
         start_idx = self.tp_rank * shard_size
         end_idx = (self.tp_rank + 1) * shard_size
-        lora_b = lora_b[ start_idx:end_idx,:]
+        lora_b = lora_b[start_idx:end_idx, :]
         return lora_b
 
     def slice_bias(self, bias: torch.Tensor) -> torch.Tensor:
         if bias is None:
             return bias
-        self.lora_bias_stacked = cast(tuple[torch.Tensor, ...],
-                                      self.lora_bias_stacked)
+        self.lora_bias_stacked = cast(tuple[torch.Tensor, ...], self.lora_bias_stacked)
         shard_size = self.lora_bias_stacked[0].shape[2]
         start_idx = self.tp_rank * shard_size
         end_idx = (self.tp_rank + 1) * shard_size
         bias = bias[start_idx:end_idx]
         return bias
 
-    def apply(self,
-              x: torch.Tensor,
-              bias: Optional[torch.Tensor] = None) -> torch.Tensor:
+    def apply(
+        self, x: torch.Tensor, bias: Optional[torch.Tensor] = None
+    ) -> torch.Tensor:
         output = self.base_layer.quant_method.apply(self.base_layer, x)
 
         x = x.view(-1, x.shape[-1])
-        output, out_orig_shape = output.view(-1,
-                                             output.shape[-1]), output.shape
+        output, out_orig_shape = output.view(-1, output.shape[-1]), output.shape
         buffer = torch.zeros(
             (self.n_slices, x.shape[0], self.lora_a_stacked[0].shape[2]),
             dtype=torch.float32,
@@ -149,10 +147,11 @@ class RowParallelLinearWithShardedLoRA(RowParallelLinearWithLoRA):
         )
 
         shrunk_buffer: Optional[torch.Tensor] = self.punica_wrapper.add_shrink(
-            buffer, x, self.lora_a_stacked, 1.0)
+            buffer, x, self.lora_a_stacked, 1.0
+        )
         if not current_platform.can_update_inplace():
             buffer = shrunk_buffer
-        if self.tp_size>1:
+        if self.tp_size > 1:
             buffer = tensor_model_parallel_all_reduce(buffer)
 
         # following S-LoRA, allows the fusing of all_gather and all_reduce
diff --git a/vllm/lora/utils.py b/vllm/lora/utils.py
index eeff72ef1a..5e55d44ce8 100644
--- a/vllm/lora/utils.py
+++ b/vllm/lora/utils.py
@@ -19,8 +19,6 @@ from vllm.config.lora import LoRAConfig
 from vllm.logger import init_logger
 
 # being imported for _all_lora_classes below
-# yapf conflicts with isort for this block
-# yapf: disable
 from vllm.lora.layers import (
     BaseLayerWithLoRA,
     ColumnParallelLinearWithLoRA,
@@ -39,8 +37,6 @@ from vllm.lora.layers import (
 )
 from vllm.model_executor.layers.linear import LinearBase
 
-# yapf: enable
-
 if TYPE_CHECKING:
     from vllm.model_executor.layers.logits_processor import LogitsProcessor
     from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead
diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py
index d2bc339d2b..fb294384c8 100644
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -14,8 +14,6 @@ import vllm.envs as envs
 import vllm.model_executor.layers.fused_moe.modular_kernel as mk
 from vllm import _custom_ops as ops
 from vllm.logger import init_logger
-
-# yapf: disable
 from vllm.model_executor.layers.fused_moe.config import (
     FUSED_MOE_UNQUANTIZED_CONFIG,
     FusedMoEQuantConfig,
@@ -25,8 +23,6 @@ from vllm.model_executor.layers.fused_moe.cutlass_moe import (
     _valid_cutlass_block_scaled_grouped_gemm,
     run_cutlass_block_scaled_fused_experts,
 )
-
-# yapf: enable
 from vllm.model_executor.layers.fused_moe.deep_gemm_moe import (
     _valid_deep_gemm,
     deep_gemm_moe_fp8,
diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py
index ec517b63d5..1df28a4f7c 100644
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -24,8 +24,6 @@ from vllm.distributed.eplb.eplb_state import EplbState
 from vllm.forward_context import ForwardContext, get_forward_context
 from vllm.logger import init_logger
 from vllm.model_executor.custom_op import CustomOp
-
-# yapf: disable
 from vllm.model_executor.layers.fused_moe.config import (
     FUSED_MOE_UNQUANTIZED_CONFIG,
     FusedMoEConfig,
@@ -34,8 +32,6 @@ from vllm.model_executor.layers.fused_moe.config import (
     biased_moe_quant_config,
 )
 from vllm.model_executor.layers.fused_moe.fused_moe import zero_experts_compute_triton
-
-# yapf: enable
 from vllm.model_executor.layers.fused_moe.modular_kernel import (
     FusedMoEActivationFormat,
     FusedMoEModularKernel,
diff --git a/vllm/model_executor/layers/fused_moe/modular_kernel.py b/vllm/model_executor/layers/fused_moe/modular_kernel.py
index e5c65309ec..109f068439 100644
--- a/vllm/model_executor/layers/fused_moe/modular_kernel.py
+++ b/vllm/model_executor/layers/fused_moe/modular_kernel.py
@@ -10,7 +10,7 @@ import torch
 
 import vllm.envs as envs
 from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig
-from vllm.model_executor.layers.fused_moe.utils import (  # yapf: disable
+from vllm.model_executor.layers.fused_moe.utils import (
     _resize_cache,
     count_expert_num_tokens,
 )
diff --git a/vllm/model_executor/layers/linear.py b/vllm/model_executor/layers/linear.py
index e9c4afba13..3881ba12fa 100644
--- a/vllm/model_executor/layers/linear.py
+++ b/vllm/model_executor/layers/linear.py
@@ -24,8 +24,6 @@ from vllm.model_executor.layers.quantization.base_config import (
     QuantizeMethodBase,
 )
 from vllm.model_executor.layers.utils import dispatch_unquantized_gemm
-
-# yapf: disable
 from vllm.model_executor.parameter import (
     BasevLLMParameter,
     BlockQuantScaleParameter,
@@ -35,8 +33,6 @@ from vllm.model_executor.parameter import (
     PerTensorScaleParameter,
     RowvLLMParameter,
 )
-
-# yapf: enable
 from vllm.model_executor.utils import set_weight_attrs
 from vllm.platforms import current_platform
 from vllm.utils import GiB_bytes
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a8_fp8.py b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a8_fp8.py
index 9246965740..59d99e1e1c 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a8_fp8.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a8_fp8.py
@@ -17,17 +17,12 @@ from vllm.model_executor.layers.quantization.kernels.mixed_precision import (
 from vllm.model_executor.layers.quantization.utils.marlin_utils import (
     marlin_repeat_scales_on_all_ranks,
 )
-
-# yapf conflicts with isort for this block
-# yapf: disable
 from vllm.model_executor.parameter import (
     BasevLLMParameter,
     ChannelQuantScaleParameter,
     GroupQuantScaleParameter,
     PackedvLLMParameter,
 )
-
-# yapf: enable
 from vllm.scalar_type import scalar_types
 
 logger = init_logger(__name__)
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_wNa16.py b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_wNa16.py
index 7ca5760df8..188fc15fd9 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_wNa16.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_wNa16.py
@@ -17,9 +17,6 @@ from vllm.model_executor.layers.quantization.kernels.mixed_precision import (
 from vllm.model_executor.layers.quantization.utils.marlin_utils import (
     marlin_repeat_scales_on_all_ranks,
 )
-
-# yapf conflicts with isort for this block
-# yapf: disable
 from vllm.model_executor.parameter import (
     BasevLLMParameter,
     ChannelQuantScaleParameter,
@@ -28,8 +25,6 @@ from vllm.model_executor.parameter import (
     PackedvLLMParameter,
     RowvLLMParameter,
 )
-
-# yapf: enable
 from vllm.scalar_type import scalar_types
 
 logger = init_logger(__name__)
diff --git a/vllm/model_executor/model_loader/bitsandbytes_loader.py b/vllm/model_executor/model_loader/bitsandbytes_loader.py
index 13250bd394..b296a390f1 100644
--- a/vllm/model_executor/model_loader/bitsandbytes_loader.py
+++ b/vllm/model_executor/model_loader/bitsandbytes_loader.py
@@ -22,8 +22,6 @@ from vllm.distributed import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
 )
-
-# yapf: enable
 from vllm.logger import init_logger
 from vllm.model_executor.layers.fused_moe import FusedMoE
 from vllm.model_executor.layers.linear import (
@@ -51,8 +49,6 @@ from vllm.model_executor.utils import (
 )
 from vllm.platforms import current_platform
 
-# yapf conflicts with isort for this block
-
 logger = init_logger(__name__)
 
 
diff --git a/vllm/model_executor/models/aria.py b/vllm/model_executor/models/aria.py
index cefdbf4fef..7db118ca07 100644
--- a/vllm/model_executor/models/aria.py
+++ b/vllm/model_executor/models/aria.py
@@ -39,13 +39,10 @@ from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.sequence import IntermediateTensors
 from vllm.utils.tensor_schema import TensorSchema, TensorShape
 
-# yapf: disable
 from .idefics2_vision_model import Idefics2VisionConfig
 from .idefics2_vision_model import (
     Idefics2VisionTransformer as Idefics3VisionTransformer,
 )
-
-# yapf: enable
 from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsQuant
 from .llama import LlamaDecoderLayer, LlamaMLP, LlamaModel
 from .utils import (
diff --git a/vllm/model_executor/models/gemma3_mm.py b/vllm/model_executor/models/gemma3_mm.py
index 5ae78cc921..95b0b0dab5 100644
--- a/vllm/model_executor/models/gemma3_mm.py
+++ b/vllm/model_executor/models/gemma3_mm.py
@@ -22,8 +22,6 @@ from vllm.multimodal.inputs import (
     MultiModalKwargsItems,
 )
 from vllm.multimodal.parse import ImageProcessorItems, ImageSize, MultiModalDataItems
-
-# yapf: disable
 from vllm.multimodal.processing import (
     BaseMultiModalProcessor,
     BaseProcessingInfo,
@@ -35,8 +33,6 @@ from vllm.multimodal.processing import (
     PromptUpdateDetails,
     replace_token_matches,
 )
-
-# yapf: enable
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.sequence import IntermediateTensors
 from vllm.utils.tensor_schema import TensorSchema, TensorShape
diff --git a/vllm/model_executor/models/gemma3n_mm.py b/vllm/model_executor/models/gemma3n_mm.py
index c9647fb55c..bef087b7a0 100644
--- a/vllm/model_executor/models/gemma3n_mm.py
+++ b/vllm/model_executor/models/gemma3n_mm.py
@@ -6,14 +6,16 @@ from typing import Annotated, Any, Literal, Optional, Union, cast
 import numpy as np
 import torch
 
-# yapf: disable
 from torch import nn
 from transformers import AutoModel, BatchFeature
-from transformers.models.gemma3n import (Gemma3nAudioConfig,
-                                         Gemma3nAudioFeatureExtractor,
-                                         Gemma3nConfig, Gemma3nProcessor,
-                                         Gemma3nTextConfig,
-                                         Gemma3nVisionConfig)
+from transformers.models.gemma3n import (
+    Gemma3nAudioConfig,
+    Gemma3nAudioFeatureExtractor,
+    Gemma3nConfig,
+    Gemma3nProcessor,
+    Gemma3nTextConfig,
+    Gemma3nVisionConfig,
+)
 from transformers.models.siglip import SiglipImageProcessorFast
 
 from vllm.config import ModelConfig, SpeechToTextConfig, VllmConfig
@@ -22,25 +24,32 @@ from vllm.inputs.data import PromptType
 from vllm.logger import init_logger
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import RowParallelLinear
-from vllm.model_executor.layers.vocab_parallel_embedding import (
-    VocabParallelEmbedding)
+from vllm.model_executor.layers.vocab_parallel_embedding import VocabParallelEmbedding
 from vllm.model_executor.models.gemma3n import Gemma3nForCausalLM
 from vllm.model_executor.models.module_mapping import MultiModelKeys
 from vllm.model_executor.models.whisper import ISO639_1_SUPPORTED_LANGS
 from vllm.multimodal import MULTIMODAL_REGISTRY
-from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig,
-                                    MultiModalKwargsItems)
-from vllm.multimodal.parse import (ImageProcessorItems, MultiModalDataItems,
-                                   MultiModalDataParser)
-from vllm.multimodal.processing import (BaseMultiModalProcessor,
-                                        BaseProcessingInfo,
-                                        MultiModalPromptUpdates,
-                                        MultiModalPromptUpdatesApplyResult,
-                                        PlaceholderFeaturesInfo,
-                                        PromptReplacement, PromptUpdate,
-                                        PromptUpdateDetails,
-                                        replace_token_matches)
-# yapf: enable
+from vllm.multimodal.inputs import (
+    MultiModalDataDict,
+    MultiModalFieldConfig,
+    MultiModalKwargsItems,
+)
+from vllm.multimodal.parse import (
+    ImageProcessorItems,
+    MultiModalDataItems,
+    MultiModalDataParser,
+)
+from vllm.multimodal.processing import (
+    BaseMultiModalProcessor,
+    BaseProcessingInfo,
+    MultiModalPromptUpdates,
+    MultiModalPromptUpdatesApplyResult,
+    PlaceholderFeaturesInfo,
+    PromptReplacement,
+    PromptUpdate,
+    PromptUpdateDetails,
+    replace_token_matches,
+)
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.sequence import IntermediateTensors
 from vllm.utils.tensor_schema import TensorSchema, TensorShape
diff --git a/vllm/model_executor/models/idefics3.py b/vllm/model_executor/models/idefics3.py
index ec18507465..effdbdc1ac 100644
--- a/vllm/model_executor/models/idefics3.py
+++ b/vllm/model_executor/models/idefics3.py
@@ -43,9 +43,6 @@ from vllm.multimodal.inputs import (
     MultiModalKwargsItems,
 )
 from vllm.multimodal.parse import ImageProcessorItems, ImageSize
-
-# yapf conflicts with isort for this block
-# yapf: disable
 from vllm.multimodal.processing import (
     BaseMultiModalProcessor,
     BaseProcessingInfo,
@@ -54,18 +51,13 @@ from vllm.multimodal.processing import (
     PromptUpdate,
     PromptUpdateDetails,
 )
-
-# yapf: enable
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.sequence import IntermediateTensors
 from vllm.utils.tensor_schema import TensorSchema, TensorShape
 
-# yapf: disable
 from .idefics2_vision_model import (
     Idefics2VisionTransformer as Idefics3VisionTransformer,
 )
-
-# yapf: enable
 from .interfaces import MultiModalEmbeddings, SupportsLoRA, SupportsMultiModal
 from .llama import LlamaModel
 from .utils import AutoWeightsLoader, maybe_prefix
diff --git a/vllm/model_executor/models/phi3v.py b/vllm/model_executor/models/phi3v.py
index eb5a0f555a..d972604db9 100644
--- a/vllm/model_executor/models/phi3v.py
+++ b/vllm/model_executor/models/phi3v.py
@@ -45,9 +45,6 @@ from vllm.multimodal.parse import (
     ImageSize,
     MultiModalDataItems,
 )
-
-# yapf conflicts with isort for this block
-# yapf: disable
 from vllm.multimodal.processing import (
     BaseMultiModalProcessor,
     BaseProcessingInfo,
@@ -57,8 +54,6 @@ from vllm.multimodal.processing import (
     PromptUpdate,
     ResolvedPromptUpdate,
 )
-
-# yapf: enable
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.sequence import IntermediateTensors
 from vllm.utils import is_list_of
diff --git a/vllm/model_executor/models/qwen2_5_vl.py b/vllm/model_executor/models/qwen2_5_vl.py
index 00131b0f5d..ef70e37da3 100644
--- a/vllm/model_executor/models/qwen2_5_vl.py
+++ b/vllm/model_executor/models/qwen2_5_vl.py
@@ -52,16 +52,12 @@ from vllm.distributed import utils as dist_utils
 from vllm.logger import init_logger
 from vllm.model_executor.layers.activation import get_act_and_mul_fn
 from vllm.model_executor.layers.layernorm import RMSNorm
-
-# yapf: disable
 from vllm.model_executor.layers.linear import (
     ColumnParallelLinear,
     MergedColumnParallelLinear,
     QKVParallelLinear,
     RowParallelLinear,
 )
-
-# yapf: enable
 from vllm.model_executor.layers.quantization import QuantizationConfig
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from vllm.model_executor.models.module_mapping import MultiModelKeys
diff --git a/vllm/model_executor/models/qwen3_next.py b/vllm/model_executor/models/qwen3_next.py
index 34ffa9b698..0d81cbbc03 100644
--- a/vllm/model_executor/models/qwen3_next.py
+++ b/vllm/model_executor/models/qwen3_next.py
@@ -37,12 +37,7 @@ from vllm.model_executor.layers.fla.ops import (
     fused_recurrent_gated_delta_rule,
 )
 from vllm.model_executor.layers.fused_moe import FusedMoE
-
-# yapf conflicts with isort for this block
-# yapf: disable
 from vllm.model_executor.layers.layernorm import GemmaRMSNorm as Qwen3NextRMSNorm
-
-# yapf: enable
 from vllm.model_executor.layers.linear import (
     ColumnParallelLinear,
     QKVParallelLinear,
diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py
index d967303467..b640bdd1de 100644
--- a/vllm/model_executor/models/registry.py
+++ b/vllm/model_executor/models/registry.py
@@ -54,7 +54,6 @@ from .interfaces_base import (
 
 logger = init_logger(__name__)
 
-# yapf: disable
 _TEXT_GENERATION_MODELS = {
     # [Decoder-only]
     "ApertusForCausalLM": ("apertus", "ApertusForCausalLM"),
@@ -106,8 +105,8 @@ _TEXT_GENERATION_MODELS = {
     "GPTNeoXForCausalLM": ("gpt_neox", "GPTNeoXForCausalLM"),
     "GraniteForCausalLM": ("granite", "GraniteForCausalLM"),
     "GraniteMoeForCausalLM": ("granitemoe", "GraniteMoeForCausalLM"),
-    "GraniteMoeHybridForCausalLM": ("granitemoehybrid", "GraniteMoeHybridForCausalLM"),   # noqa: E501
-    "GraniteMoeSharedForCausalLM": ("granitemoeshared", "GraniteMoeSharedForCausalLM"),   # noqa: E501
+    "GraniteMoeHybridForCausalLM": ("granitemoehybrid", "GraniteMoeHybridForCausalLM"),  # noqa: E501
+    "GraniteMoeSharedForCausalLM": ("granitemoeshared", "GraniteMoeSharedForCausalLM"),  # noqa: E501
     "GritLM": ("gritlm", "GritLM"),
     "Grok1ModelForCausalLM": ("grok1", "Grok1ForCausalLM"),
     "HunYuanMoEV1ForCausalLM": ("hunyuan_v1", "HunYuanMoEV1ForCausalLM"),
@@ -127,7 +126,7 @@ _TEXT_GENERATION_MODELS = {
     "LongcatFlashForCausalLM": ("longcat_flash", "LongcatFlashForCausalLM"),
     "MambaForCausalLM": ("mamba", "MambaForCausalLM"),
     "FalconMambaForCausalLM": ("mamba", "MambaForCausalLM"),
-    "FalconH1ForCausalLM":("falcon_h1", "FalconH1ForCausalLM"),
+    "FalconH1ForCausalLM": ("falcon_h1", "FalconH1ForCausalLM"),
     "Mamba2ForCausalLM": ("mamba2", "Mamba2ForCausalLM"),
     "MiniCPMForCausalLM": ("minicpm", "MiniCPMForCausalLM"),
     "MiniCPM3ForCausalLM": ("minicpm3", "MiniCPM3ForCausalLM"),
@@ -184,7 +183,8 @@ _EMBEDDING_MODELS = {
     "LlamaModel": ("llama", "LlamaForCausalLM"),
     **{
         # Multiple models share the same architecture, so we include them all
-        k: (mod, arch) for k, (mod, arch) in _TEXT_GENERATION_MODELS.items()
+        k: (mod, arch)
+        for k, (mod, arch) in _TEXT_GENERATION_MODELS.items()
         if arch == "LlamaForCausalLM"
     },
     "MistralModel": ("llama", "LlamaForCausalLM"),
@@ -201,7 +201,10 @@ _EMBEDDING_MODELS = {
     "XLMRobertaModel": ("roberta", "RobertaEmbeddingModel"),
     # [Multimodal]
     "CLIPModel": ("clip", "CLIPEmbeddingModel"),
-    "LlavaNextForConditionalGeneration": ("llava_next", "LlavaNextForConditionalGeneration"),  # noqa: E501
+    "LlavaNextForConditionalGeneration": (
+        "llava_next",
+        "LlavaNextForConditionalGeneration",
+    ),
     "Phi3VForCausalLM": ("phi3v", "Phi3VForCausalLM"),
     "Qwen2VLForConditionalGeneration": ("qwen2_vl", "Qwen2VLForConditionalGeneration"),  # noqa: E501
     # Technically Terratorch models work on images, both in
@@ -214,79 +217,150 @@ _EMBEDDING_MODELS = {
 _CROSS_ENCODER_MODELS = {
     "BertForSequenceClassification": ("bert", "BertForSequenceClassification"),
     "BertForTokenClassification": ("bert", "BertForTokenClassification"),
-    "GteNewForSequenceClassification": ("bert_with_rope",
-                                        "GteNewForSequenceClassification"),
-    "ModernBertForSequenceClassification": ("modernbert",
-                                            "ModernBertForSequenceClassification"),
-    "RobertaForSequenceClassification": ("roberta",
-                                         "RobertaForSequenceClassification"),
-    "XLMRobertaForSequenceClassification": ("roberta",
-                                            "RobertaForSequenceClassification"),
+    "GteNewForSequenceClassification": (
+        "bert_with_rope",
+        "GteNewForSequenceClassification",
+    ),
+    "ModernBertForSequenceClassification": (
+        "modernbert",
+        "ModernBertForSequenceClassification",
+    ),
+    "RobertaForSequenceClassification": ("roberta", "RobertaForSequenceClassification"),
+    "XLMRobertaForSequenceClassification": (
+        "roberta",
+        "RobertaForSequenceClassification",
+    ),
     # [Auto-converted (see adapters.py)]
-    "JinaVLForRanking": ("jina_vl", "JinaVLForSequenceClassification"), # noqa: E501,
+    "JinaVLForRanking": ("jina_vl", "JinaVLForSequenceClassification"),  # noqa: E501,
 }
 
 _MULTIMODAL_MODELS = {
     # [Decoder-only]
     "AriaForConditionalGeneration": ("aria", "AriaForConditionalGeneration"),
-    "AyaVisionForConditionalGeneration": ("aya_vision", "AyaVisionForConditionalGeneration"),  # noqa: E501
+    "AyaVisionForConditionalGeneration": (
+        "aya_vision",
+        "AyaVisionForConditionalGeneration",
+    ),  # noqa: E501
     "Blip2ForConditionalGeneration": ("blip2", "Blip2ForConditionalGeneration"),
-    "ChameleonForConditionalGeneration": ("chameleon", "ChameleonForConditionalGeneration"),  # noqa: E501
-    "Cohere2VisionForConditionalGeneration": ("cohere2_vision", "Cohere2VisionForConditionalGeneration"),  # noqa: E501
+    "ChameleonForConditionalGeneration": (
+        "chameleon",
+        "ChameleonForConditionalGeneration",
+    ),  # noqa: E501
+    "Cohere2VisionForConditionalGeneration": (
+        "cohere2_vision",
+        "Cohere2VisionForConditionalGeneration",
+    ),  # noqa: E501
     "DeepseekVLV2ForCausalLM": ("deepseek_vl2", "DeepseekVLV2ForCausalLM"),
     "DotsOCRForCausalLM": ("dots_ocr", "DotsOCRForCausalLM"),
-    "Ernie4_5_VLMoeForConditionalGeneration": ("ernie45_vl", "Ernie4_5_VLMoeForConditionalGeneration"),  # noqa: E501
+    "Ernie4_5_VLMoeForConditionalGeneration": (
+        "ernie45_vl",
+        "Ernie4_5_VLMoeForConditionalGeneration",
+    ),  # noqa: E501
     "FuyuForCausalLM": ("fuyu", "FuyuForCausalLM"),
     "Gemma3ForConditionalGeneration": ("gemma3_mm", "Gemma3ForConditionalGeneration"),  # noqa: E501
-    "Gemma3nForConditionalGeneration": ("gemma3n_mm", "Gemma3nForConditionalGeneration"),    # noqa: E501
+    "Gemma3nForConditionalGeneration": (
+        "gemma3n_mm",
+        "Gemma3nForConditionalGeneration",
+    ),  # noqa: E501
     "GLM4VForCausalLM": ("glm4v", "GLM4VForCausalLM"),
     "Glm4vForConditionalGeneration": ("glm4_1v", "Glm4vForConditionalGeneration"),  # noqa: E501
     "Glm4vMoeForConditionalGeneration": ("glm4_1v", "Glm4vMoeForConditionalGeneration"),  # noqa: E501
-    "GraniteSpeechForConditionalGeneration": ("granite_speech", "GraniteSpeechForConditionalGeneration"),  # noqa: E501
+    "GraniteSpeechForConditionalGeneration": (
+        "granite_speech",
+        "GraniteSpeechForConditionalGeneration",
+    ),  # noqa: E501
     "H2OVLChatModel": ("h2ovl", "H2OVLChatModel"),
     "InternVLChatModel": ("internvl", "InternVLChatModel"),
     "NemotronH_Nano_VL_V2": ("nano_nemotron_vl", "NemotronH_Nano_VL_V2"),
-    "InternS1ForConditionalGeneration": ("interns1", "InternS1ForConditionalGeneration"),  # noqa: E501
-    "InternVLForConditionalGeneration": ("interns1", "InternS1ForConditionalGeneration"),  # noqa: E501
-    "Idefics3ForConditionalGeneration":("idefics3","Idefics3ForConditionalGeneration"),
-    "SmolVLMForConditionalGeneration": ("smolvlm","SmolVLMForConditionalGeneration"),  # noqa: E501
+    "InternS1ForConditionalGeneration": (
+        "interns1",
+        "InternS1ForConditionalGeneration",
+    ),  # noqa: E501
+    "InternVLForConditionalGeneration": (
+        "interns1",
+        "InternS1ForConditionalGeneration",
+    ),  # noqa: E501
+    "Idefics3ForConditionalGeneration": (
+        "idefics3",
+        "Idefics3ForConditionalGeneration",
+    ),
+    "SmolVLMForConditionalGeneration": ("smolvlm", "SmolVLMForConditionalGeneration"),  # noqa: E501
     "KeyeForConditionalGeneration": ("keye", "KeyeForConditionalGeneration"),
-    "KeyeVL1_5ForConditionalGeneration": ("keye_vl1_5", "KeyeVL1_5ForConditionalGeneration"), # noqa: E501
+    "KeyeVL1_5ForConditionalGeneration": (
+        "keye_vl1_5",
+        "KeyeVL1_5ForConditionalGeneration",
+    ),  # noqa: E501
     "RForConditionalGeneration": ("rvl", "RForConditionalGeneration"),
     "KimiVLForConditionalGeneration": ("kimi_vl", "KimiVLForConditionalGeneration"),  # noqa: E501
     "Llama_Nemotron_Nano_VL": ("nemotron_vl", "LlamaNemotronVLChatModel"),
     "Llama4ForConditionalGeneration": ("mllama4", "Llama4ForConditionalGeneration"),  # noqa: E501
     "LlavaForConditionalGeneration": ("llava", "LlavaForConditionalGeneration"),
-    "LlavaNextForConditionalGeneration": ("llava_next", "LlavaNextForConditionalGeneration"),  # noqa: E501
-    "LlavaNextVideoForConditionalGeneration": ("llava_next_video", "LlavaNextVideoForConditionalGeneration"),  # noqa: E501
-    "LlavaOnevisionForConditionalGeneration": ("llava_onevision", "LlavaOnevisionForConditionalGeneration"),  # noqa: E501
+    "LlavaNextForConditionalGeneration": (
+        "llava_next",
+        "LlavaNextForConditionalGeneration",
+    ),  # noqa: E501
+    "LlavaNextVideoForConditionalGeneration": (
+        "llava_next_video",
+        "LlavaNextVideoForConditionalGeneration",
+    ),  # noqa: E501
+    "LlavaOnevisionForConditionalGeneration": (
+        "llava_onevision",
+        "LlavaOnevisionForConditionalGeneration",
+    ),  # noqa: E501
     "MantisForConditionalGeneration": ("llava", "MantisForConditionalGeneration"),  # noqa: E501
     "MiDashengLMModel": ("midashenglm", "MiDashengLMModel"),
-    "MiniMaxVL01ForConditionalGeneration": ("minimax_vl_01", "MiniMaxVL01ForConditionalGeneration"),  # noqa: E501
+    "MiniMaxVL01ForConditionalGeneration": (
+        "minimax_vl_01",
+        "MiniMaxVL01ForConditionalGeneration",
+    ),  # noqa: E501
     "MiniCPMO": ("minicpmo", "MiniCPMO"),
     "MiniCPMV": ("minicpmv", "MiniCPMV"),
-    "Mistral3ForConditionalGeneration": ("mistral3", "Mistral3ForConditionalGeneration"),  # noqa: E501
+    "Mistral3ForConditionalGeneration": (
+        "mistral3",
+        "Mistral3ForConditionalGeneration",
+    ),  # noqa: E501
     "MolmoForCausalLM": ("molmo", "MolmoForCausalLM"),
     "NVLM_D": ("nvlm_d", "NVLM_D_Model"),
     "Ovis": ("ovis", "Ovis"),
     "Ovis2_5": ("ovis2_5", "Ovis2_5"),
-    "PaliGemmaForConditionalGeneration": ("paligemma", "PaliGemmaForConditionalGeneration"),  # noqa: E501
+    "PaliGemmaForConditionalGeneration": (
+        "paligemma",
+        "PaliGemmaForConditionalGeneration",
+    ),  # noqa: E501
     "Phi3VForCausalLM": ("phi3v", "Phi3VForCausalLM"),
     "Phi4MMForCausalLM": ("phi4mm", "Phi4MMForCausalLM"),
     "Phi4MultimodalForCausalLM": ("phi4_multimodal", "Phi4MultimodalForCausalLM"),  # noqa: E501
     "PixtralForConditionalGeneration": ("pixtral", "PixtralForConditionalGeneration"),  # noqa: E501
     "QwenVLForConditionalGeneration": ("qwen_vl", "QwenVLForConditionalGeneration"),  # noqa: E501
     "Qwen2VLForConditionalGeneration": ("qwen2_vl", "Qwen2VLForConditionalGeneration"),  # noqa: E501
-    "Qwen2_5_VLForConditionalGeneration": ("qwen2_5_vl", "Qwen2_5_VLForConditionalGeneration"),  # noqa: E501
-    "Qwen2AudioForConditionalGeneration": ("qwen2_audio", "Qwen2AudioForConditionalGeneration"),  # noqa: E501
-    "Qwen2_5OmniModel": ("qwen2_5_omni_thinker", "Qwen2_5OmniThinkerForConditionalGeneration"),  # noqa: E501
-    "Qwen2_5OmniForConditionalGeneration": ("qwen2_5_omni_thinker", "Qwen2_5OmniThinkerForConditionalGeneration"),  # noqa: E501
+    "Qwen2_5_VLForConditionalGeneration": (
+        "qwen2_5_vl",
+        "Qwen2_5_VLForConditionalGeneration",
+    ),  # noqa: E501
+    "Qwen2AudioForConditionalGeneration": (
+        "qwen2_audio",
+        "Qwen2AudioForConditionalGeneration",
+    ),  # noqa: E501
+    "Qwen2_5OmniModel": (
+        "qwen2_5_omni_thinker",
+        "Qwen2_5OmniThinkerForConditionalGeneration",
+    ),  # noqa: E501
+    "Qwen2_5OmniForConditionalGeneration": (
+        "qwen2_5_omni_thinker",
+        "Qwen2_5OmniThinkerForConditionalGeneration",
+    ),  # noqa: E501
     "Qwen3VLForConditionalGeneration": ("qwen3_vl", "Qwen3VLForConditionalGeneration"),  # noqa: E501
-    "Qwen3VLMoeForConditionalGeneration": ("qwen3_vl_moe", "Qwen3VLMoeForConditionalGeneration"),  # noqa: E501
+    "Qwen3VLMoeForConditionalGeneration": (
+        "qwen3_vl_moe",
+        "Qwen3VLMoeForConditionalGeneration",
+    ),  # noqa: E501
     "SkyworkR1VChatModel": ("skyworkr1v", "SkyworkR1VChatModel"),
     "Step3VLForConditionalGeneration": ("step3_vl", "Step3VLForConditionalGeneration"),  # noqa: E501
     "TarsierForConditionalGeneration": ("tarsier", "TarsierForConditionalGeneration"),  # noqa: E501
-    "Tarsier2ForConditionalGeneration": ("qwen2_vl", "Tarsier2ForConditionalGeneration"),  # noqa: E501
+    "Tarsier2ForConditionalGeneration": (
+        "qwen2_vl",
+        "Tarsier2ForConditionalGeneration",
+    ),  # noqa: E501
     "UltravoxModel": ("ultravox", "UltravoxModel"),
     "VoxtralForConditionalGeneration": ("voxtral", "VoxtralForConditionalGeneration"),  # noqa: E501
     # [Encoder-decoder]
@@ -324,13 +398,27 @@ _TRANSFORMERS_BACKEND_MODELS = {
     "TransformersForCausalLM": ("transformers", "TransformersForCausalLM"),
     "TransformersForMultimodalLM": ("transformers", "TransformersForMultimodalLM"),  # noqa: E501
     "TransformersMoEForCausalLM": ("transformers_moe", "TransformersMoEForCausalLM"),  # noqa: E501
-    "TransformersMoEForMultimodalLM": ("transformers_moe", "TransformersMoEForMultimodalLM"),  # noqa: E501
-    "TransformersEmbeddingModel": ("transformers_pooling", "TransformersEmbeddingModel"),  # noqa: E501
-    "TransformersForSequenceClassification": ("transformers_pooling", "TransformersForSequenceClassification"),  # noqa: E501
-    "TransformersMoEForSequenceClassification": ("transformers_pooling", "TransformersMoEForSequenceClassification"),  # noqa: E501
-    "TransformersMoEEmbeddingModel": ("transformers_pooling", "TransformersMoEEmbeddingModel"),  # noqa: E501
+    "TransformersMoEForMultimodalLM": (
+        "transformers_moe",
+        "TransformersMoEForMultimodalLM",
+    ),  # noqa: E501
+    "TransformersEmbeddingModel": (
+        "transformers_pooling",
+        "TransformersEmbeddingModel",
+    ),  # noqa: E501
+    "TransformersForSequenceClassification": (
+        "transformers_pooling",
+        "TransformersForSequenceClassification",
+    ),  # noqa: E501
+    "TransformersMoEForSequenceClassification": (
+        "transformers_pooling",
+        "TransformersMoEForSequenceClassification",
+    ),  # noqa: E501
+    "TransformersMoEEmbeddingModel": (
+        "transformers_pooling",
+        "TransformersMoEEmbeddingModel",
+    ),  # noqa: E501
 }
-# yapf: enable
 
 _VLLM_MODELS = {
     **_TEXT_GENERATION_MODELS,
diff --git a/vllm/model_executor/models/smolvlm.py b/vllm/model_executor/models/smolvlm.py
index d0018f95df..1800330c82 100644
--- a/vllm/model_executor/models/smolvlm.py
+++ b/vllm/model_executor/models/smolvlm.py
@@ -8,13 +8,10 @@ from transformers import SmolVLMProcessor
 from vllm.config import VllmConfig
 from vllm.multimodal import MULTIMODAL_REGISTRY
 
-# yapf: disable
 from .idefics3 import Idefics3DummyInputsBuilder as SmolVLMDummyInputsBuilder
 from .idefics3 import Idefics3ForConditionalGeneration, Idefics3ProcessingInfo
 from .idefics3 import Idefics3MultiModalProcessor as SmolVLMMultiModalProcessor
 
-# yapf: enable
-
 
 class SmolVLMProcessingInfo(Idefics3ProcessingInfo):
     def get_hf_processor(self, **kwargs: object) -> SmolVLMProcessor:
diff --git a/vllm/model_executor/models/voxtral.py b/vllm/model_executor/models/voxtral.py
index 6d15a3a025..8525bdd5bf 100644
--- a/vllm/model_executor/models/voxtral.py
+++ b/vllm/model_executor/models/voxtral.py
@@ -32,11 +32,7 @@ from vllm.model_executor.layers.quantization import QuantizationConfig
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from vllm.model_executor.models import SupportsPP
 from vllm.model_executor.models.module_mapping import MultiModelKeys
-
-# yapf: disable
 from vllm.model_executor.models.whisper import WhisperEncoder
-
-# yapf: enable
 from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.multimodal.inputs import (
     MultiModalDataDict,
diff --git a/vllm/transformers_utils/chat_templates/registry.py b/vllm/transformers_utils/chat_templates/registry.py
index dadbb727e1..b8d0cd8d2f 100644
--- a/vllm/transformers_utils/chat_templates/registry.py
+++ b/vllm/transformers_utils/chat_templates/registry.py
@@ -28,7 +28,6 @@ def _get_minicpmv_chat_template_fallback(tokenizer_name_or_path: str) -> Optiona
     return CHAT_TEMPLATES_DIR / "template_chatml.jinja"
 
 
-# yapf: disable
 _MODEL_TYPE_TO_CHAT_TEMPLATE_FALLBACK: dict[str, ChatTemplatePath] = {
     "blip-2": CHAT_TEMPLATES_DIR / "template_blip2.jinja",
     "clip": CHAT_TEMPLATES_DIR / "template_basic.jinja",
@@ -39,7 +38,6 @@ _MODEL_TYPE_TO_CHAT_TEMPLATE_FALLBACK: dict[str, ChatTemplatePath] = {
     "paligemma": CHAT_TEMPLATES_DIR / "template_basic.jinja",
     "qwen": _get_qwen_chat_template_fallback,
 }
-# yapf: enable
 
 
 def register_chat_template_fallback_path(
diff --git a/vllm/transformers_utils/configs/arctic.py b/vllm/transformers_utils/configs/arctic.py
index a789b93b5e..1707e15285 100644
--- a/vllm/transformers_utils/configs/arctic.py
+++ b/vllm/transformers_utils/configs/arctic.py
@@ -1,12 +1,11 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-# yapf: disable
 # ruff: noqa: E501
 # coding=utf-8
 # Copied from
 # https://huggingface.co/Snowflake/snowflake-arctic-instruct/blob/main/configuration_arctic.py
-""" Arctic model configuration"""
+"""Arctic model configuration"""
 
 from dataclasses import asdict, dataclass
 from typing import Any
diff --git a/vllm/transformers_utils/configs/nemotron_vl.py b/vllm/transformers_utils/configs/nemotron_vl.py
index 6a642f26b8..6f98fbafbe 100644
--- a/vllm/transformers_utils/configs/nemotron_vl.py
+++ b/vllm/transformers_utils/configs/nemotron_vl.py
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-# yapf: disable
 # ruff: noqa: E501
 # Adapted from
 # https://huggingface.co/nvidia/Llama-3.1-Nemotron-Nano-VL-8B-V1/blob/main/configuration.py
@@ -16,7 +15,7 @@ from transformers.dynamic_module_utils import get_class_from_dynamic_module
 
 
 class Nemotron_Nano_VL_Config(PretrainedConfig):
-    model_type = 'Llama_Nemotron_Nano_VL'
+    model_type = "Llama_Nemotron_Nano_VL"
     is_composition = True
 
     def __init__(
@@ -26,17 +25,22 @@ class Nemotron_Nano_VL_Config(PretrainedConfig):
         force_image_size=None,
         downsample_ratio=0.5,
         template=None,
-        ps_version='v1',
+        ps_version="v1",
         image_tag_type="internvl",
         projector_hidden_size=4096,
         vit_hidden_size=1280,
-        **kwargs
+        **kwargs,
     ):
         super().__init__(**kwargs)
 
         if vision_config is not None:
-            assert "auto_map" in vision_config and "AutoConfig" in vision_config["auto_map"]
-            vision_auto_config = get_class_from_dynamic_module(*vision_config["auto_map"]["AutoConfig"].split("--")[::-1])
+            assert (
+                "auto_map" in vision_config
+                and "AutoConfig" in vision_config["auto_map"]
+            )
+            vision_auto_config = get_class_from_dynamic_module(
+                *vision_config["auto_map"]["AutoConfig"].split("--")[::-1]
+            )
             self.vision_config = vision_auto_config(**vision_config)
         else:
             self.vision_config = PretrainedConfig()
@@ -51,6 +55,6 @@ class Nemotron_Nano_VL_Config(PretrainedConfig):
         self.downsample_ratio = downsample_ratio
         self.template = template  # TODO move out of here and into the tokenizer
         self.ps_version = ps_version  # Pixel shuffle version
-        self.image_tag_type = image_tag_type # TODO: into the tokenizer too?
+        self.image_tag_type = image_tag_type  # TODO: into the tokenizer too?
         self.projector_hidden_size = projector_hidden_size
         self.vit_hidden_size = vit_hidden_size
diff --git a/vllm/transformers_utils/configs/ovis.py b/vllm/transformers_utils/configs/ovis.py
index 550f5e15db..404fa700a2 100644
--- a/vllm/transformers_utils/configs/ovis.py
+++ b/vllm/transformers_utils/configs/ovis.py
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-# yapf: disable
 # ruff: noqa: E501
 # adapted from https://huggingface.co/AIDC-AI/Ovis2-1B/blob/main/configuration_aimv2.py
 # and https://huggingface.co/AIDC-AI/Ovis2-1B/blob/main/configuration_ovis.py
@@ -70,34 +69,37 @@ class AIMv2Config(PretrainedConfig):
 #                     Visual Tokenizer Configuration
 # ----------------------------------------------------------------------
 class BaseVisualTokenizerConfig(PretrainedConfig):
-
-    def __init__(self,
-                 vocab_size=16384,
-                 tokenize_function="softmax",
-                 tau=1.0,
-                 depths=None,
-                 drop_cls_token=False,
-                 backbone_config: Optional[Union[PretrainedConfig,
-                                                 dict]] = None,
-                 hidden_stride: int = 1,
-                 **kwargs):
+    def __init__(
+        self,
+        vocab_size=16384,
+        tokenize_function="softmax",
+        tau=1.0,
+        depths=None,
+        drop_cls_token=False,
+        backbone_config: Optional[Union[PretrainedConfig, dict]] = None,
+        hidden_stride: int = 1,
+        **kwargs,
+    ):
         super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.tokenize_function = tokenize_function
         self.tau = tau
         if isinstance(depths, str):
-            depths = [int(x) for x in depths.split('|')]
+            depths = [int(x) for x in depths.split("|")]
         self.depths = depths
         self.backbone_kwargs = dict[str, Any]()
         self.drop_cls_token = drop_cls_token
         if backbone_config is not None:
-            assert isinstance(backbone_config, (PretrainedConfig, dict)), \
+            assert isinstance(backbone_config, (PretrainedConfig, dict)), (
                 f"expect `backbone_config` to be instance of PretrainedConfig or dict, but got {type(backbone_config)} type"
+            )
             if not isinstance(backbone_config, PretrainedConfig):
-                model_type = backbone_config['model_type']
+                model_type = backbone_config["model_type"]
                 if model_type != "aimv2":
-                    backbone_config.pop('model_type')
-                    backbone_config = AutoConfig.for_model(model_type, **backbone_config)
+                    backbone_config.pop("model_type")
+                    backbone_config = AutoConfig.for_model(
+                        model_type, **backbone_config
+                    )
                 else:
                     backbone_config = AIMv2Config(**backbone_config)
         self.backbone_config = backbone_config
@@ -113,7 +115,7 @@ class Aimv2VisualTokenizerConfig(BaseVisualTokenizerConfig):
             self.drop_cls_token = False
         if self.depths:
             assert len(self.depths) == 1
-            self.backbone_kwargs['num_hidden_layers'] = self.depths[0]
+            self.backbone_kwargs["num_hidden_layers"] = self.depths[0]
 
 
 class SiglipVisualTokenizerConfig(BaseVisualTokenizerConfig):
@@ -125,7 +127,7 @@ class SiglipVisualTokenizerConfig(BaseVisualTokenizerConfig):
             self.drop_cls_token = False
         if self.depths:
             assert len(self.depths) == 1
-            self.backbone_kwargs['num_hidden_layers'] = self.depths[0]
+            self.backbone_kwargs["num_hidden_layers"] = self.depths[0]
 
 
 AutoConfig.register("siglip_visual_tokenizer", SiglipVisualTokenizerConfig)
@@ -138,35 +140,39 @@ AutoConfig.register("aimv2_visual_tokenizer", Aimv2VisualTokenizerConfig)
 class OvisConfig(PretrainedConfig):
     model_type = "ovis"
 
-    def __init__(self,
-                 llm_config: Optional[Union[PretrainedConfig, dict]] = None,
-                 visual_tokenizer_config: Optional[Union[PretrainedConfig,
-                                                         dict]] = None,
-                 multimodal_max_length=8192,
-                 hidden_size=None,
-                 conversation_formatter_class=None,
-                 llm_attn_implementation=None,
-                 disable_tie_weight=False,
-                 **kwargs):
+    def __init__(
+        self,
+        llm_config: Optional[Union[PretrainedConfig, dict]] = None,
+        visual_tokenizer_config: Optional[Union[PretrainedConfig, dict]] = None,
+        multimodal_max_length=8192,
+        hidden_size=None,
+        conversation_formatter_class=None,
+        llm_attn_implementation=None,
+        disable_tie_weight=False,
+        **kwargs,
+    ):
         super().__init__(**kwargs)
         if llm_config is not None:
-            assert isinstance(llm_config, (PretrainedConfig, dict)), \
+            assert isinstance(llm_config, (PretrainedConfig, dict)), (
                 f"expect `llm_config` to be instance of PretrainedConfig or dict, but got {type(llm_config)} type"
+            )
             if not isinstance(llm_config, PretrainedConfig):
-                model_type = llm_config['model_type']
-                llm_config.pop('model_type')
+                model_type = llm_config["model_type"]
+                llm_config.pop("model_type")
                 llm_config = AutoConfig.for_model(model_type, **llm_config)
 
         # map llm_config to text_config
         self.text_config = llm_config
         if visual_tokenizer_config is not None:
-            assert isinstance(visual_tokenizer_config, (PretrainedConfig, dict)), \
+            assert isinstance(visual_tokenizer_config, (PretrainedConfig, dict)), (
                 f"expect `visual_tokenizer_config` to be instance of PretrainedConfig or dict, but got {type(visual_tokenizer_config)} type"
+            )
             if not isinstance(visual_tokenizer_config, PretrainedConfig):
-                model_type = visual_tokenizer_config['model_type']
-                visual_tokenizer_config.pop('model_type')
+                model_type = visual_tokenizer_config["model_type"]
+                visual_tokenizer_config.pop("model_type")
                 visual_tokenizer_config = AutoConfig.for_model(
-                    model_type, **visual_tokenizer_config)
+                    model_type, **visual_tokenizer_config
+                )
 
         self.visual_tokenizer_config = visual_tokenizer_config
         self.multimodal_max_length = multimodal_max_length
diff --git a/vllm/transformers_utils/processors/deepseek_vl2.py b/vllm/transformers_utils/processors/deepseek_vl2.py
index d1d117b4e2..5ef258b9be 100644
--- a/vllm/transformers_utils/processors/deepseek_vl2.py
+++ b/vllm/transformers_utils/processors/deepseek_vl2.py
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-# yapf: disable
 # ruff: noqa: E501
 # coding=utf-8
 # adapted from https://github.com/deepseek-ai/DeepSeek-VL2/blob/ff23960c5cf9e6874b44be38af930cfb0ccbb620/deepseek_vl2/models/processing_deepseek_vl_v2.py
@@ -35,11 +34,12 @@ from transformers.processing_utils import ProcessorMixin
 
 
 class ImageTransform:
-
-    def __init__(self,
-                 mean: tuple[float, float, float] = (0.5, 0.5, 0.5),
-                 std: tuple[float, float, float] = (0.5, 0.5, 0.5),
-                 normalize: bool = True):
+    def __init__(
+        self,
+        mean: tuple[float, float, float] = (0.5, 0.5, 0.5),
+        std: tuple[float, float, float] = (0.5, 0.5, 0.5),
+        normalize: bool = True,
+    ):
         self.mean = mean
         self.std = std
         self.normalize = normalize
@@ -77,7 +77,6 @@ class DeepseekVLV2Processor(ProcessorMixin):
         ignore_id: int = -100,
         **kwargs,
     ):
-
         self.candidate_resolutions = candidate_resolutions
         self.image_size = candidate_resolutions[0][0]
         self.patch_size = patch_size
@@ -86,13 +85,15 @@ class DeepseekVLV2Processor(ProcessorMixin):
         self.normalize = normalize
         self.downsample_ratio = downsample_ratio
 
-        self.image_transform = ImageTransform(mean=image_mean, std=image_std, normalize=normalize)
+        self.image_transform = ImageTransform(
+            mean=image_mean, std=image_std, normalize=normalize
+        )
         self.tokenizer = tokenizer
-        self.tokenizer.padding_side = 'left'  # must set this，padding side with make a difference in batch inference
+        self.tokenizer.padding_side = "left"  # must set this; the padding side makes a difference in batch inference
 
         # add the pad_token as special token to use 'tokenizer.pad_token' and 'tokenizer.pad_token_id'
         if tokenizer.pad_token is None:
-            self.tokenizer.add_special_tokens({'pad_token': pad_token})
+            self.tokenizer.add_special_tokens({"pad_token": pad_token})
 
         # add image token
         image_token_id = self.tokenizer.vocab.get(image_token)
@@ -104,7 +105,7 @@ class DeepseekVLV2Processor(ProcessorMixin):
 
         # add five special tokens for grounding-related tasks
         # <|ref|>, <|/ref|>, <|det|>, <|/det|>, <|grounding|>
-        special_tokens = ['<|ref|>', '<|/ref|>', '<|det|>', '<|/det|>', '<|grounding|>']
+        special_tokens = ["<|ref|>", "<|/ref|>", "<|det|>", "<|/det|>", "<|grounding|>"]
         special_tokens_dict = {"additional_special_tokens": special_tokens}
         self.tokenizer.add_special_tokens(special_tokens_dict)
 
@@ -134,15 +135,19 @@ class DeepseekVLV2Processor(ProcessorMixin):
 
         for width, height in self.candidate_resolutions:
             scale = min(width / original_width, height / original_height)
-            downscaled_width, downscaled_height = int(
-                original_width * scale), int(original_height * scale)
-            effective_resolution = min(downscaled_width * downscaled_height,
-                                       original_width * original_height)
+            downscaled_width, downscaled_height = (
+                int(original_width * scale),
+                int(original_height * scale),
+            )
+            effective_resolution = min(
+                downscaled_width * downscaled_height, original_width * original_height
+            )
             wasted_resolution = (width * height) - effective_resolution
 
             if effective_resolution > max_effective_resolution or (
-                    effective_resolution == max_effective_resolution
-                    and wasted_resolution < min_wasted_resolution):
+                effective_resolution == max_effective_resolution
+                and wasted_resolution < min_wasted_resolution
+            ):
                 max_effective_resolution = effective_resolution
                 min_wasted_resolution = wasted_resolution
                 best_fit = (width, height)
@@ -198,12 +203,20 @@ class DeepseekVLV2Processor(ProcessorMixin):
                 - num_image_tokens (list[int]): the number of image tokens
         """
 
-        assert (prompt is not None and images is not None
-                ), "prompt and images must be used at the same time."
+        assert prompt is not None and images is not None, (
+            "prompt and images must be used at the same time."
+        )
 
         sft_format = prompt
-        tokenized_str, images_list, images_seq_mask, images_spatial_crop, num_image_tokens = self.tokenize_with_images(
-            sft_format, images, bos=True, eos=True, cropping=len(images) <= 2)
+        (
+            tokenized_str,
+            images_list,
+            images_seq_mask,
+            images_spatial_crop,
+            num_image_tokens,
+        ) = self.tokenize_with_images(
+            sft_format, images, bos=True, eos=True, cropping=len(images) <= 2
+        )
         masked_tokenized_str = []
         for token_index in tokenized_str:
             if token_index != self.image_token_id:
@@ -211,17 +224,21 @@ class DeepseekVLV2Processor(ProcessorMixin):
             else:
                 masked_tokenized_str.append(self.ignore_id)
 
-        assert len(tokenized_str) == len(images_seq_mask) == len(masked_tokenized_str), \
-            (f"tokenized_str's length {len(tokenized_str)}, input_ids' length {len(masked_tokenized_str)}, "
-             f"imags_seq_mask's length {len(images_seq_mask)}, are not equal")
+        assert (
+            len(tokenized_str) == len(images_seq_mask) == len(masked_tokenized_str)
+        ), (
+            f"tokenized_str's length {len(tokenized_str)}, input_ids' length {len(masked_tokenized_str)}, "
+            f"imags_seq_mask's length {len(images_seq_mask)}, are not equal"
+        )
 
         input_ids = torch.LongTensor(tokenized_str)
         target_ids = torch.LongTensor(masked_tokenized_str)
         images_seq_mask = torch.tensor(images_seq_mask, dtype=torch.bool)
 
         # set input_ids < 0 | input_ids == self.image_token_id as ignore_id
-        target_ids[(input_ids < 0) |
-                   (input_ids == self.image_token_id)] = self.ignore_id
+        target_ids[(input_ids < 0) | (input_ids == self.image_token_id)] = (
+            self.ignore_id
+        )
         input_ids[input_ids < 0] = self.pad_id
 
         if inference_mode:
@@ -311,30 +328,50 @@ class DeepseekVLV2Processor(ProcessorMixin):
                 best_width, best_height = self.image_size, self.image_size
 
             """process the global view"""
-            global_view = ImageOps.pad(image, (self.image_size, self.image_size),
-                                       color=tuple(int(x * 255) for x in self.image_transform.mean))
+            global_view = ImageOps.pad(
+                image,
+                (self.image_size, self.image_size),
+                color=tuple(int(x * 255) for x in self.image_transform.mean),
+            )
             images_list.append(self.image_transform(global_view))
 
             """process the local views"""
-            local_view = ImageOps.pad(image, (best_width, best_height),
-                                      color=tuple(int(x * 255) for x in self.image_transform.mean))
+            local_view = ImageOps.pad(
+                image,
+                (best_width, best_height),
+                color=tuple(int(x * 255) for x in self.image_transform.mean),
+            )
             for i in range(0, best_height, self.image_size):
                 for j in range(0, best_width, self.image_size):
                     images_list.append(
-                        self.image_transform(local_view.crop((j, i, j + self.image_size, i + self.image_size))))
+                        self.image_transform(
+                            local_view.crop(
+                                (j, i, j + self.image_size, i + self.image_size)
+                            )
+                        )
+                    )
 
             """record height / width crop num"""
-            num_width_tiles, num_height_tiles = best_width // self.image_size, best_height // self.image_size
+            num_width_tiles, num_height_tiles = (
+                best_width // self.image_size,
+                best_height // self.image_size,
+            )
             images_spatial_crop.append([num_width_tiles, num_height_tiles])
 
             """add image tokens"""
-            h = w = math.ceil((self.image_size // self.patch_size) / self.downsample_ratio)
+            h = w = math.ceil(
+                (self.image_size // self.patch_size) / self.downsample_ratio
+            )
             # global views tokens h * (w + 1), 1 is for line separator
             tokenized_image = [self.image_token_id] * h * (w + 1)
             # add a separator between global and local views
             tokenized_image += [self.image_token_id]
             # local views tokens, (num_height_tiles * h) * (num_width_tiles * w + 1)
-            tokenized_image += [self.image_token_id] * (num_height_tiles * h) * (num_width_tiles * w + 1)
+            tokenized_image += (
+                [self.image_token_id]
+                * (num_height_tiles * h)
+                * (num_width_tiles * w + 1)
+            )
 
             tokenized_str += tokenized_image
             images_seq_mask += [True] * len(tokenized_image)
@@ -353,10 +390,17 @@ class DeepseekVLV2Processor(ProcessorMixin):
             tokenized_str = tokenized_str + [self.eos_id]
             images_seq_mask = images_seq_mask + [False]
 
-        assert len(tokenized_str) == len(
-            images_seq_mask), f"tokenize_with_images func: tokenized_str's length {len(tokenized_str)} is not equal to imags_seq_mask's length {len(images_seq_mask)}"
+        assert len(tokenized_str) == len(images_seq_mask), (
+            f"tokenize_with_images func: tokenized_str's length {len(tokenized_str)} is not equal to imags_seq_mask's length {len(images_seq_mask)}"
+        )
 
-        return tokenized_str, images_list, images_seq_mask, images_spatial_crop, num_image_tokens
+        return (
+            tokenized_str,
+            images_list,
+            images_seq_mask,
+            images_spatial_crop,
+            num_image_tokens,
+        )
 
 
 AutoProcessor.register("DeepseekVLV2Processor", DeepseekVLV2Processor)
diff --git a/vllm/transformers_utils/processors/ovis.py b/vllm/transformers_utils/processors/ovis.py
index d7546695fd..6d52ab48c9 100644
--- a/vllm/transformers_utils/processors/ovis.py
+++ b/vllm/transformers_utils/processors/ovis.py
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-# yapf: disable
 # ruff: noqa: E501
 # coding=utf-8
 # adapted from https://github.com/AIDC-AI/Ovis/blob/35ab51a1a1e3542fa6db260a1084cefbc8f164bb/ovis/vllm/processing_ovis.py
@@ -35,23 +34,24 @@ from transformers.tokenization_utils_base import PreTokenizedInput, TextInput
 
 from vllm.multimodal.image import convert_image_mode
 
-__all__ = ['OvisProcessor']
+__all__ = ["OvisProcessor"]
 IGNORE_ID = -100
 
-class OvisProcessorKwargs(ProcessingKwargs, total=False):   # type: ignore[call-arg]
+
+class OvisProcessorKwargs(ProcessingKwargs, total=False):  # type: ignore[call-arg]
     _defaults = {
         "text_kwargs": {
             "padding": False,
         },
         "images_kwargs": {
-            'max_partition':9,
-            'covering_threshold':0.9,
-            'convert_to_rgb':True,
-        'return_tensors':'pt'},
+            "max_partition": 9,
+            "covering_threshold": 0.9,
+            "convert_to_rgb": True,
+            "return_tensors": "pt",
+        },
     }
 
 
-
 class OvisProcessor(ProcessorMixin):
     r"""
     Constructs an Ovis processor which wraps an Ovis image processor and a Qwen2 tokenizer into a single processor.
@@ -97,14 +97,16 @@ class OvisProcessor(ProcessorMixin):
             "image_col_sep": -303,
             "image_row_sep": -304,
             "image_end": -305,
-            'image_pad': image_pad_token_id,
+            "image_pad": image_pad_token_id,
         }
         return extra_special_tokens
 
     def __call__(
         self,
         images: ImageInput = None,
-        text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None,
+        text: Union[
+            TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]
+        ] = None,
         **kwargs: Unpack[OvisProcessorKwargs],
     ) -> BatchFeature:
         """
@@ -169,7 +171,6 @@ class OvisProcessor(ProcessorMixin):
 
         # Process text input
         if text is not None:
-
             if not isinstance(text, list):
                 text = [text]
 
@@ -178,7 +179,10 @@ class OvisProcessor(ProcessorMixin):
             replaced_ids_list = []
             idx = 0
             for ids_tensor in tokenized_batched_text:
-                if image_token_id in ids_tensor and "image_placeholders" in image_features:
+                if (
+                    image_token_id in ids_tensor
+                    and "image_placeholders" in image_features
+                ):
                     if idx < len(image_features["image_placeholders"]):
                         # Converts in list for ease of use
                         ids_list = ids_tensor.tolist()
@@ -188,7 +192,9 @@ class OvisProcessor(ProcessorMixin):
                         # replace placeholders
                         for i, token_id in enumerate(ids_list):
                             if token_id == image_token_id:
-                                placeholder_ids = image_features["image_placeholders"][idx]
+                                placeholder_ids = image_features["image_placeholders"][
+                                    idx
+                                ]
                                 new_ids.extend(placeholder_ids)
                                 idx += 1
                             else:
@@ -198,7 +204,8 @@ class OvisProcessor(ProcessorMixin):
                         ids_tensor = torch.tensor(new_ids, dtype=torch.long)
                     else:
                         raise RuntimeError(
-                            'Mismatch between the images you provided and the number of placeholder present in the text')
+                            "Mismatch between the images you provided and the number of placeholder present in the text"
+                        )
 
                 replaced_ids_list.append(ids_tensor)
 
@@ -217,7 +224,7 @@ class OvisProcessor(ProcessorMixin):
             # Add image features if present
             if image_features:
                 output["pixel_values"] = processed_images
-                output['grids'] = grids
+                output["grids"] = grids
 
             return output
 
@@ -227,8 +234,10 @@ class OvisProcessor(ProcessorMixin):
     def _tokenize_with_image_symbol(self, text_list: list[str]) -> torch.LongTensor:
         batch_token_ids = []
         for text in text_list:
-            text_chunks = [self.tokenizer(chunk, add_special_tokens=False).input_ids for chunk in
-                           text.split(self.image_token)]
+            text_chunks = [
+                self.tokenizer(chunk, add_special_tokens=False).input_ids
+                for chunk in text.split(self.image_token)
+            ]
             token_ids = []
             num_chuck = len(text_chunks)
             for i, chunk in enumerate(text_chunks):
@@ -240,50 +249,60 @@ class OvisProcessor(ProcessorMixin):
 
     def get_image_size(self):
         size = self.image_processor.size
-        if 'shortest_edge' in size:
-            width = height = size['shortest_edge']
+        if "shortest_edge" in size:
+            width = height = size["shortest_edge"]
         elif "height" in size and "width" in size:
-            width = size['width']
-            height = size['height']
+            width = size["width"]
+            height = size["height"]
         else:
-            raise ValueError( "Can't parse image size from image_processor config.")
+            raise ValueError("Can't parse image size from image_processor config.")
         return height, width
 
     def get_token_value(self, tok):
         return self.extra_special_tokens[tok]
 
     def construct_image_indicators(self, grid):
-        image_placeholders = [self.get_token_value('image_start'),
-                              self.get_token_value('image_atom'),
-                              self.get_token_value('image_prefix')]
+        image_placeholders = [
+            self.get_token_value("image_start"),
+            self.get_token_value("image_atom"),
+            self.get_token_value("image_prefix"),
+        ]
         if grid[0] * grid[1] > 1:
             for r in range(grid[0]):
                 for c in range(grid[1]):
-                    image_placeholders.append(self.get_token_value('image_atom') )
+                    image_placeholders.append(self.get_token_value("image_atom"))
                     if c < grid[1] - 1:
-                        image_placeholders.append(self.get_token_value('image_col_sep'))
+                        image_placeholders.append(self.get_token_value("image_col_sep"))
                 if r < grid[0] - 1:
-                    image_placeholders.append(self.get_token_value('image_row_sep'))
-        image_placeholders.append(self.get_token_value('image_end'))
+                    image_placeholders.append(self.get_token_value("image_row_sep"))
+        image_placeholders.append(self.get_token_value("image_end"))
         return image_placeholders
 
     def construct_image_placeholders(self, grid):
-
         image_placeholders = self.construct_image_indicators(grid)
 
-        image_atom_token_id = self.get_token_value('image_atom')
+        image_atom_token_id = self.get_token_value("image_atom")
         # Extract the padding token ID from tokenizer
-        image_padding_token_id = self.get_token_value('image_pad')
+        image_padding_token_id = self.get_token_value("image_pad")
 
         # Create a new list with padding tokens inserted
         padded_placeholder_tokens = []
         for token in image_placeholders:
             padded_placeholder_tokens.append(image_padding_token_id)
             if token == image_atom_token_id:
-                padded_placeholder_tokens.extend([image_padding_token_id] * self.image_segment_len)
+                padded_placeholder_tokens.extend(
+                    [image_padding_token_id] * self.image_segment_len
+                )
         return padded_placeholder_tokens
 
-    def preprocess_image(self, image: PIL.Image.Image, max_partition, covering_threshold, convert_to_rgb, return_tensors):
+    def preprocess_image(
+        self,
+        image: PIL.Image.Image,
+        max_partition,
+        covering_threshold,
+        convert_to_rgb,
+        return_tensors,
+    ):
         def _preprocess(img: PIL.Image.Image, side):
             # first resize and preprocess
             w, h = img.size
@@ -296,19 +315,27 @@ class OvisProcessor(ProcessorMixin):
                 new_height = side
                 new_width = int(w / h * new_height)
             new_size = dict(height=new_height, width=new_width)
-            pixel_values = self.image_processor.preprocess(img, size=new_size, return_tensors=return_tensors)['pixel_values']
+            pixel_values = self.image_processor.preprocess(
+                img, size=new_size, return_tensors=return_tensors
+            )["pixel_values"]
 
             # then pad to square
-            square_values = torch.zeros([1, 3, side, side], dtype=pixel_values.dtype, device=pixel_values.device)
+            square_values = torch.zeros(
+                [1, 3, side, side], dtype=pixel_values.dtype, device=pixel_values.device
+            )
             new_height, new_width = pixel_values.shape[2:]
             if new_height == new_width:
                 square_values[:, :, :, :] = pixel_values
             elif new_height > new_width:
                 from_index = (side - new_width) // 2
-                square_values[:, :, :, from_index:from_index + new_width] = pixel_values
+                square_values[:, :, :, from_index : from_index + new_width] = (
+                    pixel_values
+                )
             else:
                 from_index = (side - new_height) // 2
-                square_values[:, :, from_index:from_index + new_height, :] = pixel_values
+                square_values[:, :, from_index : from_index + new_height, :] = (
+                    pixel_values
+                )
 
             return square_values
 
@@ -350,7 +377,9 @@ class OvisProcessor(ProcessorMixin):
             good_grids = []
             for grid in candidate_grids:
                 partition = _partition(img, grid)
-                covering_ratio = sum([_covering_area(*p, side) for p in partition]) / img_area
+                covering_ratio = (
+                    sum([_covering_area(*p, side) for p in partition]) / img_area
+                )
                 assert covering_ratio <= 1.0
                 all_grids.append((grid, covering_ratio))
                 if covering_ratio > covering_threshold:
@@ -358,18 +387,19 @@ class OvisProcessor(ProcessorMixin):
 
             if len(good_grids) > 0:
                 # pick the good partition with minimum #sub_images and break the tie using covering_ratio
-                return sorted(good_grids, key=lambda x: (x[0][0] * x[0][1], -x[1]))[0][0]
+                return sorted(good_grids, key=lambda x: (x[0][0] * x[0][1], -x[1]))[0][
+                    0
+                ]
             else:
                 # pick the partition with maximum covering_ratio and break the tie using #sub_images
                 return sorted(all_grids, key=lambda x: (-x[1], x[0][0] * x[0][1]))[0][0]
 
         if convert_to_rgb:
-            image = convert_image_mode(image, 'RGB')
-
+            image = convert_image_mode(image, "RGB")
 
         sides = self.get_image_size()
         if sides[0] != sides[1]:
-            raise ValueError('get_image_size() returns non-square size')
+            raise ValueError("get_image_size() returns non-square size")
         side = sides[0]
         grid = _get_best_grid(image, side)
         partition = _partition(image, grid)
@@ -405,14 +435,18 @@ class OvisProcessor(ProcessorMixin):
             `list[str]`: The decoded text.
         """
         return self.tokenizer.batch_decode(
-            generated_outputs, skip_special_tokens=True, clean_up_tokenization_spaces=False
+            generated_outputs,
+            skip_special_tokens=True,
+            clean_up_tokenization_spaces=False,
         )
 
     @property
     def model_input_names(self):
         tokenizer_input_names = self.tokenizer.model_input_names
         image_processor_input_names = self.image_processor.model_input_names
-        names_from_processor = list(dict.fromkeys(tokenizer_input_names + image_processor_input_names))
+        names_from_processor = list(
+            dict.fromkeys(tokenizer_input_names + image_processor_input_names)
+        )
         return names_from_processor + ["second_per_grid_ts"]
 
 
diff --git a/vllm/v1/attention/backends/flashinfer.py b/vllm/v1/attention/backends/flashinfer.py
index 1c05a17db8..07316cd12a 100755
--- a/vllm/v1/attention/backends/flashinfer.py
+++ b/vllm/v1/attention/backends/flashinfer.py
@@ -40,9 +40,6 @@ from vllm.utils.flashinfer import (
     supports_trtllm_attention,
     use_trtllm_attention,
 )
-
-# yapf conflicts with isort for this block
-# yapf: disable
 from vllm.v1.attention.backends.utils import (
     AttentionCGSupport,
     AttentionMetadataBuilder,
@@ -52,8 +49,6 @@ from vllm.v1.attention.backends.utils import (
     infer_global_hyperparameters,
     split_decodes_and_prefills,
 )
-
-# yapf: enable
 from vllm.v1.kv_cache_interface import AttentionSpec
 
 FLASHINFER_WORKSPACE_BUFFER_SIZE = 256 * 1024 * 1024
diff --git a/vllm/v1/attention/backends/mla/rocm_aiter_mla.py b/vllm/v1/attention/backends/mla/rocm_aiter_mla.py
index aa9be9119d..54ebf071d9 100644
--- a/vllm/v1/attention/backends/mla/rocm_aiter_mla.py
+++ b/vllm/v1/attention/backends/mla/rocm_aiter_mla.py
@@ -11,9 +11,6 @@ from vllm.attention.backends.abstract import AttentionLayer
 from vllm.attention.ops.rocm_aiter_mla import aiter_mla_decode_fwd
 from vllm.config import VllmConfig
 from vllm.utils import cdiv
-
-# yapf conflicts with isort for this docstring
-# yapf: disable
 from vllm.v1.attention.backends.mla.common import (
     MLACommonBackend,
     MLACommonDecodeMetadata,
@@ -24,8 +21,6 @@ from vllm.v1.attention.backends.mla.common import (
 from vllm.v1.attention.backends.utils import AttentionCGSupport
 from vllm.v1.kv_cache_interface import AttentionSpec
 
-# yapf: enable
-
 
 def is_aiter_mla_enabled() -> bool:
     return envs.VLLM_ROCM_USE_AITER and envs.VLLM_ROCM_USE_AITER_MLA
diff --git a/vllm/v1/serial_utils.py b/vllm/v1/serial_utils.py
index f7a73cba60..747d08dcd3 100644
--- a/vllm/v1/serial_utils.py
+++ b/vllm/v1/serial_utils.py
@@ -18,8 +18,6 @@ from msgspec import msgpack
 
 from vllm import envs
 from vllm.logger import init_logger
-
-# yapf: disable
 from vllm.multimodal.inputs import (
     BaseMultiModalField,
     MultiModalBatchedField,
@@ -32,8 +30,6 @@ from vllm.multimodal.inputs import (
     MultiModalSharedField,
     NestedTensors,
 )
-
-# yapf: enable
 from vllm.v1.engine import UtilityResult
 
 logger = init_logger(__name__)
diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index 90429b6b0c..41c02bc410 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -48,9 +48,6 @@ from vllm.model_executor.layers.mamba.abstract import MambaBase
 from vllm.model_executor.layers.rotary_embedding import MRotaryEmbedding
 from vllm.model_executor.model_loader import TensorizerLoader, get_model_loader
 from vllm.model_executor.models.deepseek_v2 import DeepseekV32IndexerCache
-
-# yapf conflicts with isort for this block
-# yapf: disable
 from vllm.model_executor.models.interfaces import (
     SupportsMultiModal,
     is_mixture_of_experts,
@@ -59,8 +56,6 @@ from vllm.model_executor.models.interfaces import (
     supports_multimodal_pruning,
     supports_transcription,
 )
-
-# yapf: enable
 from vllm.model_executor.models.interfaces_base import (
     VllmModelForPooling,
     is_pooling_model,
@@ -101,9 +96,6 @@ from vllm.v1.attention.backends.utils import (
     split_attn_metadata,
 )
 from vllm.v1.cudagraph_dispatcher import CudagraphDispatcher
-
-# yapf conflicts with isort for this block
-# yapf: disable
 from vllm.v1.kv_cache_interface import (
     AttentionSpec,
     ChunkedLocalAttentionSpec,
@@ -118,8 +110,6 @@ from vllm.v1.kv_cache_interface import (
     SlidingWindowSpec,
     UniformTypeKVCacheSpecs,
 )
-
-# yapf: enable
 from vllm.v1.outputs import (
     EMPTY_MODEL_RUNNER_OUTPUT,
     AsyncModelRunnerOutput,