Revert "[V1] Scatter and gather placeholders in the model runner" (#16075)
@@ -55,10 +55,7 @@ def server(request, audio_assets):
         for key, value in request.param.items()
     ]

-    with RemoteOpenAIServer(MODEL_NAME,
-                            args,
-                            env_dict={"VLLM_AUDIO_FETCH_TIMEOUT":
-                                      "30"}) as remote_server:
+    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
         yield remote_server

@@ -167,7 +167,7 @@ VLM_TEST_SETTINGS = {
             "cherry_blossom": "<image>What is the season?", # noqa: E501
         }),
         multi_image_prompt="<image><image>Describe the two images in detail.", # noqa: E501
-        max_model_len=4096,
+        max_model_len=8192,
         max_num_seqs=2,
         auto_cls=AutoModelForImageTextToText,
         vllm_runner_kwargs={"mm_processor_kwargs": {"crop_to_patches": True}}

@@ -176,8 +176,6 @@ def test_chat(
             model,
             dtype=dtype,
             tokenizer_mode="mistral",
-            load_format="mistral",
-            config_format="mistral",
             max_model_len=max_model_len,
             limit_mm_per_prompt=LIMIT_MM_PER_PROMPT,
     ) as vllm_model:

@@ -200,14 +198,22 @@ def test_chat(


 @large_gpu_test(min_gb=48)
-@pytest.mark.parametrize("prompt,expected_ranges",
-                         [(_create_engine_inputs_hf(IMG_URLS[:1]),
-                           [PlaceholderRange(offset=11, length=494)]),
-                          (_create_engine_inputs_hf(IMG_URLS[1:4]), [
-                              PlaceholderRange(offset=11, length=266),
-                              PlaceholderRange(offset=277, length=1056),
-                              PlaceholderRange(offset=1333, length=418)
-                          ])])
+@pytest.mark.parametrize(
+    "prompt,expected_ranges",
+    [(_create_engine_inputs_hf(IMG_URLS[:1]), [{
+        "offset": 11,
+        "length": 494
+    }]),
+     (_create_engine_inputs_hf(IMG_URLS[1:4]), [{
+         "offset": 11,
+         "length": 266
+     }, {
+         "offset": 277,
+         "length": 1056
+     }, {
+         "offset": 1333,
+         "length": 418
+     }])])
 def test_multi_modal_placeholders(vllm_runner, prompt,
                                   expected_ranges: list[PlaceholderRange],
                                   monkeypatch) -> None:

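As a quick sanity check on the restored parametrization above (illustration only, not part of the diff): the three expected ranges for IMG_URLS[1:4] are contiguous and start right after an 11-token prefix, since each offset equals the previous offset plus its length (11 + 266 = 277, 277 + 1056 = 1333):

    # Illustration only: the expected ranges are back-to-back in the token sequence.
    expected = [
        {"offset": 11, "length": 266},
        {"offset": 277, "length": 1056},
        {"offset": 1333, "length": 418},
    ]
    for prev, cur in zip(expected, expected[1:]):
        assert prev["offset"] + prev["length"] == cur["offset"]
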
@@ -92,8 +92,8 @@ def _validate_image_prompt_replacements_one(
         first_placeholder = image_placeholders[0]

         # NOTE: There is a BOS token
-        assert first_placeholder.offset == 1
-        assert first_placeholder.length == (
+        assert first_placeholder["offset"] == 1
+        assert first_placeholder["length"] == (
             len(processed_inputs["prompt_token_ids"]) - 1) // num_imgs

     except Exception as exc:

@@ -92,8 +92,8 @@ def _validate_image_prompt_replacements_one(

         first_placeholder = image_placeholders[0]

-        assert first_placeholder.offset == 0
-        assert first_placeholder.length == len(
+        assert first_placeholder["offset"] == 0
+        assert first_placeholder["length"] == len(
             processed_inputs["prompt_token_ids"]) // num_imgs
     except Exception as exc:
         failed_size_excs.append((image_size, exc))

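The two assertion hunks above encode the same invariant, once with and once without a leading BOS token: when the processed prompt consists (apart from an optional BOS token) only of num_imgs equally sized image placeholder runs, each placeholder covers an equal share of the non-BOS prompt tokens. A minimal sketch of that invariant with hypothetical numbers (the placeholder token id and size below are made up for illustration):

    # Hypothetical values, chosen only to illustrate the assertion being restored.
    num_imgs = 2
    tokens_per_image = 576                       # assumed placeholder size
    prompt_token_ids = [1] + [32000] * (tokens_per_image * num_imgs)  # BOS + placeholders

    first_placeholder = {"offset": 1, "length": tokens_per_image}
    assert first_placeholder["offset"] == 1      # placeholders start right after BOS
    assert first_placeholder["length"] == (len(prompt_token_ids) - 1) // num_imgs
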
@@ -277,9 +277,7 @@ _MULTIMODAL_EXAMPLE_MODELS = {
                                         trust_remote_code=True,
                                         hf_overrides={"architectures": ["GLM4VForCausalLM"]}), # noqa: E501
     "H2OVLChatModel": _HfExamplesInfo("h2oai/h2ovl-mississippi-800m",
-                                      extras={"2b": "h2oai/h2ovl-mississippi-2b"}, # noqa: E501
-                                      max_transformers_version="4.48", # noqa: E501
-                                      transformers_version_reason="HF model is not compatible."), # noqa: E501
+                                      extras={"2b": "h2oai/h2ovl-mississippi-2b"}), # noqa: E501
     "InternVLChatModel": _HfExamplesInfo("OpenGVLab/InternVL2-1B",
                                          extras={"2B": "OpenGVLab/InternVL2-2B"}, # noqa: E501
                                          trust_remote_code=True),