[Bugfix] Proper input validation for multi-modal encoder-decoder models (#16156)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2025-04-09 00:45:21 +08:00
parent dc96fd54c6
commit 4ebc0b9640
10 changed files with 113 additions and 62 deletions
--- a/tests/models/encoder_decoder/vision_language/test_mllama.py
+++ b/tests/models/encoder_decoder/vision_language/test_mllama.py
@@ -211,7 +211,7 @@ def _run_test(
    # max_model_len should be greater than image_feature_size
    with vllm_runner(model,
                     dtype=dtype,
-                     max_model_len=4096,
+                     max_model_len=8192,
                     max_num_seqs=3,
                     tensor_parallel_size=tensor_parallel_size,
                     distributed_executor_backend=distributed_executor_backend,
@@ -422,7 +422,7 @@ def test_bnb_regression(
    llm = LLM(
        model=model,
        dtype=dtype,
-        max_model_len=4096,
+        max_model_len=8192,
        max_num_seqs=2,
        quantization="bitsandbytes",
    )
@@ -475,7 +475,7 @@ def test_explicit_implicit_prompt(
    llm = LLM(
        model=model,
        dtype=dtype,
-        max_model_len=4096,
+        max_model_len=8192,
        max_num_seqs=2,
        tensor_parallel_size=1,
    )
@@ -506,7 +506,7 @@ def test_regression(vllm_runner, image_assets, model, dtype, max_tokens,
    with global_force_attn_backend_context_manager(attn_backend), vllm_runner(
            model,
            dtype=dtype,
-            max_model_len=4096,
+            max_model_len=8192,
            max_num_seqs=2,
            tensor_parallel_size=1,
            limit_mm_per_prompt={"image":