[Model] Update Paligemma multimodal processing with PromptUpdate (#14015)

Signed-off-by: Kyle Huang <kylhuang@nvidia.com>
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
This commit is contained in:
kYLe
2025-03-06 02:31:38 -06:00
committed by GitHub
parent ed6ea06577
commit 1769928079
4 changed files with 152 additions and 92 deletions

View File

@@ -116,9 +116,8 @@ VLM_TEST_SETTINGS = {
"pixel_values"
),
vllm_output_post_proc=model_utils.paligemma_vllm_to_hf_output,
dtype=("half" if current_platform.is_cpu() or current_platform.is_rocm()
else ("half", "float")),
marks=[pytest.mark.core_model],
dtype="bfloat16",
marks=[pytest.mark.skip(reason="vLLM does not support PrefixLM attention mask")], # noqa: E501
),
# TODO(ywang96): Move Qwen2-VL out of core models in favor of Qwen2.5-VL
# once we have upgraded to transformers>=4.49.0.

View File

@@ -175,6 +175,8 @@ def _test_processing_correctness(
"Qwen/Qwen2-Audio-7B-Instruct",
"fixie-ai/ultravox-v0_4",
"openai/whisper-large-v3",
"google/paligemma-3b-mix-224",
"google/paligemma2-3b-ft-docci-448",
])
@pytest.mark.parametrize("hit_rate", [0.3, 0.5, 1.0])
@pytest.mark.parametrize("num_batches", [32])