[Model] Refactor Phi-4-multimodal to use merged processor and support V1 (#15477)

Signed-off-by: Isotr0py <2037008807@qq.com>
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Isotr0py
2025-04-19 17:26:11 +08:00
committed by GitHub
parent d9737ca1c6
commit 83f3c3bd91
15 changed files with 818 additions and 1246 deletions

View File

@ -1,14 +1,14 @@
# SPDX-License-Identifier: Apache-2.0
import json
from typing import Optional
from typing import Any, Optional
import numpy as np
import pytest
import pytest_asyncio
from transformers import AutoModel, AutoTokenizer
from vllm.multimodal.audio import resample_audio
from vllm.multimodal.audio import resample_audio_librosa
from vllm.sequence import SampleLogprobs
from ....conftest import HfRunner, VllmRunner
@ -43,6 +43,18 @@ def audio(request):
return AudioAsset(request.param)
def params_kwargs_to_cli_args(params_kwargs: dict[str, Any]) -> list[str]:
"""Convert kwargs to CLI args."""
args = []
for key, value in params_kwargs.items():
if isinstance(value, bool):
if value:
args.append(f"--{key.replace('_','-')}")
else:
args.append(f"--{key.replace('_','-')}={value}")
return args
@pytest.fixture(params=[
pytest.param({}, marks=pytest.mark.cpu_model),
pytest.param(CHUNKED_PREFILL_KWARGS),
@ -52,10 +64,7 @@ def server(request, audio_assets):
"--dtype", "bfloat16", "--max-model-len", "4096", "--enforce-eager",
"--limit-mm-per-prompt",
json.dumps({"audio": len(audio_assets)}), "--trust-remote-code"
] + [
f"--{key.replace('_','-')}={value}"
for key, value in request.param.items()
]
] + params_kwargs_to_cli_args(request.param)
with RemoteOpenAIServer(MODEL_NAME,
args,
@ -136,9 +145,9 @@ def run_test(
[hf_prompt],
max_tokens,
num_logprobs=num_logprobs,
audios=[(resample_audio(audio[0],
orig_sr=audio[1],
target_sr=16000), 16000)])
audios=[(resample_audio_librosa(audio[0],
orig_sr=audio[1],
target_sr=16000), 16000)])
for _, hf_prompt, audio in prompts_and_audios
]