[Core] Store only the keys for multi-modal data in P0 (#22198)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-08-07 16:45:04 +08:00
committed by GitHub
parent 289b18e670
commit 766bc8162c
17 changed files with 325 additions and 234 deletions

View File

@@ -9,7 +9,7 @@ import torch
import torch.nn.functional as F
from transformers import PretrainedConfig
-from vllm.config import ModelConfig, RunnerOption
+from vllm.config import ModelConfig, ModelDType, RunnerOption
from vllm.inputs import InputContext
from vllm.sequence import Logprob, PromptLogprobs, SampleLogprobs
@@ -257,7 +257,7 @@ def check_logprobs_close(
def build_model_context(
model_id: str,
runner: RunnerOption = "auto",
-    dtype: Union[str, torch.dtype] = "auto",
+    dtype: ModelDType = "auto",
model_config_kwargs: Optional[dict[str, Any]] = None,
mm_processor_kwargs: Optional[dict[str, Any]] = None,
limit_mm_per_prompt: Optional[dict[str, int]] = None,
@@ -279,6 +279,7 @@ def build_model_context(
model_info.check_transformers_version(on_fail="skip")
model_config_kwargs = model_config_kwargs or {}
limit_mm_per_prompt = limit_mm_per_prompt or {}
model_config = ModelConfig(
model_id,
runner=runner,