diff --git a/vllm/model_executor/models/qwen3_vl_moe.py b/vllm/model_executor/models/qwen3_vl_moe.py index 21b2e395c7..284b1301d0 100644 --- a/vllm/model_executor/models/qwen3_vl_moe.py +++ b/vllm/model_executor/models/qwen3_vl_moe.py @@ -350,6 +350,14 @@ class Qwen3MoeLLMForCausalLM(Qwen3MoeForCausalLM): dummy_inputs=Qwen3VLDummyInputsBuilder, ) class Qwen3VLMoeForConditionalGeneration(Qwen3VLForConditionalGeneration): + packed_modules_mapping = { + "qkv_proj": [ + "q_proj", + "k_proj", + "v_proj", + ], + } + def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): super(Qwen3VLForConditionalGeneration, self).__init__() config: Qwen3VLMoeConfig = vllm_config.model_config.hf_config @@ -376,6 +384,11 @@ class Qwen3VLMoeForConditionalGeneration(Qwen3VLForConditionalGeneration): self.language_model = Qwen3MoeLLMForCausalLM( vllm_config=vllm_config, prefix=maybe_prefix(prefix, "language_model") ) + # Whether to include the gate_up_proj mapping is determined by + # the language model. + self.packed_modules_mapping = ( + self.packed_modules_mapping | self.language_model.packed_modules_mapping + ) self.make_empty_intermediate_tensors = ( self.language_model.make_empty_intermediate_tensors