diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
index 3b82f8a98b..5488b65c62 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
@@ -142,7 +142,10 @@ class CompressedTensorsMoEMethod(FusedMoEMethodBase):
         # group_size=None means channelwise
         group_size = weight_quant.group_size or -1
         # Prefer to use the MarlinMoE kernel when it is supported.
-        if not check_moe_marlin_supports_layer(layer, group_size):
+        if (
+            not check_moe_marlin_supports_layer(layer, group_size)
+            or current_platform.is_rocm()
+        ):
             if (
                 weight_quant.strategy == QuantizationStrategy.GROUP
                 and weight_quant.actorder