[ROCM] Enable CompressedTensorsWNA16 (#27187)

Signed-off-by: JartX <sagformas@epdcenter.es>
2025-10-21 16:43:23 +02:00
parent bd66b8529b
commit ba09652de2
1 changed file with 4 additions and 1 deletion
--- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
@@ -142,7 +142,10 @@ class CompressedTensorsMoEMethod(FusedMoEMethodBase):
            # group_size=None means channelwise
            group_size = weight_quant.group_size or -1
            # Prefer to use the MarlinMoE kernel when it is supported.
-            if not check_moe_marlin_supports_layer(layer, group_size):
+            if (
+                not check_moe_marlin_supports_layer(layer, group_size)
+                or current_platform.is_rocm()
+            ):
                if (
                    weight_quant.strategy == QuantizationStrategy.GROUP
                    and weight_quant.actorder