[ROCM] Enable CompressedTensorsWNA16 (#27187)

Signed-off-by: JartX <sagformas@epdcenter.es>
This commit is contained in:
JartX
2025-10-21 16:43:23 +02:00
committed by GitHub
parent bd66b8529b
commit ba09652de2

View File

@@ -142,7 +142,10 @@ class CompressedTensorsMoEMethod(FusedMoEMethodBase):
# group_size=None means channelwise
group_size = weight_quant.group_size or -1
# Prefer to use the MarlinMoE kernel when it is supported.
-if not check_moe_marlin_supports_layer(layer, group_size):
+if (
+not check_moe_marlin_supports_layer(layer, group_size)
+or current_platform.is_rocm()
+):
if (
weight_quant.strategy == QuantizationStrategy.GROUP
and weight_quant.actorder