[Bugfix] [ROCm]: Remove the `apply_router_weight_on_input` assertion when using AITER fused MoE in UnquantizedFusedMoEMethod to re-enable Llama4 BF16 (#18205)

Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com>
2025-05-16 00:53:18 +08:00
parent fadb8d5c2d
commit 92540529c0
1 changed file with 0 additions and 1 deletion
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -503,7 +503,6 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
            indices_type=torch.uint32 if self.moe.use_pplx_kernels else None)

        if self.rocm_aiter_moe_enabled:
-            assert not apply_router_weight_on_input
            assert expert_map is None
            return self.rocm_aiter_fused_experts(
                hidden_states=x,