From 1ee35382cb3d87cc4438c2c291df78d51e5eb18f Mon Sep 17 00:00:00 2001 From: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Date: Thu, 9 Oct 2025 18:13:27 -0400 Subject: [PATCH] [Bug] Fix modular_kernel: ZeroDivisionError: integer division or modulo by zero (#26528) Signed-off-by: yewentao256 --- .../model_executor/layers/fused_moe/modular_kernel.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/vllm/model_executor/layers/fused_moe/modular_kernel.py b/vllm/model_executor/layers/fused_moe/modular_kernel.py index 19e71f917e..b5602a112e 100644 --- a/vllm/model_executor/layers/fused_moe/modular_kernel.py +++ b/vllm/model_executor/layers/fused_moe/modular_kernel.py @@ -717,10 +717,13 @@ class FusedMoEModularKernel(torch.nn.Module): get num_chunks == 1. Take max(M, 1) to avoid divide by zero. If there are no tokens to process, the number of chunks will be zero. """ - CHUNK_SIZE = ( - max(M, 1) - if not self.fused_experts.supports_chunking() - else min(M, envs.VLLM_FUSED_MOE_CHUNK_SIZE) + CHUNK_SIZE = max( + 1, + ( + M + if not self.fused_experts.supports_chunking() + else min(M, envs.VLLM_FUSED_MOE_CHUNK_SIZE) + ), ) num_chunks = cdiv(M, CHUNK_SIZE) # If there are no tokens, then there should be no loop iterations.