From 1ee35382cb3d87cc4438c2c291df78d51e5eb18f Mon Sep 17 00:00:00 2001
From: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Date: Thu, 9 Oct 2025 18:13:27 -0400
Subject: [PATCH] [Bug] Fix modular_kernel: ZeroDivisionError: integer division
 or modulo by zero (#26528)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
---
 .../model_executor/layers/fused_moe/modular_kernel.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/vllm/model_executor/layers/fused_moe/modular_kernel.py b/vllm/model_executor/layers/fused_moe/modular_kernel.py
index 19e71f917e..b5602a112e 100644
--- a/vllm/model_executor/layers/fused_moe/modular_kernel.py
+++ b/vllm/model_executor/layers/fused_moe/modular_kernel.py
@@ -717,10 +717,13 @@ class FusedMoEModularKernel(torch.nn.Module):
         get num_chunks == 1. Take max(M, 1) to avoid divide by zero.
         If there are no tokens to process, the number of chunks will be zero.
         """
-        CHUNK_SIZE = (
-            max(M, 1)
-            if not self.fused_experts.supports_chunking()
-            else min(M, envs.VLLM_FUSED_MOE_CHUNK_SIZE)
+        CHUNK_SIZE = max(
+            1,
+            (
+                M
+                if not self.fused_experts.supports_chunking()
+                else min(M, envs.VLLM_FUSED_MOE_CHUNK_SIZE)
+            ),
         )
         num_chunks = cdiv(M, CHUNK_SIZE)
         # If there are no tokens, then there should be no loop iterations.