@@ -1740,14 +1740,14 @@ class ParallelConfig:
     rank: int = 0
     """Global rank in distributed setup."""
 
     enable_microbatching: bool = False
     """Enable microbatching for the model executor."""
 
     always_microbatch_if_enabled: bool = True
-    """Always microbatch if microbatching is enabled. Easier to sync bewteen
+    """Always microbatch if microbatching is enabled. Easier to sync between
     dp workers."""
 
     microbatching_token_threshold: int = 4
     """The threshold for microbatching. If the number of tokens in the
     request is greater than this threshold, microbatching will be used.
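Taken together, these three fields describe a simple dispatch policy. Below is a minimal sketch of the semantics the docstrings imply; the helper `should_microbatch` and its call sites are hypothetical names for illustration, not code from this diff:

```python
from dataclasses import dataclass


@dataclass
class ParallelConfig:
    # Fields and defaults mirrored from the hunk above (subset only).
    rank: int = 0
    enable_microbatching: bool = False
    always_microbatch_if_enabled: bool = True
    microbatching_token_threshold: int = 4


def should_microbatch(config: ParallelConfig, num_tokens: int) -> bool:
    """Hypothetical helper sketching the policy the docstrings describe."""
    if not config.enable_microbatching:
        return False
    if config.always_microbatch_if_enabled:
        # Microbatch unconditionally: keeps data-parallel workers in sync
        # without comparing per-rank token counts.
        return True
    # Otherwise only split batches that exceed the token threshold.
    return num_tokens > config.microbatching_token_threshold


assert not should_microbatch(ParallelConfig(), num_tokens=100)
assert should_microbatch(
    ParallelConfig(enable_microbatching=True), num_tokens=1)
```

With `always_microbatch_if_enabled` left at its default of `True`, the token threshold is effectively ignored, which keeps the decision identical across dp workers regardless of per-rank batch sizes.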
@@ -4324,16 +4324,16 @@ class VllmConfig:
                     "full_cuda_graph is not supported with "
                     "cascade attention. Disabling cascade attention.")
                 self.model_config.disable_cascade_attn = True
 
-        if self.parallel_config.enable_microbatching:
+        if self.parallel_config.enable_microbatching and \
+                self.compilation_config.level >= CompilationLevel.PIECEWISE:
             # Microbatching is not supported with piecewise compilation yet.
             # More specifically, piecewise cuda-graphs.
-            if self.compilation_config.level >= CompilationLevel.PIECEWISE:
-                logger.warning_once(
-                    "Piecewise compilation is not supported with "
-                    "microbatching. Disabling piecewiseching compilation.")
-                self.compilation_config.level = CompilationLevel.NO_COMPILATION
+            logger.warning_once(
+                "Piecewise compilation is not supported with "
+                "microbatching. Disabling piecewise compilation.")
+            self.compilation_config.level = CompilationLevel.NO_COMPILATION
 
         if self.model_config and self.model_config.use_mla and \
                 not (current_platform.is_cuda() or current_platform.is_rocm()):
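The reshaped guard folds the nested level check into the outer condition, so the whole block reads as one mutual-exclusion rule. Below is a standalone paraphrase; the `CompilationLevel` values are stand-ins assumed only for ordering, and `reconcile_microbatching` is a hypothetical name, not part of the diff:

```python
import logging
from enum import IntEnum
from types import SimpleNamespace


class CompilationLevel(IntEnum):
    # Stand-in for vLLM's CompilationLevel constants; only the two
    # members the guard compares against are modeled, and the numeric
    # values are assumptions made for ordering purposes.
    NO_COMPILATION = 0
    PIECEWISE = 3


def reconcile_microbatching(parallel_config, compilation_config, logger):
    """Paraphrase of the __post_init__ guard in the hunk above.

    Microbatching does not yet support piecewise cuda-graphs, so
    piecewise compilation is downgraded whenever microbatching is on.
    """
    if (parallel_config.enable_microbatching
            and compilation_config.level >= CompilationLevel.PIECEWISE):
        logger.warning(
            "Piecewise compilation is not supported with "
            "microbatching. Disabling piecewise compilation.")
        compilation_config.level = CompilationLevel.NO_COMPILATION


if __name__ == "__main__":
    parallel = SimpleNamespace(enable_microbatching=True)
    compilation = SimpleNamespace(level=CompilationLevel.PIECEWISE)
    reconcile_microbatching(parallel, compilation,
                            logging.getLogger(__name__))
    assert compilation.level == CompilationLevel.NO_COMPILATION
```

vLLM itself routes the message through `logger.warning_once` so it fires only once per process; the sketch uses a plain `logging` logger to stay self-contained.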