From c2bba690658823be5ec6f1742eb10294d4fa2479 Mon Sep 17 00:00:00 2001 From: Lucas Wilkinson Date: Sat, 18 Oct 2025 18:05:23 -0400 Subject: [PATCH] [BugFix] Disable fp8 kv-cache by default for DeepSeek V3.2 (#27121) Signed-off-by: Lucas Wilkinson Signed-off-by: Lucas Wilkinson Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- vllm/model_executor/models/config.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/vllm/model_executor/models/config.py b/vllm/model_executor/models/config.py index da5d80f982..f1ec33ff3d 100644 --- a/vllm/model_executor/models/config.py +++ b/vllm/model_executor/models/config.py @@ -481,12 +481,9 @@ class DeepseekV32ForCausalLM(VerifyAndUpdateConfig): is_v32 = hasattr(hf_config, "index_topk") assert is_v32 - # For DeepSeekV3.2, we use a custom fp8 format as default (i.e. - # "auto") + # For DeepSeekV3.2, a custom fp8 format is used when fp8 kv-cache is enabled. cache_config = vllm_config.cache_config - if cache_config.cache_dtype == "auto" or cache_config.cache_dtype.startswith( - "fp8" - ): + if cache_config.cache_dtype.startswith("fp8"): cache_config.cache_dtype = "fp8_ds_mla" logger.info("Using custom fp8 kv-cache format for DeepSeekV3.2") if cache_config.cache_dtype == "bfloat16":