add todo

Signed-off-by: yewentao256 <zhyanwentao@126.com>
2025-10-27 09:52:17 -07:00
parent bc955355f8
commit b2f24cd6b7
1 changed file with 1 addition and 0 deletions
--- a/vllm/v1/attention/backends/mla/flashinfer_mla.py
+++ b/vllm/v1/attention/backends/mla/flashinfer_mla.py
@@ -131,6 +131,7 @@ class FlashInferMLAImpl(MLACommonImpl[MLACommonMetadata]):
            self.bmm2_scale = layer._v_scale_float

        if vllm_is_batch_invariant():
+            # TODO(wentao): optimize this when it is supported by Flashinfer upstream.
            # execute per-request to eliminate batch-shape-dependent kernel paths.
            num = q.shape[0]
            outs = []