Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
yewentao256
2025-10-27 09:52:17 -07:00
parent bc955355f8
commit b2f24cd6b7

View File

@@ -131,6 +131,7 @@ class FlashInferMLAImpl(MLACommonImpl[MLACommonMetadata]):
self.bmm2_scale = layer._v_scale_float
if vllm_is_batch_invariant():
# TODO(wentao): optimize this once FlashInfer supports it upstream.
# Until then, execute per request to eliminate batch-shape-dependent kernel paths.
num = q.shape[0]
outs = []