Fix per file ruff ignores related to line length (#26262)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor
2025-10-06 06:12:40 +01:00
committed by GitHub
parent 91ac7f764d
commit 6c04638214
65 changed files with 301 additions and 291 deletions

View File

@ -165,7 +165,7 @@ def test_env(
# FlashMLA only supports block_size == 64
pytest.skip("FlashMLA only supports block_size 64")
else:
from vllm.v1.attention.backends.mla.flashmla import ( # noqa: E501
from vllm.v1.attention.backends.mla.flashmla import (
is_flashmla_supported,
)

View File

@ -331,7 +331,8 @@ class WeightTensors:
in_dtype=config.dtype,
quant_dtype=config.quant_dtype,
block_shape=config.quant_block_shape,
per_out_ch_quant=config.is_per_act_token_quant, # or config.is_per_out_ch_quant
# or config.is_per_out_ch_quant
per_out_ch_quant=config.is_per_act_token_quant,
)
return WeightTensors(
w1=w1, w2=w2, w1_scale=w1_scale, w2_scale=w2_scale, w1_gs=w1_gs, w2_gs=w2_gs

View File

@ -124,7 +124,7 @@ def make_feature_matrix(csv_file_path: str):
results_df: Optional[pd.DataFrame] = None
for m, k, n, e, topks, dtype, pf_type, experts_type, quant_config in tqdm(
combinations
): # noqa: E501
):
config = Config(
Ms=[m],
K=k,

View File

@ -10,7 +10,7 @@ import vllm.model_executor.layers.fused_moe.modular_kernel as mk
from vllm.model_executor.layers.fused_moe.batched_deep_gemm_moe import (
BatchedDeepGemmExperts,
)
from vllm.model_executor.layers.fused_moe.batched_triton_or_deep_gemm_moe import ( # noqa: E501
from vllm.model_executor.layers.fused_moe.batched_triton_or_deep_gemm_moe import (
BatchedTritonOrDeepGemmExperts,
)
from vllm.model_executor.layers.fused_moe.config import (
@ -196,10 +196,10 @@ register_experts(
# Disable on blackwell for now
if has_deep_ep() and not current_platform.has_device_capability(100):
from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import ( # noqa: E501
from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import (
DeepEPHTPrepareAndFinalize,
)
from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import ( # noqa: E501
from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import (
DeepEPLLPrepareAndFinalize,
)
@ -233,7 +233,7 @@ if has_pplx():
)
if has_flashinfer_cutlass_fused_moe() and current_platform.has_device_capability(100):
from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import ( # noqa: E501
from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import (
FlashInferExperts,
)
from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_prepare_finalize import ( # noqa: E501

View File

@ -17,10 +17,10 @@ from typing_extensions import Concatenate, ParamSpec
from vllm.utils import get_open_port, has_deep_ep
if has_deep_ep():
from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import ( # noqa: E501
from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import (
DeepEPHTPrepareAndFinalize,
)
from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import ( # noqa: E501
from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import (
DeepEPLLPrepareAndFinalize,
)

View File

@ -30,10 +30,10 @@ from .parallel_utils import ProcessGroupInfo, parallel_launch
from .utils import make_test_weights
if has_deep_ep():
from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import ( # noqa: E501
from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import (
DeepEPHTPrepareAndFinalize,
)
from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import ( # noqa: E501
from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import (
DeepEPLLPrepareAndFinalize,
)

View File

@ -28,10 +28,10 @@ from ...utils import multi_gpu_test
from .parallel_utils import ProcessGroupInfo, parallel_launch
if has_deep_ep():
from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import ( # noqa: E501
from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import (
DeepEPHTPrepareAndFinalize,
)
from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import ( # noqa: E501
from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import (
DeepEPLLPrepareAndFinalize,
)

View File

@ -271,7 +271,7 @@ if __name__ == "__main__":
parser = make_config_arg_parser(
description=(
"Run single prepare-finalize & fused-experts combination test"
"Example : python3 -m tests.kernels.moe.test_modular_kernel_combinations " # noqa: E501
"Example : python3 -m tests.kernels.moe.test_modular_kernel_combinations "
"--pf-type PplxPrepareAndFinalize --experts-type BatchedTritonExperts"
)
)

View File

@ -483,8 +483,8 @@ def test_mixtral_moe(
}
if use_rocm_aiter:
# The values of rtol and atol are set based on the tests in ROCM AITER package. # noqa: E501
# https://github.com/ROCm/aiter/blob/dfed377f4be7da96ca2d75ac0761f569676f7240/op_tests/test_moe.py#L174 # noqa: E501
# The values of rtol and atol are set based on the tests in ROCM AITER package.
# https://github.com/ROCm/aiter/blob/dfed377f4be7da96ca2d75ac0761f569676f7240/op_tests/test_moe.py#L174
torch.testing.assert_close(
hf_states.flatten(0, 1), vllm_states, rtol=0.01, atol=100
)

View File

@ -10,11 +10,11 @@ import pytest
import torch
from packaging import version
from vllm.model_executor.layers.quantization.quark.quark import ( # noqa: E501
from vllm.model_executor.layers.quantization.quark.quark import (
QuarkLinearMethod,
QuarkW4A4MXFP4,
)
from vllm.model_executor.layers.quantization.quark.quark_moe import ( # noqa: E501
from vllm.model_executor.layers.quantization.quark.quark_moe import (
QuarkW4A4MXFp4MoEMethod,
)
from vllm.platforms import current_platform