Fix per file ruff ignores related to line length (#26262)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
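For context: Ruff's E501 rule flags lines longer than the configured limit. It can be silenced per line with a trailing # noqa: E501, or per file via per-file-ignores in the Ruff configuration; this commit drops inline suppressions that became unnecessary once the offending lines were wrapped. A minimal sketch of the two forms, using a hypothetical module path:

    # Over-length line silenced inline (the comment itself counts toward the limit):
    from some.package.with_a.deeply.nested.module import SomeVeryLongClassName  # noqa: E501

    # Wrapped form: every physical line fits, so no suppression is needed.
    from some.package.with_a.deeply.nested.module import (
        SomeVeryLongClassName,
    )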
@@ -165,7 +165,7 @@ def test_env(
             # FlashMLA only supports block_size == 64
             pytest.skip("FlashMLA only supports block_size 64")
         else:
-            from vllm.v1.attention.backends.mla.flashmla import (  # noqa: E501
+            from vllm.v1.attention.backends.mla.flashmla import (
                 is_flashmla_supported,
             )
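A sketch of how such a guard typically reads in full, assuming (as the vLLM helper does at the time of writing) that is_flashmla_supported() returns a (supported, reason) tuple; the test body itself is elided:

    import pytest

    from vllm.v1.attention.backends.mla.flashmla import is_flashmla_supported

    def _require_flashmla(block_size: int) -> None:
        if block_size != 64:
            # FlashMLA only supports block_size == 64
            pytest.skip("FlashMLA only supports block_size 64")
        supported, reason = is_flashmla_supported()
        if not supported:
            pytest.skip(f"FlashMLA unsupported on this platform: {reason}")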
@@ -331,7 +331,8 @@ class WeightTensors:
             in_dtype=config.dtype,
             quant_dtype=config.quant_dtype,
             block_shape=config.quant_block_shape,
-            per_out_ch_quant=config.is_per_act_token_quant,  # or config.is_per_out_ch_quant
+            # or config.is_per_out_ch_quant
+            per_out_ch_quant=config.is_per_act_token_quant,
         )
         return WeightTensors(
             w1=w1, w2=w2, w1_scale=w1_scale, w2_scale=w2_scale, w1_gs=w1_gs, w2_gs=w2_gs
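This hunk fixes the line length by moving a trailing comment onto its own line, which is why it grows from 7 to 8 lines. A minimal illustration with hypothetical names:

    # Before: the trailing comment pushes the physical line past the limit.
    result = build(per_out_ch_quant=config.is_per_act_token_quant)  # or config.is_per_out_ch_quant

    # After: the comment sits above the argument and both lines stay short.
    # or config.is_per_out_ch_quant
    result = build(per_out_ch_quant=config.is_per_act_token_quant)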
@@ -124,7 +124,7 @@ def make_feature_matrix(csv_file_path: str):
     results_df: Optional[pd.DataFrame] = None
     for m, k, n, e, topks, dtype, pf_type, experts_type, quant_config in tqdm(
         combinations
-    ):  # noqa: E501
+    ):
         config = Config(
             Ms=[m],
             K=k,
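Here the stale # noqa: E501 sat on a closing ): line that was never over-length; wrapping the for-target across three lines is what keeps it legal. A hedged sketch of how such a combination sweep is typically built (the parameter grid is illustrative, not the benchmark's actual values):

    import itertools

    from tqdm import tqdm

    # Illustrative grid; the real script derives these from its CLI arguments.
    combinations = list(
        itertools.product(
            [1, 64],   # m
            [2048],    # k
            [4096],    # n
            [8],       # e (expert count)
            [[2]],     # topks
        )
    )
    for m, k, n, e, topks in tqdm(combinations):
        ...  # build a Config and run the benchmark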
@@ -10,7 +10,7 @@ import vllm.model_executor.layers.fused_moe.modular_kernel as mk
 from vllm.model_executor.layers.fused_moe.batched_deep_gemm_moe import (
     BatchedDeepGemmExperts,
 )
-from vllm.model_executor.layers.fused_moe.batched_triton_or_deep_gemm_moe import (  # noqa: E501
+from vllm.model_executor.layers.fused_moe.batched_triton_or_deep_gemm_moe import (
     BatchedTritonOrDeepGemmExperts,
 )
 from vllm.model_executor.layers.fused_moe.config import (
@@ -196,10 +196,10 @@ register_experts(
 
 # Disable on blackwell for now
 if has_deep_ep() and not current_platform.has_device_capability(100):
-    from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import (  # noqa: E501
+    from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import (
         DeepEPHTPrepareAndFinalize,
     )
-    from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import (  # noqa: E501
+    from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import (
         DeepEPLLPrepareAndFinalize,
     )
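For readers unfamiliar with the guard: has_device_capability(100) tests for CUDA compute capability 10.0, i.e. Blackwell, so the branch enables DeepEP only when the library is installed and the GPU is pre-Blackwell. A minimal sketch of the same gating pattern:

    from vllm.platforms import current_platform
    from vllm.utils import has_deep_ep

    # DeepEP is used only when installed and the GPU is *not* Blackwell
    # (compute capability 10.0), mirroring the "Disable on blackwell" comment.
    use_deep_ep = has_deep_ep() and not current_platform.has_device_capability(100)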
@@ -233,7 +233,7 @@ if has_pplx():
     )
 
 if has_flashinfer_cutlass_fused_moe() and current_platform.has_device_capability(100):
-    from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import (  # noqa: E501
+    from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import (
         FlashInferExperts,
     )
     from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_prepare_finalize import (  # noqa: E501
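Note that the trailing flashinfer_cutlass_prepare_finalize import keeps its # noqa: E501: that module path is so long that even the first line of the wrapped import appears to exceed the configured limit, so the suppression there is presumably still required rather than stale.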
@@ -17,10 +17,10 @@ from typing_extensions import Concatenate, ParamSpec
 from vllm.utils import get_open_port, has_deep_ep
 
 if has_deep_ep():
-    from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import (  # noqa: E501
+    from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import (
         DeepEPHTPrepareAndFinalize,
     )
-    from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import (  # noqa: E501
+    from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import (
         DeepEPLLPrepareAndFinalize,
     )
@@ -30,10 +30,10 @@ from .parallel_utils import ProcessGroupInfo, parallel_launch
 from .utils import make_test_weights
 
 if has_deep_ep():
-    from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import (  # noqa: E501
+    from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import (
         DeepEPHTPrepareAndFinalize,
     )
-    from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import (  # noqa: E501
+    from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import (
         DeepEPLLPrepareAndFinalize,
     )
@@ -28,10 +28,10 @@ from ...utils import multi_gpu_test
 from .parallel_utils import ProcessGroupInfo, parallel_launch
 
 if has_deep_ep():
-    from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import (  # noqa: E501
+    from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import (
         DeepEPHTPrepareAndFinalize,
     )
-    from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import (  # noqa: E501
+    from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import (
         DeepEPLLPrepareAndFinalize,
     )
@@ -271,7 +271,7 @@ if __name__ == "__main__":
     parser = make_config_arg_parser(
         description=(
             "Run single prepare-finalize & fused-experts combination test"
-            "Example : python3 -m tests.kernels.moe.test_modular_kernel_combinations "  # noqa: E501
+            "Example : python3 -m tests.kernels.moe.test_modular_kernel_combinations "
             "--pf-type PplxPrepareAndFinalize --experts-type BatchedTritonExperts"
         )
     )
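The parenthesized description relies on Python's implicit concatenation of adjacent string literals, which inserts no separator, so each fragment must end with its own space or newline (note the first fragment above ends without one, so the rendered help runs straight into "Example"). A minimal illustration:

    joined = "alpha" "beta"    # -> "alphabeta": adjacent literals fuse with no separator
    spaced = "alpha " "beta"   # -> "alpha beta": the fragment supplies its own space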
@@ -483,8 +483,8 @@ def test_mixtral_moe(
     }
 
     if use_rocm_aiter:
-        # The values of rtol and atol are set based on the tests in ROCM AITER package.  # noqa: E501
-        # https://github.com/ROCm/aiter/blob/dfed377f4be7da96ca2d75ac0761f569676f7240/op_tests/test_moe.py#L174  # noqa: E501
+        # The values of rtol and atol are set based on the tests in ROCM AITER package.
+        # https://github.com/ROCm/aiter/blob/dfed377f4be7da96ca2d75ac0761f569676f7240/op_tests/test_moe.py#L174
         torch.testing.assert_close(
             hf_states.flatten(0, 1), vllm_states, rtol=0.01, atol=100
         )
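For reference, torch.testing.assert_close treats values as close when |actual - expected| <= atol + rtol * |expected| elementwise, so atol=100 (taken from the AITER tests linked above) tolerates sizeable absolute drift. A small worked example:

    import torch

    actual = torch.tensor([1000.0])
    expected = torch.tensor([1090.0])
    # |1000 - 1090| = 90 <= atol + rtol * |expected| = 100 + 0.01 * 1090 = 110.9
    torch.testing.assert_close(actual, expected, rtol=0.01, atol=100)  # passes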
@@ -10,11 +10,11 @@ import pytest
 import torch
 from packaging import version
 
-from vllm.model_executor.layers.quantization.quark.quark import (  # noqa: E501
+from vllm.model_executor.layers.quantization.quark.quark import (
     QuarkLinearMethod,
     QuarkW4A4MXFP4,
 )
-from vllm.model_executor.layers.quantization.quark.quark_moe import (  # noqa: E501
+from vllm.model_executor.layers.quantization.quark.quark_moe import (
     QuarkW4A4MXFp4MoEMethod,
 )
 from vllm.platforms import current_platform
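The packaging import alongside these Quark imports suggests the test gates on the installed quark version; a hedged sketch of that common pattern (the distribution name "amd-quark" and the version floor are assumptions, not taken from the diff):

    import importlib.metadata

    from packaging import version

    def quark_at_least(minimum: str) -> bool:
        # Distribution name "amd-quark" and the minimum version are assumptions.
        try:
            installed = importlib.metadata.version("amd-quark")
        except importlib.metadata.PackageNotFoundError:
            return False
        return version.parse(installed) >= version.parse(minimum)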