[CI/Build] Avoid CUDA initialization (#8534)
This commit is contained in:
@@ -86,9 +86,7 @@ def test_load_fp16_model(vllm_runner, kv_cache_dtype: str, force_marlin: bool,
|
||||
assert attn._k_scale == 1.0
|
||||
assert attn._v_scale == 1.0
|
||||
|
||||
capability = current_platform.get_device_capability()
|
||||
capability = capability[0] * 10 + capability[1]
|
||||
if capability >= 89 and not force_marlin:
|
||||
if current_platform.has_device_capability(89) and not force_marlin:
|
||||
# For GPUs with hardware support, we keep weights in fp8
|
||||
assert fc1.weight.dtype == torch.float8_e4m3fn
|
||||
else:
|
||||
|
||||
@@ -8,6 +8,8 @@ def is_quant_method_supported(quant_method: str) -> bool:
|
||||
return False
|
||||
|
||||
capability = current_platform.get_device_capability()
|
||||
capability = capability[0] * 10 + capability[1]
|
||||
return (capability >=
|
||||
QUANTIZATION_METHODS[quant_method].get_min_capability())
|
||||
assert capability is not None
|
||||
|
||||
min_capability = QUANTIZATION_METHODS[quant_method].get_min_capability()
|
||||
|
||||
return capability.to_int() >= min_capability
|
||||
|
||||
Reference in New Issue
Block a user