Fix noisy warning for uncalibrated q_scale/p_scale (#17414)
Signed-off-by: mgoin <mgoin64@gmail.com>
@@ -124,11 +124,12 @@ class BaseKVCacheMethod(QuantizeMethodBase):
         # These are used in the final Attention.forward()
         layer._q_scale.copy_(q_scale)
         layer._prob_scale.copy_(prob_scale)
-        if q_scale == 1.0 or prob_scale == 1.0:
+        if layer.kv_cache_dtype == "fp8" and (q_scale == 1.0
+                                              or prob_scale == 1.0):
             logger.warning_once(
-                f"Using Q scale {q_scale} and prob scale {prob_scale} "
-                "with fp8 attention. This may cause accuracy issues. "
-                "Please make sure Q/prob scaling factors are "
+                f"Using uncalibrated q_scale {q_scale} and/or prob_scale "
+                f"{prob_scale} with fp8 attention. This may cause accuracy "
+                "issues. Please make sure q/prob scaling factors are "
                 "available in the fp8 checkpoint.")
 
         del layer.k_scale
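For context, here is a minimal standalone sketch of the gating logic this commit introduces. A q_scale/prob_scale of 1.0 is the uninitialized default, so it only signals a missing calibration when the KV cache actually runs in fp8; the change gates the warning on that. The warn_once helper below is a stand-in for vLLM's logger.warning_once, and the layer attributes are flattened into plain parameters for illustration; the example values at the bottom are hypothetical.

import logging

logger = logging.getLogger(__name__)
_warned: set[str] = set()

def warn_once(msg: str) -> None:
    # Stand-in for vLLM's logger.warning_once: emit each distinct
    # message at most once per process.
    if msg not in _warned:
        _warned.add(msg)
        logger.warning(msg)

def maybe_warn_uncalibrated_scales(kv_cache_dtype: str,
                                   q_scale: float,
                                   prob_scale: float) -> None:
    # A scale of 1.0 means the checkpoint carried no calibrated scaling
    # factor for that tensor. Before this commit the warning fired for
    # every KV cache dtype; now it fires only when the dtype is fp8,
    # where an uncalibrated scale can actually hurt accuracy.
    if kv_cache_dtype == "fp8" and (q_scale == 1.0 or prob_scale == 1.0):
        warn_once(
            f"Using uncalibrated q_scale {q_scale} and/or prob_scale "
            f"{prob_scale} with fp8 attention. This may cause accuracy "
            "issues. Please make sure q/prob scaling factors are "
            "available in the fp8 checkpoint.")

maybe_warn_uncalibrated_scales("auto", 1.0, 1.0)  # silent: defaults are fine here
maybe_warn_uncalibrated_scales("fp8", 1.0, 0.5)   # warns once: fp8 with default q_scale

The gating is what makes the warning less noisy: users on the common non-fp8 path, where default scales are expected, no longer see it at all.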