We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent f61a3eb · commit 0c02a74 (Copy full SHA for 0c02a74)
vllm_ascend/patch/worker/patch_common/patch_attention_layer.py
@@ -196,6 +196,7 @@ def __init__(
         self.q_range = torch.tensor(envs.Q_SCALE_CONSTANT, dtype=torch.float32)
         self.k_range = torch.tensor(envs.K_SCALE_CONSTANT, dtype=torch.float32)
         self.v_range = torch.tensor(envs.V_SCALE_CONSTANT, dtype=torch.float32)
+        self.query_quant = None

 vllm.attention.Attention = AscendAttention
0 commit comments