commit b65124d (1 parent 44ef111)
vllm/model_executor/models/llama4.py
@@ -245,7 +245,7 @@ def forward(
         # rotary_emb is fused into self.attn in this case
         if self.use_fused_rope:
             assert not (
-                self.attn_temperature_tuning
+                self.attn_temperature_tuning or self.nope
             ), f"{self.attn_temperature_tuning=} and {self.nope=} must be False with {VLLM_ROCM_USE_AITER_TRITON_FUSED_ROPE_ZEROS_KV_CACHE=}"
             attn_output = self.attn(q, k, v, positions=positions)
             output, _ = self.o_proj(attn_output)
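The change widens the existing guard: the fused path applies rotary embeddings inside self.attn (per the comment above), so presumably it cannot honor layers flagged with self.nope (no positional embedding) any more than it can honor attn_temperature_tuning. Below is a minimal, hypothetical restatement of the invariant as a standalone helper; check_fused_rope_compat does not exist in vllm and the names simply mirror the diff.

    # Simplified sketch of the guard the commit extends (an assumption for
    # illustration, not vllm's actual API).
    def check_fused_rope_compat(attn_temperature_tuning: bool, nope: bool) -> None:
        # The fused-RoPE kernel rotates q/k inside the attention call, so any
        # feature that skips or alters per-position rotation must be disabled
        # before dispatching to it.
        assert not (attn_temperature_tuning or nope), (
            f"{attn_temperature_tuning=} and {nope=} must be False "
            "when the fused-RoPE attention path is enabled"
        )

    # Usage: call before taking the fused path.
    check_fused_rope_compat(attn_temperature_tuning=False, nope=False)  # passes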