diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py index 3697275a7b64..5e455a2852f4 100644 --- a/vllm/attention/layer.py +++ b/vllm/attention/layer.py @@ -541,7 +541,6 @@ def unified_attention_with_output( output_scale=output_scale, positions=positions) else: - assert positions is None, f"positions must be None {positions=}" self.impl.forward(self, query, key,