diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py
index 3697275a7b64..5e455a2852f4 100644
--- a/vllm/attention/layer.py
+++ b/vllm/attention/layer.py
@@ -541,7 +541,6 @@ def unified_attention_with_output(
                         output_scale=output_scale,
                         positions=positions)
     else:
-        assert positions is None, f"positions must be None {positions=}"
         self.impl.forward(self,
                         query,
                         key,