Commit 155839b

superbobry authored and Google-ML-Automation committed
[pallas:triton] Emit a better error message for matmul with non-2D operands
Triton seems to support both 2D and 3D operands now, the latter case being a batched matmul. We need more changes in the lowering to support 3D, so I will leave it out of scope here.

Fixes jax-ml#26013.

PiperOrigin-RevId: 733293299
1 parent 8906f28 commit 155839b
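
To make the failure mode concrete (this sketch is not part of the commit), the standalone kernel below is the kind of program that now fails with the clearer ValueError instead of an opaque lowering error. It mirrors the new test added further down and assumes a GPU backend with the default, non-interpret Triton lowering.

import functools

import jax
import jax.numpy as jnp
from jax.experimental import pallas as pl

# One operand is 1-D, so the Triton lowering now rejects the dot up front.
@functools.partial(
    pl.pallas_call,
    out_shape=jax.ShapeDtypeStruct((32,), jnp.float32),
)
def dot_kernel(x_ref, y_ref, o_ref):
  o_ref[()] = jnp.dot(x_ref[()], y_ref[()])

x = jnp.ones((32, 64), jnp.float32)
y = jnp.ones((64,), jnp.float32)
dot_kernel(x, y)  # ValueError: a and b must be 2D, but got: ...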

File tree

2 files changed: +22 -0 lines changed

  jax/_src/pallas/triton/lowering.py
  tests/pallas/pallas_test.py

jax/_src/pallas/triton/lowering.py

Lines changed: 3 additions & 0 deletions
@@ -2261,6 +2261,9 @@ def _dot_general_lowering(
   a_type = ir.RankedTensorType(a.type)
   b_type = ir.RankedTensorType(b.type)
+  if len(a_type.shape) != len(b_type.shape) != 2:
+    raise ValueError("a and b must be 2D, but got:"
+                     f" {a_type.shape} and {b_type.shape}")
   if min(*b_type.shape) < 16:
     raise ValueError("all dimensions of b must be >= 16 ")
   if a_type.element_type != b_type.element_type:
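
A note on how the new guard reads (not part of the diff): Python chains the comparison, so len(a_type.shape) != len(b_type.shape) != 2 means "the ranks differ and b is not rank-2". Below is a tiny self-contained check of that semantics, with rejects as a hypothetical stand-in for the guard.

# `x != y != 2` is evaluated as `x != y and y != 2`.
def rejects(a_rank: int, b_rank: int) -> bool:
  return a_rank != b_rank != 2

assert rejects(2, 1)      # the case exercised by the new test: 2-D a, 1-D b
assert not rejects(2, 2)  # an ordinary 2-D matmul passes through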

tests/pallas/pallas_test.py

Lines changed: 19 additions & 0 deletions
@@ -733,6 +733,25 @@ def dot_kernel(x_ref, y_ref, o_ref):
     )
     self.assertAllClose(dot_kernel(x, y), expected, atol=5e-2, rtol=5e-3)

+  def test_dot_with_vector(self):
+    if not jtu.test_device_matches(["gpu"]) or self.INTERPRET:
+      self.skipTest(
+          "jnp.dot is only restricted to 2D on GPU in non-interpret mode."
+      )
+
+    @functools.partial(
+        self.pallas_call,
+        out_shape=jax.ShapeDtypeStruct((32,), jnp.float32),
+    )
+    def dot_kernel(x_ref, y_ref, o_ref):
+      o_ref[()] = jnp.dot(x_ref[()], y_ref[()])
+
+    key0, key1 = random.split(random.key(0))
+    x = random.normal(key0, (32, 64), dtype=jnp.float32)
+    y = random.normal(key1, (64,), dtype=jnp.float32)
+    with self.assertRaisesRegex(Exception, "must be 2D"):
+      dot_kernel(x, y)
+
   @parameterized.parameters(jnp.int4, jnp.uint4)
   def test_subbyte_load(self, dtype):
     if not jtu.test_device_matches(["gpu"]):
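
For completeness (not from the commit): if a kernel really needs a matrix-vector product, one way to stay within the 2-D restriction is to spell it as a broadcasted multiply followed by a row reduction. This is only a sketch under the assumption that the elementwise and axis-reduction lowerings accept these shapes; the kernel name matvec_kernel is made up for illustration.

import functools

import jax
import jax.numpy as jnp
from jax.experimental import pallas as pl

@functools.partial(
    pl.pallas_call,
    out_shape=jax.ShapeDtypeStruct((32,), jnp.float32),
)
def matvec_kernel(x_ref, y_ref, o_ref):
  x = x_ref[()]                                # (32, 64)
  y = y_ref[()]                                # (64,)
  # Broadcasted multiply + row-wise sum instead of jnp.dot with a 1-D operand.
  o_ref[()] = jnp.sum(x * y[None, :], axis=1)  # (32,)

x = jnp.ones((32, 64), jnp.float32)
y = jnp.ones((64,), jnp.float32)
out = matvec_kernel(x, y)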
