Skip to content

Commit 3915f4a

Browse files
bchetioui (Google-ML-Automation)
authored and committed
[Mosaic GPU] Commit to using Vectors everywhere (and no Tensors).
PiperOrigin-RevId: 707912637
1 parent c4fae4a commit 3915f4a

File tree

5 files changed

+13
-20
lines changed

5 files changed

+13
-20
lines changed

jax/experimental/mosaic/gpu/layout_inference.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -164,16 +164,14 @@ def inference_step(op: ir.Operation):
164164
def to_default_layout(ty: ir.Type) -> ir.Attribute | None:
165165
if ir.VectorType.isinstance(ty):
166166
layout = WGStridedFragLayout.from_shaped_type(ty)
167-
elif ir.RankedTensorType.isinstance(ty):
168-
layout = WGStridedFragLayout.from_shaped_type(ty)
169167
else:
170168
return None
171169
return to_strided_fragmented_layout_attr(layout)
172170

173171
def set_default_layout(op: ir.OpView):
174172
if should_have_layout(op) and not has_any_layout_set(op):
175173
# TODO(bchetioui): consistently set layouts only for supported argument
176-
# types (i.e. skip non-vector/tensor typed arguments.)
174+
# types (i.e. skip non-vector typed arguments.)
177175
in_layouts = []
178176
for operand in op.operands:
179177
if (layout := to_default_layout(operand.type)) is not None:

jax/experimental/mosaic/gpu/layouts.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,7 @@ def to_splat_fragmented_layout_attr(layout: WGSplatFragLayout) -> ir.Attribute:
7272
def should_have_layout(op: ir.OpView) -> bool:
7373
"""Returns 'true' if the operation should be assigned a layout."""
7474

75-
def is_array(v: ir.Value):
76-
ty = v.type
77-
return ir.RankedTensorType.isinstance(ty) or ir.VectorType.isinstance(ty)
78-
75+
is_array = lambda v: ir.VectorType.isinstance(v.type)
7976
return any(map(is_array, itertools.chain(op.operands, op.results))) # type: ignore
8077

8178

jaxlib/mosaic/dialect/gpu/mosaic_gpu.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -327,7 +327,7 @@ def MosaicGPU_WGMMAOp : Op<MosaicGPU_Dialect, "wgmma", []> {
327327
memrefs. `a` and `b` must have the same element type and when `a` is in
328328
registers only F16 or BF16 are supported.
329329

330-
The `accumulator` must be a tensor with a FragmentedLayout. The WGMMA
330+
The `accumulator` must be a vector with a FragmentedLayout. The WGMMA
331331
operation will be executed in the async proxy and any inputs in
332332
registers need to be synchronized with a memory fence.
333333

@@ -338,10 +338,10 @@ def MosaicGPU_WGMMAOp : Op<MosaicGPU_Dialect, "wgmma", []> {
338338
}];
339339

340340
let arguments = (ins
341-
TensorOf<[MosaicGPU_WGMMASupportedType]>:$accumulator,
341+
VectorOfAnyRankOf<[MosaicGPU_WGMMASupportedType]>:$accumulator,
342342
AnyTypeOf<[
343343
MemRefOf<[MosaicGPU_WGMMASupportedType]>,
344-
TensorOf<[MosaicGPU_WGMMASupportedType]>]>:$a,
344+
VectorOfAnyRankOf<[MosaicGPU_WGMMASupportedType]>]>:$a,
345345
MemRefOf<[MosaicGPU_WGMMASupportedType]>:$b,
346346

347347
// Attributes

tests/mosaic/gpu_dialect_test.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -487,7 +487,7 @@ def test_async_store_op_slice_lengths_size_must_match_source_rank(self):
487487
def test_wgmma_types_match(self):
488488
with ir.InsertionPoint(self.module.body):
489489
func.FuncOp.from_py_func(
490-
ir.RankedTensorType.get([128, 160], ir.BF16Type.get()),
490+
ir.VectorType.get([128, 160], ir.BF16Type.get()),
491491
ir.MemRefType.get([2, 4, 64, 32], ir.F16Type.get()),
492492
ir.MemRefType.get([4, 5, 32, 32], ir.BF16Type.get()),
493493
name="wgmma",
@@ -509,7 +509,7 @@ def test_wgmma_types_match(self):
509509
def test_wgmma_b_rank_is_4(self):
510510
with ir.InsertionPoint(self.module.body):
511511
func.FuncOp.from_py_func(
512-
ir.RankedTensorType.get([128, 160], ir.BF16Type.get()),
512+
ir.VectorType.get([128, 160], ir.BF16Type.get()),
513513
ir.MemRefType.get([2, 4, 64, 32], ir.BF16Type.get()),
514514
ir.MemRefType.get([5, 32, 32], ir.BF16Type.get()),
515515
name="wgmma",
@@ -531,7 +531,7 @@ def test_wgmma_b_rank_is_4(self):
531531
def test_wgmma_b_shape_dim_3(self):
532532
with ir.InsertionPoint(self.module.body):
533533
func.FuncOp.from_py_func(
534-
ir.RankedTensorType.get([128, 160], ir.BF16Type.get()),
534+
ir.VectorType.get([128, 160], ir.BF16Type.get()),
535535
ir.MemRefType.get([2, 4, 64, 32], ir.BF16Type.get()),
536536
ir.MemRefType.get([4, 5, 32, 16], ir.BF16Type.get()),
537537
name="wgmma",
@@ -554,7 +554,7 @@ def test_wgmma_b_shape_dim_3(self):
554554
def test_wgmma_b_shape_dim_2(self):
555555
with ir.InsertionPoint(self.module.body):
556556
func.FuncOp.from_py_func(
557-
ir.RankedTensorType.get([128, 160], ir.BF16Type.get()),
557+
ir.VectorType.get([128, 160], ir.BF16Type.get()),
558558
ir.MemRefType.get([2, 4, 64, 32], ir.BF16Type.get()),
559559
ir.MemRefType.get([4, 5, 64, 32], ir.BF16Type.get()),
560560
name="wgmma",

tests/mosaic/gpu_layout_inference_test.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,12 @@ def setUp(self):
4545
self.enter_context(ir.Location.unknown())
4646
self.module = ir.Module.create()
4747

48-
@parameterized.parameters(ir.RankedTensorType, ir.VectorType)
49-
def test_infer_layout_default(self, type_constructor):
48+
def test_infer_layout_default(self):
5049
shape = (16, 8)
5150
elt_type = ir.BF16Type.get()
5251

5352
with ir.InsertionPoint(self.module.body):
54-
ab_type = type_constructor.get(shape, elt_type)
53+
ab_type = ir.VectorType.get(shape, elt_type)
5554
const_zero = ir.FloatAttr.get(elt_type, 0)
5655
const_one = ir.FloatAttr.get(elt_type, 1)
5756
a = arith.ConstantOp(
@@ -80,13 +79,12 @@ def test_infer_layout_default(self, type_constructor):
8079
op.attributes["out_layouts"], [layout] * len(op.results)
8180
)
8281

83-
@parameterized.parameters(ir.RankedTensorType, ir.VectorType)
84-
def test_infer_layout_for_pointwise_op(self, type_constructor):
82+
def test_infer_layout_for_pointwise_op(self):
8583
shape = (4, 8)
8684
elt_type = ir.BF16Type.get()
8785

8886
with ir.InsertionPoint(self.module.body):
89-
ab_type = type_constructor.get(shape, elt_type)
87+
ab_type = ir.VectorType.get(shape, elt_type)
9088
const_zero = ir.FloatAttr.get(elt_type, 0)
9189
const_one = ir.FloatAttr.get(elt_type, 1)
9290
a = arith.ConstantOp(

0 commit comments

Comments (0)