
Commit 14da7eb

petebu authored and Google-ML-Automation committed
[pallas:mosaic_gpu] Add Pallas Mosaic GPU lowering for jax.lax.bitcast_convert_type.
Only handles the case where operand type and target type have the same bitwidth.

PiperOrigin-RevId: 698332564
1 parent 1afb05e commit 14da7eb
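
For orientation, here is a minimal sketch of the kind of kernel this lowering enables, assuming the Mosaic GPU backend is selected for Pallas; the function name, shapes, and dtypes below are illustrative and not part of the commit:

import functools

import jax
import jax.numpy as jnp
from jax.experimental import pallas as pl

# Reinterpret float32 bits as uint32 inside a Pallas kernel. Both dtypes are
# 32 bits wide, which is the only case this lowering supports.
@functools.partial(
    pl.pallas_call,
    out_shape=jax.ShapeDtypeStruct((16, 8), jnp.uint32),
)
def bitcast_kernel(x_ref, y_ref):
  y_ref[...] = jax.lax.bitcast_convert_type(x_ref[...], jnp.uint32)

x = jnp.arange(16 * 8, dtype=jnp.float32).reshape(16, 8)
y = bitcast_kernel(x)  # same bits as x, viewed as uint32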

File tree

3 files changed: +55, -2 lines changed


jax/_src/pallas/mosaic_gpu/lowering.py

Lines changed: 25 additions & 0 deletions
@@ -1501,6 +1501,31 @@ def _cond_lowering_rule(ctx: LoweringRuleContext, index, *args, branches):
   return list(switch_op.results)
 
 
+@register_lowering_rule(lax.bitcast_convert_type_p)
+def _bitcast_convert_type_lowering_rule(
+    ctx: LoweringRuleContext, operand, *, new_dtype
+):
+  # TODO(petebu) Handle case where src and dst types have different bitwidths
+  [operand_aval] = ctx.avals_in
+  operand = _ensure_fa(operand, operand_aval.dtype)
+  src_elem_type = mgpu_utils.dtype_to_ir_type(operand_aval.dtype)
+  dst_elem_type = mgpu_utils.dtype_to_ir_type(new_dtype)
+  assert isinstance(src_elem_type, (ir.IntegerType, ir.FloatType))
+  assert isinstance(dst_elem_type, (ir.IntegerType, ir.FloatType))
+  if src_elem_type.width != dst_elem_type.width:
+    raise NotImplementedError(
+        f"Can't bitcast from {operand_aval.dtype} to {new_dtype} because they"
+        " have different widths"
+    )
+  if ir.IntegerType.isinstance(dst_elem_type):
+    output_is_signed = mgpu_utils.is_signed(new_dtype)
+  else:
+    output_is_signed = None
+  return mgpu.FragmentedArray.bitcast(
+      operand, dst_elem_type, output_is_signed=output_is_signed
+  )
+
+
 def _bcast(
     x: ir.Value,
     y: ir.Value,
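
The width guard above is the commit's stated limitation: dtype pairs whose element types differ in bitwidth raise NotImplementedError at lowering time. A quick host-side way to see which pairs pass the guard (plain NumPy dtype arithmetic, not part of the commit):

import jax.numpy as jnp
import numpy as np

# Equal itemsize -> accepted by the lowering; unequal -> NotImplementedError.
assert np.dtype(jnp.float16).itemsize == np.dtype(jnp.int16).itemsize  # 16 vs 16 bits: accepted
assert np.dtype(jnp.float32).itemsize != np.dtype(jnp.int16).itemsize  # 32 vs 16 bits: rejected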

jax/experimental/mosaic/gpu/fragmented_array.py

Lines changed: 6 additions & 2 deletions
@@ -929,15 +929,19 @@ def fast_instr(x):
       raise NotImplementedError(x.type)
     return fast_instr
 
-  def bitcast(self, elt: ir.Type):
+  def bitcast(self, elt: ir.Type, *, output_is_signed: bool | None = None):
+    if elt == self.mlir_dtype:
+      return self
     reg_type = self.registers.flat[0].type
     if ir.VectorType.isinstance(reg_type):
       reg_shape = ir.VectorType(reg_type).shape
       ty = ir.VectorType.get(reg_shape, elt)
     else:
       ty = elt
 
-    return self._pointwise(lambda x: arith.bitcast(ty, x))
+    return self._pointwise(
+        lambda x: arith.bitcast(ty, x), output_is_signed=output_is_signed
+    )
 
   def __getitem__(self, idx):
     if self.layout != WGMMA_LAYOUT:
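
The output_is_signed plumbing matters because Mosaic GPU tracks signedness on integer FragmentedArrays, while the bitcast itself leaves the bit pattern untouched. A small host-side illustration of why the target dtype's signedness changes how the same bits read back (plain jax.lax, not the Mosaic path; printed values hold for this specific input):

import jax
import jax.numpy as jnp

x = jnp.float32(-1.0)                               # bit pattern 0xBF800000
print(jax.lax.bitcast_convert_type(x, jnp.uint32))  # 3212836864
print(jax.lax.bitcast_convert_type(x, jnp.int32))   # -1082130432 (same bits, signed view)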

tests/pallas/mosaic_gpu_test.py

Lines changed: 24 additions & 0 deletions
@@ -1052,6 +1052,30 @@ def kernel(x_ref, o_ref):
     self.assertEqual(data.count('"name": "store"'), 2)
     np.testing.assert_array_equal(y, x + x)
 
+  @parameterized.parameters(
+      (jnp.float16, jnp.float16),  # Noop
+      (jnp.int16, jnp.bfloat16),
+      (jnp.int16, jnp.float16),
+      (jnp.uint16, jnp.float16),
+      (jnp.float32, jnp.int32),
+      (jnp.float32, jnp.uint32),
+      (jnp.uint32, jnp.int32),
+      (jnp.int32, jnp.uint32),
+  )
+  def test_bitcast_convert_type(self, in_dtype, out_dtype):
+    m, n = 16, 8
+    out_shape = jax.ShapeDtypeStruct((m, n), out_dtype)
+    grid = ()
+
+    @functools.partial(pl.pallas_call, out_shape=out_shape, grid=grid)
+    def convert(x_ref, y_ref):
+      y_ref[...] = jax.lax.bitcast_convert_type(x_ref[...], out_shape)
+
+    x = jnp.arange(m * n, dtype=in_dtype).reshape((m, n))
+    y = convert(x)
+    y_ref = jax.lax.bitcast_convert_type(x, out_dtype)
+    np.testing.assert_array_equal(y, y_ref)
+
 
 class PipelineTest(PallasTest):
 
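
Assuming a pytest-based setup and a GPU supported by the Mosaic GPU backend, the new cases can be targeted with something like: pytest tests/pallas/mosaic_gpu_test.py -k test_bitcast_convert_type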
