support bf16 in model signature conversion

chunnienc · copybara-github · commit 167e20ffbbd5 · 2025-04-09T11:29:43.000-07:00
PiperOrigin-RevId: 745667254
diff --git a/ai_edge_torch/_convert/conversion.py b/ai_edge_torch/_convert/conversion.py
@@ -40,8 +40,8 @@ def _run_convert_passes(
       fx_passes.OptimizeLayoutTransposesPass(),
       fx_passes.CanonicalizePass(),
       fx_passes.BuildAtenCompositePass(),
-      fx_passes.CanonicalizePass(),
       fx_passes.RemoveNonUserOutputsPass(),
+      fx_passes.CastInputsBf16ToF32Pass(),
       fx_passes.CanonicalizePass(),
   ]
 
diff --git a/ai_edge_torch/_convert/fx_passes/__init__.py b/ai_edge_torch/_convert/fx_passes/__init__.py
@@ -17,6 +17,7 @@
 
 from ai_edge_torch._convert.fx_passes.build_aten_composite_pass import BuildAtenCompositePass
 from ai_edge_torch._convert.fx_passes.build_interpolate_composite_pass import BuildInterpolateCompositePass
+from ai_edge_torch._convert.fx_passes.cast_inputs_bf16_to_f32_pass import CastInputsBf16ToF32Pass
 from ai_edge_torch._convert.fx_passes.inject_mlir_debuginfo_pass import InjectMlirDebuginfoPass
 from ai_edge_torch._convert.fx_passes.optimize_layout_transposes_pass import OptimizeLayoutTransposesPass
 from ai_edge_torch._convert.fx_passes.remove_non_user_outputs_pass import RemoveNonUserOutputsPass
diff --git a/ai_edge_torch/_convert/fx_passes/cast_inputs_bf16_to_f32_pass.py b/ai_edge_torch/_convert/fx_passes/cast_inputs_bf16_to_f32_pass.py
@@ -0,0 +1,50 @@
+# Copyright 2025 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Pass to cast all inputs with torch.bfloat16 type to torch.float32."""
+
+
+from ai_edge_torch import fx_infra
+import torch
+
+
+def cast_f32(x):
+  """Returns `x` converted to torch.float32."""
+  return x.to(dtype=torch.float32)
+
+
+class CastInputsBf16ToF32Pass(fx_infra.ExportedProgramPassBase):
+  """Casts every used torch.bfloat16 graph placeholder to torch.float32.
+
+  The placeholder nodes themselves are left untouched; a `cast_f32` call is
+  inserted ahead of their users and all users are rewired to read the result
+  of that call instead.
+  """
+
+  def call(self, exported_program: torch.export.ExportedProgram):
+    """Inserts float32 casts for the bfloat16 placeholders of the program.
+
+    Args:
+      exported_program: The program whose graph is rewritten in place.
+
+    Returns:
+      An ExportedProgramPassResult built from the same program and a flag
+      telling whether any cast node was inserted.
+    """
+    graph = exported_program.graph
+
+    # Snapshot the candidates first so the node list is not walked while new
+    # nodes are being inserted into it.
+    bf16_inputs = []
+    for node in graph.nodes:
+      if node.op != "placeholder" or not node.users:
+        continue
+      # "val" may be absent or hold a value without a dtype (e.g. a symbolic
+      # int); such placeholders are simply not bfloat16 tensors.
+      if getattr(node.meta.get("val"), "dtype", None) == torch.bfloat16:
+        bf16_inputs.append(node)
+
+    for node in bf16_inputs:
+      # `node.users` is ordered by registration, not by position in the
+      # graph, so look up the earliest user in graph order to guarantee the
+      # cast is defined ahead of every consumer.
+      first_user = next(n for n in graph.nodes if n in node.users)
+      with graph.inserting_before(first_user):
+        cast_node = graph.call_function(cast_f32, (node,))
+
+      node.replace_all_uses_with(cast_node)
+      # The call above also rewired the cast's own input to itself; point it
+      # back at the placeholder.
+      cast_node.replace_input_with(cast_node, node)
+
+    exported_program.graph_module.recompile()
+    return fx_infra.ExportedProgramPassResult(
+        exported_program, bool(bf16_inputs)
+    )
diff --git a/ai_edge_torch/_convert/test/test_convert.py b/ai_edge_torch/_convert/test/test_convert.py
@@ -553,6 +553,27 @@ def test_convert_resnet18_pt2e_per_channel(self):
       self.fail(f"PT2E conversion failed: {err}")
     # pylint: enable=broad-except
 
+  def test_convert_model_with_bfloat16_inputs(self):
+    """Test converting a simple model with torch.bfloat16 input.
+
+    bf16 inputs should remain in the converted model signature but be cast to
+    f32 right after the model inputs.
+    """
+
+    class SampleModel(nn.Module):
+
+      def forward(self, x: torch.Tensor):
+        return (x + 1) * 1.2
+
+    model = SampleModel().eval()
+    args = (torch.randn(10, 10).to(torch.bfloat16),)
+    # This test only checks that conversion completes without raising.
+    # pylint: disable=broad-except
+    try:
+      ai_edge_torch.convert(model, args)
+    except Exception as err:
+      self.fail(f"Conversion failed with bfloat16 inputs: {err}")
+    # pylint: enable=broad-except
+
 
 if __name__ == "__main__":
   googletest.main()
diff --git a/ai_edge_torch/lowertools/odml_torch_utils.py b/ai_edge_torch/lowertools/odml_torch_utils.py
@@ -52,6 +52,7 @@ def torch_dtype_to_tf(dtype):
       torch.int32: tf.int32,
       torch.int16: tf.int16,
       torch.bool: tf.bool,
+      torch.bfloat16: tf.bfloat16,
   }.get(dtype)
 
 
diff --git a/ai_edge_torch/odml_torch/lowerings/_basic.py b/ai_edge_torch/odml_torch/lowerings/_basic.py
@@ -301,3 +301,22 @@ def _aten_slice_scatter(lctx, self, src, dim=0, start=None, end=None, step=1):
   )
   out = stablehlo.select(pred, self, src)
   return out
+
+
+# Schema:
+#   - aten::_to_copy(Tensor self, *, ScalarType? dtype=None,
+#       Layout? layout=None, Device? device=None, bool? pin_memory=None,
+#       bool non_blocking=False, MemoryFormat? memory_format=None) -> Tensor
+@lower(torch.ops.aten._to_copy.default)
+def _aten_to_copy(
+    lctx, x: ir.Value, dtype: torch.dtype | None = None, **kwargs
+):
+  """Lowers aten._to_copy to a StableHLO element-type conversion.
+
+  Only `dtype` is honored; any other keyword argument from the schema is
+  accepted and ignored.
+
+  Args:
+    lctx: The lowering context (unused here).
+    x: The value to convert.
+    dtype: The requested element type, if any.
+    **kwargs: Remaining schema arguments, ignored.
+
+  Returns:
+    `x` itself when no dtype is requested, otherwise the result of converting
+    `x` to a tensor of the same shape with the requested element type.
+  """
+  if dtype:
+    shape = x.type.shape
+    element_type = utils.torch_dtype_to_ir_element_type(dtype)
+    result_type = ir.RankedTensorType.get(shape, element_type)
+    return stablehlo.convert(result_type, x)
+
+  # No target dtype requested: hand the input back unchanged.
+  return x
diff --git a/ai_edge_torch/odml_torch/lowerings/_jax_lowerings.py b/ai_edge_torch/odml_torch/lowerings/_jax_lowerings.py
@@ -74,7 +74,6 @@ def lower_by_torch_xla2(op):
 lower_by_torch_xla2(torch.ops.aten._native_batch_norm_legit_no_training)
 lower_by_torch_xla2(torch.ops.aten._pdist_forward)
 lower_by_torch_xla2(torch.ops.aten._softmax)
-lower_by_torch_xla2(torch.ops.aten._to_copy)
 lower_by_torch_xla2(torch.ops.aten._unsafe_index)
 lower_by_torch_xla2(torch.ops.aten._unsafe_view)
 lower_by_torch_xla2(torch.ops.aten.acos)
diff --git a/ai_edge_torch/odml_torch/lowerings/utils.py b/ai_edge_torch/odml_torch/lowerings/utils.py
@@ -37,6 +37,7 @@ def torch_dtype_to_ir_element_type(dtype) -> ir.Type:
       torch.int16: functools.partial(ir.IntegerType.get_signless, 16),
       torch.int8: functools.partial(ir.IntegerType.get_signless, 8),
       torch.bool: functools.partial(ir.IntegerType.get_signless, 1),
+      torch.bfloat16: ir.BF16Type.get,
   }[dtype]
   return ty_get()
 

Original file line number	Diff line number	Diff line change
`@@ -40,8 +40,8 @@ def _run_convert_passes(`
`40`	`40`	`fx_passes.OptimizeLayoutTransposesPass(),`
`41`	`41`	`fx_passes.CanonicalizePass(),`
`42`	`42`	`fx_passes.BuildAtenCompositePass(),`
`43`		`- fx_passes.CanonicalizePass(),`
`44`	`43`	`fx_passes.RemoveNonUserOutputsPass(),`
	`44`	`+ fx_passes.CastInputsBf16ToF32Pass(),`
`45`	`45`	`fx_passes.CanonicalizePass(),`
`46`	`46`	`]`
`47`	`47`