diff --git a/ai_edge_torch/_convert/conversion.py b/ai_edge_torch/_convert/conversion.py index 73d31b7e..b86b751c 100644 --- a/ai_edge_torch/_convert/conversion.py +++ b/ai_edge_torch/_convert/conversion.py @@ -32,6 +32,7 @@ def _run_convert_passes( exported_program: torch.export.ExportedProgram, + cast_i64_inputs_to_i32: bool, ) -> torch.export.ExportedProgram: exported_program = generative_fx_passes.run_generative_passes( exported_program @@ -46,6 +47,10 @@ def _run_convert_passes( fx_passes.CastInputsBf16ToF32Pass(), ] + if cast_i64_inputs_to_i32: + print("---------------> Casting i64 inputs to i32") + passes += [fx_passes.CastInputsI64ToI32Pass()] + # Debuginfo is not injected automatically by odml_torch. Only inject # debuginfo via fx pass when using torch_xla. if ai_edge_torch.config.use_torch_xla: @@ -82,6 +87,7 @@ def convert_signatures( signatures: list[signature.Signature], *, strict_export: Union[Literal["auto"], bool] = True, + cast_i64_inputs_to_i32: bool = False, quant_config: Optional[qcfg.QuantConfig] = None, _tfl_converter_flags: Optional[dict[str, Any]] = None, _saved_model_dir: Optional[str] = None, @@ -96,6 +102,8 @@ def convert_signatures( and ensure the soundness of the exported graph. When strict_export="auto", the function will try to export module in both modes and use the first one succeeds for downstream conversion. + cast_i64_inputs_to_i32: If true, casts all inputs with torch.int64 type to + torch.int32. quant_config: User-defined quantization method and scheme of the model. _tfl_converter_flags: A nested dictionary allowing setting flags for the underlying tflite converter. 
@@ -147,7 +155,10 @@ def export(**kwargs): ] # Apply default fx passes - exported_programs = list(map(_run_convert_passes, exported_programs)) + exported_programs = [ + _run_convert_passes(ep, cast_i64_inputs_to_i32) + for ep in exported_programs + ] tflite_model = lowertools.exported_programs_to_tflite( exported_programs, signatures, diff --git a/ai_edge_torch/_convert/converter.py b/ai_edge_torch/_convert/converter.py index 50c05182..e6d28277 100644 --- a/ai_edge_torch/_convert/converter.py +++ b/ai_edge_torch/_convert/converter.py @@ -132,6 +132,7 @@ def convert( sample_kwargs=None, *, strict_export: Union[Literal["auto"], bool] = True, + cast_i64_inputs_to_i32: bool = False, quant_config: Optional[qcfg.QuantConfig] = None, dynamic_shapes: Optional[Union[dict[str, Any], Tuple[Any, ...]]] = None, _ai_edge_converter_flags: Optional[dict[str, Any]] = None, @@ -159,6 +160,8 @@ def convert( and ensure the soundness of the exported graph. When strict_export="auto", the function will try to export module in both modes and use the first one succeeds for downstream conversion. + cast_i64_inputs_to_i32: If true, casts all inputs with torch.int64 type to + torch.int32. quant_config: User-defined quantization method and scheme of the model. dynamic_shapes: Optional dict or tuple that specify dynamic shape specifications for each input in original order. 
See @@ -203,6 +206,7 @@ def convert( converted_model = conversion.convert_signatures( self._signatures, strict_export=strict_export, + cast_i64_inputs_to_i32=cast_i64_inputs_to_i32, quant_config=quant_config, _tfl_converter_flags=_ai_edge_converter_flags, _saved_model_dir=_saved_model_dir, @@ -271,6 +275,7 @@ def convert( sample_kwargs=None, *, strict_export: Union[Literal["auto"], bool] = True, + cast_i64_inputs_to_i32: bool = False, quant_config: Optional[qcfg.QuantConfig] = None, dynamic_shapes: Optional[Union[dict[str, Any], Tuple[Any, ...]]] = None, _ai_edge_converter_flags: Optional[dict[str, Any]] = None, @@ -289,6 +294,8 @@ def convert( and ensure the soundness of the exported graph. When strict_export="auto", the function will try to export module in both modes and use the first one succeeds for downstream conversion. + cast_i64_inputs_to_i32: If true, casts all inputs with torch.int64 type to + torch.int32. quant_config: User-defined quantization method and scheme of the model. dynamic_shapes: Optional dict or tuple that specify dynamic shape specifications for each input in original order. 
See @@ -317,6 +324,7 @@ def convert( sample_args, sample_kwargs, strict_export=strict_export, + cast_i64_inputs_to_i32=cast_i64_inputs_to_i32, quant_config=quant_config, dynamic_shapes=dynamic_shapes, _ai_edge_converter_flags=_ai_edge_converter_flags, diff --git a/ai_edge_torch/_convert/fx_passes/__init__.py b/ai_edge_torch/_convert/fx_passes/__init__.py index 00ccdfe0..7f9d5652 100644 --- a/ai_edge_torch/_convert/fx_passes/__init__.py +++ b/ai_edge_torch/_convert/fx_passes/__init__.py @@ -17,6 +17,7 @@ from ai_edge_torch._convert.fx_passes.build_aten_composite_pass import BuildAtenCompositePass from ai_edge_torch._convert.fx_passes.cast_inputs_bf16_to_f32_pass import CastInputsBf16ToF32Pass +from ai_edge_torch._convert.fx_passes.cast_inputs_i64_to_i32_pass import CastInputsI64ToI32Pass from ai_edge_torch._convert.fx_passes.eliminate_dead_code_pass import EliminateDeadCodePass from ai_edge_torch._convert.fx_passes.inject_mlir_debuginfo_pass import InjectMlirDebuginfoPass from ai_edge_torch._convert.fx_passes.optimize_layout_transposes_pass import OptimizeLayoutTransposesPass diff --git a/ai_edge_torch/_convert/fx_passes/build_aten_composite_pass.py b/ai_edge_torch/_convert/fx_passes/build_aten_composite_pass.py index 3e92d9cd..fcc45513 100644 --- a/ai_edge_torch/_convert/fx_passes/build_aten_composite_pass.py +++ b/ai_edge_torch/_convert/fx_passes/build_aten_composite_pass.py @@ -16,6 +16,7 @@ from typing import Any, Callable from ai_edge_torch import fx_infra from ai_edge_torch import lowertools +from ai_edge_torch.odml_torch import optimization_barrier as optimization_barrier_lib import torch import torch.utils._pytree as pytree @@ -276,6 +277,7 @@ def embedding(*args, **kwargs): # Explicitly reshape back to the original shape. This places the ReshapeOp # outside of the HLFB. 
output = torch.reshape(output, (*(original_idx_shape), embedding_dim)) + output, _ = optimization_barrier_lib.optimization_barrier(output, idx) return output node.target = embedding diff --git a/ai_edge_torch/_convert/fx_passes/cast_inputs_i64_to_i32_pass.py b/ai_edge_torch/_convert/fx_passes/cast_inputs_i64_to_i32_pass.py new file mode 100644 index 00000000..4b5e23c5 --- /dev/null +++ b/ai_edge_torch/_convert/fx_passes/cast_inputs_i64_to_i32_pass.py @@ -0,0 +1,52 @@ +# Copyright 2025 The AI Edge Torch Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
def cast_i32(x):
  """Casts a tensor to torch.int32.

  This is a module-level function because the pass below inserts it into the
  FX graph as a `call_function` target.

  Args:
    x: The tensor to cast.

  Returns:
    The result of `x.to(torch.int32)`.
  """
  return x.to(torch.int32)


class CastInputsI64ToI32Pass(fx_infra.ExportedProgramPassBase):
  """This pass casts torch.int64 values in the graph to torch.int32.

  Every `placeholder` or `call_function` node whose `meta["val"]` reports a
  torch.int64 dtype gets a `cast_i32` node inserted in front of its first
  user, and all of its users are rewired to consume the cast result. The node
  itself is left untouched, so an int64 placeholder keeps its original dtype.
  """

  def call(self, exported_program: torch.export.ExportedProgram):
    """Inserts int32 casts after int64 nodes.

    Args:
      exported_program: The exported program to rewrite in place.

    Returns:
      An ExportedProgramPassResult holding the same exported program and a
      flag telling whether any cast node was inserted.
    """
    graph = exported_program.graph
    modified = False

    # Iterate over a snapshot because cast nodes are inserted during the walk.
    for node in list(graph.nodes):
      if node.op not in ("placeholder", "call_function"):
        continue

      # meta["val"] may be missing or may be an object without a `dtype`
      # attribute; such nodes are skipped instead of raising AttributeError.
      if getattr(node.meta.get("val"), "dtype", None) != torch.int64:
        continue

      # Nothing consumes this value, so there is nothing to rewire.
      if not node.users:
        continue

      modified = True
      first_user = next(iter(node.users))
      with graph.inserting_before(first_user):
        cast_node = graph.call_function(cast_i32, (node,))

      node.replace_all_uses_with(cast_node)
      # replace_all_uses_with also redirected the cast's own input to itself;
      # point it back at the original node.
      cast_node.replace_input_with(cast_node, node)

    exported_program.graph_module.recompile()
    return fx_infra.ExportedProgramPassResult(exported_program, modified)
+ + i64 inputs would remain in converted model signature but be casted to i32 + right after the model inputs. + """ + + class SampleModel(nn.Module): + + def forward(self, x: torch.Tensor): + return torch.linspace(0.5, 10.5, steps=x.shape[0], dtype=torch.float64) + + model = SampleModel().eval() + args = (torch.randint(0, 100, (10, 10), dtype=torch.int64),) + + # pylint: disable=broad-except + try: + # Expect this to potentially raise an error during conversion + ai_edge_torch.convert(model, args, cast_i64_inputs_to_i32=False) + self.fail("Conversion succeeded unexpectedly") + except Exception as err: + print(f"Conversion failed as expected: {err}") + expected_error_message = "failed to legalize operation 'tfl.less'" + if expected_error_message not in str(err): + self.fail(f"Unexpected error message: {err}") + + try: + # Expect this to fix the error during conversion + ai_edge_torch.convert(model, args, cast_i64_inputs_to_i32=True) + except Exception as err: + self.fail(f"Conversion failed with int64 inputs: {err}") + # pylint: enable=broad-except + def test_compile_model(self): """Tests AOT compilation of a simple Add module.""" diff --git a/ai_edge_torch/generative/examples/gemma/gemma1.py b/ai_edge_torch/generative/examples/gemma/gemma1.py index 0a04ed3f..c2549ff7 100644 --- a/ai_edge_torch/generative/examples/gemma/gemma1.py +++ b/ai_edge_torch/generative/examples/gemma/gemma1.py @@ -23,7 +23,7 @@ import torch from torch import nn -TENSOR_NAMES = loading_utils.ModelLoader.TensorNames( +TENSOR_NAMES_FUSED_QKV = loading_utils.ModelLoader.TensorNames( ff_up_proj="model.layers.{}.mlp.up_proj", ff_down_proj="model.layers.{}.mlp.down_proj", ff_gate_proj="model.layers.{}.mlp.gate_proj", @@ -36,6 +36,24 @@ lm_head=None, ) +TENSOR_NAMES_SEP_QKV = loading_utils.ModelLoader.TensorNames( + ff_up_proj="model.layers.{}.mlp.up_proj", + ff_down_proj="model.layers.{}.mlp.down_proj", + ff_gate_proj="model.layers.{}.mlp.gate_proj", + 
attn_query_proj="model.layers.{}.self_attn.q_proj", + attn_key_proj="model.layers.{}.self_attn.k_proj", + attn_value_proj="model.layers.{}.self_attn.v_proj", + attn_output_proj="model.layers.{}.self_attn.o_proj", + pre_attn_norm="model.layers.{}.input_layernorm", + post_attn_norm="model.layers.{}.post_attention_layernorm", + embedding="model.embed_tokens", + final_norm="model.norm", +) + +TENSOR_NAMES_DICT = { + "safetensors": TENSOR_NAMES_SEP_QKV, + "kaggle": TENSOR_NAMES_FUSED_QKV, +} class Gemma1(model_builder.DecoderOnlyModel): """A Gemma1 model built from the Edge Generative API layers.""" @@ -94,11 +112,28 @@ def build_2b_model( custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None, mask_cache_size: int = 0, ) -> nn.Module: - return model_builder.build_decoder_only_model( - checkpoint_path=checkpoint_path, - config=get_model_config_2b(), - tensor_names=TENSOR_NAMES, - model_class=Gemma1, - custom_loader=custom_loader, - mask_cache_size=mask_cache_size, + + # A list to store the reasons for each failure + key_errors = [] + + for tensor_names in TENSOR_NAMES_DICT.values(): + try: + return model_builder.build_decoder_only_model( + checkpoint_path=checkpoint_path, + config=get_model_config_2b(), + tensor_names=tensor_names, + model_class=Gemma1, + custom_loader=custom_loader, + mask_cache_size=mask_cache_size, + ) + except KeyError as ke: + # Store the specific key that was missing for later + key_errors.append(f"Missing key: {ke}") + continue + + # If the loop finishes, raise an error with all the collected details + error_details = "\n".join(key_errors) + raise RuntimeError( + "Failed to build model after trying all configurations. 
def main(_):
  """Compares the reauthored SmolVLM2 image encoder with the original model.

  Loads the original model and processor from `--checkpoint`, builds the
  reauthored image encoder from `--pretrained_weights_path`, runs both on the
  image fetched from `--image_url` and checks that the outputs are close.

  Args:
    _: Unused positional arguments passed by `app.run`.

  Raises:
    ValueError: If `--pretrained_weights_path` is not set.
    AssertionError: If the two outputs differ beyond the tolerances.
  """
  checkpoint = _CHECKPOINT.value
  reauthored_checkpoint = _REAUTHORTHED_CHECKPOINT.value

  # Validate the required flag first so a missing path fails before the
  # expensive model load instead of after it.
  if reauthored_checkpoint is None:
    raise ValueError("--pretrained_weights_path is required.")

  logging.info("Loading the original model from: %s", checkpoint)
  original_model = transformers.AutoModelForImageTextToText.from_pretrained(
      checkpoint
  )
  original_model = original_model.eval().model

  logging.info("Building the reauthored model from: %s", reauthored_checkpoint)
  reauthored_model = vision_encoder.build_image_encoder(reauthored_checkpoint)

  logging.info("Loading the tokenizer from: %s", checkpoint)
  processor = transformers.AutoProcessor.from_pretrained(checkpoint)

  logging.info("Loading the image from: %s", _IMAGE_URL.value)
  # Bound the download time and surface HTTP errors here rather than as an
  # obscure image decoding failure.
  response = requests.get(_IMAGE_URL.value, stream=True, timeout=60)
  response.raise_for_status()
  image = Image.open(response.raw)
  pixel_values = processor(images=image, return_tensors="pt")["pixel_values"]

  logging.info("Forwarding the original model...")
  outputs_original = original_model.get_image_features(pixel_values)
  logging.info("outputs_original's shape: %s", outputs_original.shape)

  # Merge the two leading dimensions before calling the reauthored encoder.
  pixel_values = pixel_values.reshape(
      pixel_values.shape[0] * pixel_values.shape[1], *pixel_values.shape[2:]
  )
  logging.info("Forwarding the reauthored model...")
  outputs_reauthored = reauthored_model.forward(pixel_values=pixel_values)
  logging.info("outputs_reauthored's shape: %s", outputs_reauthored.shape)

  # Explicit check instead of `assert`, which is stripped under `python -O`.
  outputs_match = torch.allclose(
      outputs_original, outputs_reauthored, atol=1e-03, rtol=1e-04
  )
  if not outputs_match:
    logging.error("*** FAILED *** verify with an image")
    raise AssertionError(
        "Reauthored image encoder outputs differ from the original model."
    )
  logging.info("*** PASSED *** verify with an image")
@register_decomp(torch.ops.aten.slice.Tensor)
def _aten_slice_tensor_decomp(x, dim=0, start=None, end=None, step=1):
  """Decomposes aten.slice.Tensor into a single tfl.strided_slice call.

  Only dimension `dim` is sliced; every other dimension is taken in full
  (begin 0, end equal to its size, stride 1).

  Args:
    x: The tensor to slice.
    dim: The dimension to slice along.
    start: First index of the slice. None means 0; a negative value is
      offset by the size of `dim`.
    end: One-past-last index of the slice. None means the size of `dim`; a
      negative value is offset by the size of `dim`.
    step: The stride along `dim`.

  Returns:
    The result of torch.ops.tfl.strided_slice with the computed vectors.
  """
  ndim = x.dim()
  extent = x.shape[dim]

  # Resolve the lower bound: default to 0 and wrap negative indices.
  lower = start if start is not None else 0
  if lower < 0:
    lower = lower + extent

  # Resolve the upper bound: default to the full extent and wrap negatives.
  upper = end if end is not None else extent
  if upper < 0:
    upper = upper + extent

  # Clamp both bounds into [0, extent] and never let the upper bound fall
  # below the lower one, which yields an empty slice instead of an error.
  lower = max(0, min(lower, extent))
  upper = max(lower, min(upper, extent))

  # Full-range slice on every dimension, then override the sliced one.
  begins = [0] * ndim
  ends = list(x.shape)
  steps = [1] * ndim
  begins[dim] = lower
  ends[dim] = upper
  steps[dim] = step

  return torch.ops.tfl.strided_slice(x, begins, ends, steps)
a/ai_edge_torch/odml_torch/experimental/torch_tfl/test/test_torch_tfl_impls.py b/ai_edge_torch/odml_torch/experimental/torch_tfl/test/test_torch_tfl_impls.py index 6f2250aa..0f8f351b 100644 --- a/ai_edge_torch/odml_torch/experimental/torch_tfl/test/test_torch_tfl_impls.py +++ b/ai_edge_torch/odml_torch/experimental/torch_tfl/test/test_torch_tfl_impls.py @@ -152,6 +152,7 @@ def _assert_export_and_close( ("aten_cos_1", torch.ops.aten.cos.default, (rnd(torch.float32, (1, 10)),), dict()), ("aten_rsqrt_0", torch.ops.aten.rsqrt.default, (rnd(torch.float32, (10, 10)),), dict()), ("aten_rsqrt_1", torch.ops.aten.rsqrt.default, (rnd(torch.float32, (1, 10)),), dict()), + ("aten_neg_0", torch.ops.aten.neg.default, (rnd(torch.float32, (10, 10)),), dict()), ("aten_gelu_0", torch.ops.aten.gelu.default, (rnd(torch.float32, (10, 10)),), dict()), ("aten_gelu_1", torch.ops.aten.gelu.default, (rnd(torch.float32, (10, 10)),), dict(approximate="tanh")), ("aten_gelu_2", torch.ops.aten.gelu.default, (rnd(torch.float32, (1, 10)),), dict()), @@ -186,6 +187,14 @@ def _assert_export_and_close( ("aten_squeeze_dims_0", torch.ops.aten.squeeze.dims, (rnd(torch.float32, (2, 1, 2, 1, 2)), [1, 2, 3],), dict()), ("aten_select_int_0", torch.ops.aten.select.int, (rnd(torch.float32, (2, 3, 4)), 0, 1,), dict()), ("aten_select_int_1", torch.ops.aten.select.int, (rnd(torch.float32, (2, 3, 4)), 1, 1,), dict()), + ("aten_slice_tensor_0", torch.ops.aten.slice.Tensor, (rnd(torch.float32, (10, 10)),), dict(dim=0, start=1, end=3)), + ("aten_slice_tensor_1", torch.ops.aten.slice.Tensor, (rnd(torch.float32, (10, 10)),), dict(dim=1, start=2, end=5)), + ("aten_slice_tensor_2", torch.ops.aten.slice.Tensor, (rnd(torch.float32, (10, 10)),), dict(dim=0, start=None, end=5)), + ("aten_slice_tensor_3", torch.ops.aten.slice.Tensor, (rnd(torch.float32, (10, 10)),), dict(dim=0, start=2, end=None)), + ("aten_slice_tensor_4", torch.ops.aten.slice.Tensor, (rnd(torch.float32, (10, 10)),), dict(dim=0, start=-5, end=-2)), + 
("aten_slice_tensor_5", torch.ops.aten.slice.Tensor, (rnd(torch.float32, (10, 10)),), dict(dim=0, start=1, end=8, step=2)), + ("aten_slice_tensor_6", torch.ops.aten.slice.Tensor, (rnd(torch.float32, (10, 10)),), dict(dim=1, start=2, end=100)), + ("aten_slice_tensor_7", torch.ops.aten.slice.Tensor, (rnd(torch.float32, (10, 10)),), dict(dim=0, start=None, end=None)), ("aten_where_self_0", torch.ops.aten.where.self, (rnd(torch.bool, (10, 10)), rnd(torch.float32, (10, 10)), rnd(torch.float32, (10, 10)),), dict()), ("aten_embedding_0", torch.ops.aten.embedding.default, (rnd(torch.float32, (10, 10)), torch.tensor([[0, 2, 4, 6, 8], [1, 3, 5, 7, 9]]),), dict()), ("aten__softmax_0", torch.ops.aten._softmax.default, (rnd(torch.float32, (10, 10)), -1, False), dict()), diff --git a/ai_edge_torch/odml_torch/export.py b/ai_edge_torch/odml_torch/export.py index 2783af11..8e8c17e4 100644 --- a/ai_edge_torch/odml_torch/export.py +++ b/ai_edge_torch/odml_torch/export.py @@ -21,7 +21,7 @@ from typing import Any, Callable, Optional from ai_edge_torch import fx_infra -from jax.lib import xla_extension +import jax.extend from jax._src.lib.mlir import ir from jax._src.lib.mlir.dialects import func from jax._src.lib.mlir.dialects import hlo as stablehlo @@ -233,7 +233,7 @@ def module_bytecode_vhlo(self) -> bytes: target_version = stablehlo.get_version_from_compatibility_requirement( stablehlo.StablehloCompatibilityRequirement.WEEK_12 ) - module_bytecode = xla_extension.mlir.serialize_portable_artifact( + module_bytecode = jax.extend.mlir.serialize_portable_artifact( self.module_bytecode, target_version ) return module_bytecode diff --git a/ai_edge_torch/odml_torch/optimization_barrier.py b/ai_edge_torch/odml_torch/optimization_barrier.py new file mode 100644 index 00000000..88778b37 --- /dev/null +++ b/ai_edge_torch/odml_torch/optimization_barrier.py @@ -0,0 +1,71 @@ +# Copyright 2025 The AI Edge Torch Authors. 
# Schema of the op: a list of tensors in, a list of tensors out.
_torch_library.ODML_TORCH_LIB.define(
    "optimization_barrier(Tensor[] inputs) -> Tensor[]"
)

optimization_barrier_op = torch.ops.odml_torch.optimization_barrier.default


def optimization_barrier(*inputs: pytree.PyTree):
  """Routes every tensor found in `inputs` through the optimization barrier op.

  When exactly one positional argument is given, that argument itself is
  treated as the pytree; otherwise the tuple of all arguments is the pytree.

  Args:
    *inputs: Tensors or pytrees of tensors.

  Returns:
    The op outputs arranged in the same pytree structure as the input.
  """
  tree = inputs[0] if len(inputs) == 1 else inputs
  leaves, treespec = pytree.tree_flatten(tree)
  barriered = optimization_barrier_op(tuple(leaves))
  return pytree.tree_unflatten(barriered, treespec)


@torch.library.impl(
    _torch_library.ODML_TORCH_LIB,
    "optimization_barrier",
    "CompositeExplicitAutograd",
)
def _optimization_barrier_impl(inputs: tuple[torch.Tensor, ...]):
  # Eager implementation: hands the given tensors back as a tuple.
  return tuple(inputs)


@torch.library.impl(
    _torch_library.ODML_TORCH_LIB,
    "optimization_barrier",
    "Meta",
)
def _optimization_barrier_fake(inputs: tuple[torch.Tensor, ...]):
  # Meta implementation: one empty tensor shaped like each input.
  return tuple(torch.empty_like(tensor) for tensor in inputs)


@registry.lower(optimization_barrier_op)
def _optimization_barrier_lowering(
    lctx, inputs: tuple[ir.Value, ...]
) -> ir.Value:
  # The lowering context is not needed; the op maps directly onto
  # stablehlo.optimization_barrier.
  del lctx
  return stablehlo.optimization_barrier(inputs)
class TestOptimizationBarrier(googletest.TestCase):
  """Checks the optimization barrier op in eager mode and after lowering."""

  def test_applied_optimization_barrier_op(self):
    """The exported module lowers to exactly one stablehlo barrier op."""

    class TestModel(torch.nn.Module):

      def forward(self, x, y):
        first, _ = optimization_barrier(x, y)
        return first

    sample = torch.randn(1, 5)
    exported = torch.export.export(TestModel().eval(), (sample, sample))
    lowered = odml_torch.export.exported_program_to_mlir(exported)
    expected_op = (
        "stablehlo.optimization_barrier %arg1, %arg1 : tensor<1x5xf32>,"
        " tensor<1x5xf32>"
    )
    occurrences = lowered.get_text().count(expected_op)
    self.assertEqual(occurrences, 1)

  def test_input_single_tensor(self):
    """A lone tensor argument comes back as a tensor of the same shape."""
    result = optimization_barrier(torch.randn(1, 5))
    self.assertIsInstance(result, torch.Tensor)
    self.assertEqual(result.shape, (1, 5))

  def test_input_multiple_tensors(self):
    """Several tensor arguments come back as a tuple in the same order."""
    first = torch.randn(1, 5)
    second = torch.randn(1, 6)
    result = optimization_barrier(first, second)
    self.assertIsInstance(result, tuple)
    self.assertLen(result, 2)
    self.assertIsInstance(result[0], torch.Tensor)
    self.assertIsInstance(result[1], torch.Tensor)
    self.assertEqual(result[0].shape, (1, 5))
    self.assertEqual(result[1].shape, (1, 6))

  def test_input_nested_tensors(self):
    """A dict of tensors comes back as a dict with the same keys."""
    nested = {"foo": torch.randn(1, 5), "bar": torch.randn(1, 6)}
    result = optimization_barrier(nested)
    self.assertIsInstance(result, dict)
    self.assertLen(result, 2)
    self.assertIsInstance(result["foo"], torch.Tensor)
    self.assertIsInstance(result["bar"], torch.Tensor)
    self.assertEqual(result["foo"].shape, (1, 5))
    self.assertEqual(result["bar"].shape, (1, 6))