Fix ai_edge_torch for g3 torch upgrade

chunnienc · copybara-github · commit 56b43759b221 · 2025-12-17T16:24:17.000-08:00
PiperOrigin-RevId: 845961480
diff --git a/ai_edge_torch/fx_infra/__init__.py b/ai_edge_torch/fx_infra/__init__.py
@@ -30,3 +30,4 @@
 
 CanonicalizePass = _canonicalize_pass.CanonicalizePass
 safe_run_decompositions = _safe_run_decompositions.safe_run_decompositions
+annotate_force_decomp = _safe_run_decompositions.annotate_force_decomp
diff --git a/ai_edge_torch/fx_infra/_safe_run_decompositions.py b/ai_edge_torch/fx_infra/_safe_run_decompositions.py
@@ -14,6 +14,7 @@
 # ==============================================================================
 """ExportedProgram.run_decompositions wrapper to handle unexpected export behavior."""
 import operator
+from typing import Any, Callable
 import torch
 
 
@@ -59,6 +60,15 @@ def _require_decomp(
   return False
 
 
+_FORCE_DECOMP_ATTR = "_ai_edge_torch_force_decomp"  # Marker attribute name; safe_run_decompositions looks for it on decomps via hasattr.
+
+
+def annotate_force_decomp(decomp: Callable[..., Any]):  # Usable as a decorator: tags `decomp` in place and returns the same object.
+  """Annotates a decomp to force it to be run (at least shallowly) in safe_run_decompositions."""
+  setattr(decomp, _FORCE_DECOMP_ATTR, _FORCE_DECOMP_ATTR)  # Only the attribute's presence is checked; the stored value is arbitrary.
+  return decomp
+
+
 def safe_run_decompositions(exported_program, decomp_table=None, can_skip=True):
   """Wrapper for ExportedProgram.run_decompositions to handle unexpected export behavior."""
 
@@ -79,6 +89,14 @@ def safe_run_decompositions(exported_program, decomp_table=None, can_skip=True):
       # back to one aten.view.
       node.target = lambda self, size: torch.reshape(self.contiguous(), size)
 
+    # Torch may skip some decompositions even if target is in decomp_table.
+    # The following ensures the target is always run through the decompositions
+    # shallowly if it has _FORCE_DECOMP_ATTR.
+    if decomp_table and node.target in decomp_table:
+      decomp = decomp_table[node.target]
+      if hasattr(decomp, _FORCE_DECOMP_ATTR):
+        node.target = decomp
+
   exported_program = exported_program.run_decompositions(decomp_table)
 
   if hasattr(torch.ops.aten, "_assert_tensor_metadata"):
diff --git a/ai_edge_torch/odml_torch/lowerings/_decomp_registry.py b/ai_edge_torch/odml_torch/lowerings/_decomp_registry.py
@@ -14,13 +14,72 @@
 # ==============================================================================
 """Torch export decompositions to run before lowering."""
 
+import functools
 from ai_edge_torch import fx_infra
 import torch
 
 
+# Fork from pytorch/torch/_decomp/decompositions.py
+def upsample_compute_output_size(input_size, output_size, scale_factors):  # Resolves the spatial output size from exactly one of output_size / scale_factors.
+  spatial_dimensions = len(input_size) - 2  # Skips the two leading dims (presumably batch and channel - confirm against callers).
+  if output_size is not None:
+    torch._check(
+        scale_factors is None,
+        lambda: "Must specify exactly one of output_size and scale_factors",
+    )
+    torch._check(len(output_size) == spatial_dimensions, lambda: "")
+    return output_size  # An explicit size is returned unchanged.
+  if scale_factors is not None:
+    # NB: redundant, since output_size is always None on this path; presumably kept to mirror the forked upstream code.
+    torch._check(
+        output_size is None,
+        lambda: "Must specify exactly one of output_size and scale_factors",
+    )
+    torch._check(len(scale_factors) == spatial_dimensions, lambda: "")
+    output_size = []
+    for i, s in enumerate(scale_factors):
+      if int(s) == s:  # Integral scale: multiply by int(s) directly, without torch.sym_int.
+        output_size.append(input_size[i + 2] * int(s))
+      else:
+        output_size.append(torch.sym_int(input_size[i + 2] * s))  # Non-integral scale: the product is converted with torch.sym_int.
+    return output_size
+  torch._check(
+      False, lambda: "Must specify exactly one of output_size and scale_factors"  # Reached only when both arguments are None.
+  )
+
+
+# Fork from pytorch/torch/_decomp/decompositions.py
+def _compute_upsample_nearest_indices(input, output_size, scales, exact=False):  # Builds one int64 source-index tensor per spatial dim of the output.
+  indices = []
+  num_spatial_dims = len(output_size)
+  offset = 0.5 if exact else 0.0  # exact=True shifts every output index by 0.5 before scaling.
+
+  for d in range(num_spatial_dims):
+    osize = output_size[d]
+    isize = input.shape[-num_spatial_dims + d]  # Spatial dims are read from the trailing end of input.shape.
+    scale = (
+        isize / (isize * scales[d]) if scales[d] is not None else isize / osize  # Explicit scale: algebraically 1 / scales[d]; otherwise the input/output size ratio.
+    )
+
+    output_indices = torch.arange(
+        osize, dtype=torch.float32, device=input.device
+    )
+    input_indices = ((output_indices + offset) * scale).to(torch.int64)  # The int64 cast drops the fractional part.
+    for _ in range(num_spatial_dims - 1 - d):
+      input_indices = input_indices.unsqueeze(-1)  # Earlier dims get trailing size-1 axes so the per-dim index tensors can broadcast together.
+    indices.append(input_indices)
+  return tuple(indices)
+
+
+# Fork from pytorch/torch/_decomp/decompositions.py
+def _upsample_nearest2d_common(input, h_indices, w_indices):  # Gathers `input` at the given row/column indices and returns a contiguous result.
+  result = torch.ops.aten.index(input, (None, None, h_indices, w_indices))  # None leaves the first two dims unindexed; dims 2 and 3 are indexed by h/w.
+  result = result.contiguous()
+  return result
+
+
 fx_infra.decomp.update_pre_lower_decomp(
     torch._decomp.get_decompositions([
-        torch.ops.aten.upsample_nearest2d,
         torch.ops.aten._native_batch_norm_legit.no_stats,
         torch.ops.aten._native_batch_norm_legit_functional,
         torch.ops.aten._adaptive_avg_pool2d,
@@ -35,11 +94,44 @@
         torch.ops.aten.replication_pad2d,
         torch.ops.aten.replication_pad3d,
         torch.ops.aten.upsample_bilinear2d.vec,
-        torch.ops.aten.upsample_nearest2d.vec,
         torch.ops.aten.addmm,
     ])
 )
 
+
+@functools.partial(  # As a decorator this calls add_pre_lower_decomp(<op below>, <tagged function>).
+    fx_infra.decomp.add_pre_lower_decomp,
+    torch.ops.aten.upsample_nearest2d.default,
+)  # NOTE(review): the module-level name is rebound to whatever add_pre_lower_decomp returns - confirm.
+@fx_infra.annotate_force_decomp  # Innermost decorator, so the function is tagged before it is passed on.
+def upsample_nearest2d(input, output_size, scales_h=None, scales_w=None):
+  h_indices, w_indices = _compute_upsample_nearest_indices(
+      input, output_size, (scales_h, scales_w)  # Scales are packed as (height, width) to line up with output_size.
+  )
+  return _upsample_nearest2d_common(input, h_indices, w_indices)
+
+
+def get_scale_value(scales, idx):  # Returns scales[idx], or None when no scales were supplied.
+  if scales is None:
+    return None
+  return scales[idx]
+
+
+@functools.partial(  # As a decorator this calls add_pre_lower_decomp(<op below>, <tagged function>).
+    fx_infra.decomp.add_pre_lower_decomp,
+    torch.ops.aten.upsample_nearest2d.vec,
+)
+@fx_infra.annotate_force_decomp  # Innermost decorator, so the function is tagged before it is passed on.
+def upsample_nearest2d_vec(input, output_size, scale_factors):  # Re-expresses the .vec overload as a call to the .default overload.
+  osize = upsample_compute_output_size(input.size(), output_size, scale_factors)  # Resolve the output size before dispatching.
+  scale_h = get_scale_value(scale_factors, 0)  # Index 0 is passed on as the height scale, index 1 as the width scale.
+  scale_w = get_scale_value(scale_factors, 1)
+
+  return torch.ops.aten.upsample_nearest2d.default(
+      input, osize, scale_h, scale_w
+  )
+
+
 fx_infra.decomp.remove_pre_lower_decomp(torch.ops.aten.roll)
 
 # Torch's default einsum impl/decompositions is less efficient and

Original file line number	Diff line number	Diff line change
`@@ -30,3 +30,4 @@`
`30`	`30`
`31`	`31`	`CanonicalizePass = _canonicalize_pass.CanonicalizePass`
`32`	`32`	`safe_run_decompositions = _safe_run_decompositions.safe_run_decompositions`
	`33`	`+annotate_force_decomp = _safe_run_decompositions.annotate_force_decomp`