Fix U55 int16 table generation (rsqrt, sigmoid, tanh)

3l1 · facebook-github-bot · commit 76c7ac776055 · 2025-10-26T20:26:26.000-07:00
Summary:
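This diff fixes critical runtime bugs in U55 INT16 table operations (rsqrt, sigmoid, tanh). `InsertTableOpsPass` now receives the TOSA spec; for U55 targets it generates the 513-entry int16 table with TFLite-style midpoint bias correction and, for int16 inputs, uses the TABLE output directly instead of appending a RESCALE.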

**WARNING: This diff must land together with the Regor diff D85535937; the two are split only because the Regor change maps to a separate OSS GitHub repository (the Arm Regor git repo).**

bypass-github-export-checks
bypass-github-pytorch-ci-checks
bypass-github-executorch-ci-checks

Differential Revision: D85312140
diff --git a/backends/arm/_passes/arm_pass_manager.py b/backends/arm/_passes/arm_pass_manager.py
@@ -200,7 +200,7 @@ def _tosa_INT_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
 
         self.add_pass(FuseViewCopyTransform())
         self.add_pass(FuseConstantArgsPass(exported_program))
-        self.add_pass(InsertTableOpsPass(exported_program))
+        self.add_pass(InsertTableOpsPass(exported_program, self.tosa_spec))
         # If we have a conv2d with int16 activation split up into a convolution
         # and an addition, to work-around the lack of support for int48 in torch
         # needs to happen before RewriteConv2dPass, but after the table ops are inserted
@@ -297,7 +297,7 @@ def _tosa_FP_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
         self.add_pass(RewriteConv2dPass(exported_program))
         self.add_pass(CastInt64BuffersToInt32Pass(exported_program))
         self.add_pass(RewriteUpsamplePass())
-        self.add_pass(InsertTableOpsPass(exported_program))
+        self.add_pass(InsertTableOpsPass(exported_program, self.tosa_spec))
         self.add_pass(RewriteMatmulPass())
         self.add_pass(FuseEqualPlaceholdersPass(exported_program))
         self.add_pass(ToTosaMemoryFormatPass(exported_program))
diff --git a/backends/arm/_passes/insert_table_ops.py b/backends/arm/_passes/insert_table_ops.py
@@ -119,9 +119,10 @@ class InsertTableOpsPass(ArmPass):
 
     _passes_required_after: Set[Type[ExportPass]] = set()
 
-    def __init__(self, exported_program: ExportedProgram) -> None:
+    def __init__(self, exported_program: ExportedProgram, tosa_spec=None) -> None:
         super().__init__()
         self.exported_program = exported_program
+        self.tosa_spec = tosa_spec
         self.table_ops = TableOps(exported_program)
 
     def register_buffer(self, buffer_name: str, buffer: torch.Tensor) -> None:
@@ -157,6 +158,82 @@ def f(x: torch.Tensor) -> torch.Tensor:
             0,
         )
 
+    def generate_16_bit_table_values_u55_tflite_style(
+        self,
+        torch_op: Callable[[torch.Tensor], torch.Tensor],
+        in_quantargs: QuantArgs,
+        out_quantargs: QuantArgs,
+    ) -> tuple[torch.Tensor, int]:
+        """
+        Generate a 513-entry int16 table for U55 with TFLite-style bias correction.
+
+        torch_op is evaluated in float space at the base, midpoint and end of each
+        of the 512 equal intervals covering the dequantized input range. Each base
+        sample is shifted by half the rounded error between the linearly
+        interpolated midpoint and the true midpoint, then clamped to
+        [qmin_out, qmax_out]. The last entry (i == 512) gets no bias correction.
+        Non-finite results saturate: -inf to qmin_out, +inf and NaN to qmax_out.
+
+        Args:
+            torch_op: Float function to tabulate.
+            in_quantargs: Input quantization; qmin, qmax, scale and zp are read.
+            out_quantargs: Output quantization; qmin, qmax, scale and zp are read.
+
+        Returns:
+            The int16 table tensor and a rescale shift, which is always 0 here.
+        """
+        import math
+
+        def round_half_away(x: float) -> int:
+            """Round half away from zero (C std::round semantics), unlike round()."""
+            return int(math.copysign(math.floor(abs(x) + 0.5), x))
+
+        # Calculate input/output ranges in FLOAT space (like TFLite)
+        qmin_out, qmax_out = out_quantargs.qmin, out_quantargs.qmax
+        input_min = in_quantargs.scale * (in_quantargs.qmin - in_quantargs.zp)
+        input_max = in_quantargs.scale * (in_quantargs.qmax - in_quantargs.zp)
+        output_min = out_quantargs.scale * (qmin_out - out_quantargs.zp)
+        output_max = out_quantargs.scale * (qmax_out - out_quantargs.zp)
+
+        steps = 512
+        step = (input_max - input_min) / steps
+        half_step = step / 2.0
+        output_scaling_inv = (qmax_out - qmin_out + 1) / (output_max - output_min)
+
+        def f(x_float: float) -> float:
+            """Evaluate torch_op at x_float, saturating non-finite results."""
+            result = torch_op(torch.tensor([x_float], dtype=torch.float32)).item()
+            if result == -math.inf:
+                return qmin_out / output_scaling_inv  # Quantizes to qmin_out
+            if math.isnan(result) or math.isinf(result):
+                # The sentinel must live in OUTPUT space; an input-space value
+                # such as input_max lands on qmax_out only by coincidence.
+                return qmax_out / output_scaling_inv  # Quantizes to qmax_out
+            return result
+
+        lut_values = []
+
+        for i in range(steps + 1):  # 513 values (0 to 512)
+            val = f(input_min + i * step)
+            sample_val = round_half_away(val * output_scaling_inv)
+            bias = 0  # Last value (i == steps): no bias correction
+            if i < steps:
+                val_midpoint = f(input_min + i * step + half_step)
+                val_next = f(input_min + (i + 1) * step)
+                midpoint_interp_val = round_half_away(
+                    (val_next * output_scaling_inv + sample_val) / 2.0
+                )
+                midpoint_val = round_half_away(val_midpoint * output_scaling_inv)
+                # midpoint_err is an integer, so err / 2 hits exact .5 ties often;
+                # the rounding mode therefore changes the bias for odd errors.
+                midpoint_err = midpoint_interp_val - midpoint_val
+                bias = round_half_away(midpoint_err / 2.0)
+            lut_values.append(int(max(qmin_out, min(qmax_out, sample_val - bias))))
+
+        buffer = torch.tensor(lut_values, dtype=torch.int16).contiguous()
+
+        return buffer, 0
+
     def generate_16_bit_table_values(
         self,
         torch_op: Callable[[torch.Tensor], torch.Tensor],
@@ -178,6 +255,12 @@ def generate_16_bit_table_values(
         The function returns rescale_lshift which says how much to rescale after the table. This value can negative.
         """
 
+        # U55 needs TFLite-style table generation with bias correction
+        if self.tosa_spec is not None and self.tosa_spec.is_U55_subset:
+            return self.generate_16_bit_table_values_u55_tflite_style(
+                torch_op, in_quantargs, out_quantargs
+            )
+
         def f(x: torch.Tensor) -> torch.Tensor:
             x = x.clamp(in_quantargs.qmin, in_quantargs.qmax).to(
                 dtype=in_quantargs.dtype
@@ -280,7 +363,15 @@ def call(self, graph_module: GraphModule) -> PassResult:
                 )
                 output_node = table_op_node
 
-                if lshift != 0:
+                if (
+                    self.tosa_spec
+                    and self.tosa_spec.is_U55_subset
+                    and input_qparams[0].dtype == torch.int16
+                ):
+                    # U55: NO RESCALE needed - use table output directly
+                    # Adding RESCALE creates a second operation that overwrites the table output!
+                    output_node = table_op_node  # Use table output directly!
+                elif lshift != 0:
                     scale = 2.0**lshift
                     rescale_node = create_node(
                         graph=graph_module.graph,
diff --git a/backends/arm/test/ops/test_rsqrt.py b/backends/arm/test/ops/test_rsqrt.py
@@ -156,9 +156,9 @@ def test_rsqrt_int16_tosa_INT(test_tensor: torch.Tensor):
 
 @common.parametrize("test_tensor", Rsqrt.test_parameters)
 @common.XfailIfNoCorstone300
-@pytest.mark.xfail(
-    reason="MLETORCH-707: AssertionError: Output 0 does not match reference output."
-)
+# @pytest.mark.xfail(
+#     reason="MLETORCH-707: AssertionError: Output 0 does not match reference output."
+# )
 def test_rsqrt_int16_u55_INT16(test_tensor: torch.Tensor):
     """Test rsqrt operation with int16 quantization on U55"""
     pipeline = EthosU55PipelineINT[input_t1](
@@ -182,9 +182,9 @@ def test_rsqrt_int16_u55_INT16(test_tensor: torch.Tensor):
 
 @common.parametrize("test_tensor", Rsqrt.test_parameters)
 @common.XfailIfNoCorstone320
-@pytest.mark.xfail(
-    reason="MLETORCH-707: AssertionError: Output 0 does not match reference output."
-)
+# @pytest.mark.xfail(
+#     reason="MLETORCH-707: AssertionError: Output 0 does not match reference output."
+# )
 def test_rsqrt_int16_u85_INT16(test_tensor: torch.Tensor):
     """Test rsqrt operation with int16 quantization on U85"""
     pipeline = EthosU85PipelineINT[input_t1](
diff --git a/backends/arm/test/ops/test_sigmoid.py b/backends/arm/test/ops/test_sigmoid.py
@@ -312,9 +312,6 @@ def test_sigmoid_16a8w_tosa_INT(test_data: torch.Tensor):
 
 @common.parametrize("test_data", test_data_suite)
 @common.XfailIfNoCorstone300
-@pytest.mark.xfail(
-    reason="MLETORCH-707: AssertionError: Output 0 does not match reference output."
-)
 def test_sigmoid_16a8w_u55_INT16(test_data: torch.Tensor):
     """Test sigmoid operation with 16A8W quantization on U55 (16-bit activations, 8-bit weights)"""
     per_channel_quantization = False
diff --git a/backends/arm/test/ops/test_tanh.py b/backends/arm/test/ops/test_tanh.py
@@ -163,9 +163,6 @@ def test_tanh_16a8w_tosa_INT(test_data: torch.Tensor):
 
 @common.parametrize("test_data", test_data_suite)
 @common.XfailIfNoCorstone300
-@pytest.mark.xfail(
-    reason="MLETORCH-707: AssertionError: Output 0 does not match reference output."
-)
 def test_tanh_16a8w_u55_INT16(test_data: torch.Tensor):
     """Test tanh operation with 16A8W quantization on U55 (16-bit activations, 8-bit weights)"""
     per_channel_quantization = False