oscarandersson8218
diff --git a/‎backends/arm/_passes/arm_pass_manager.py‎
Lines changed: 8 additions & 0 deletions b/‎backends/arm/_passes/arm_pass_manager.py‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎backends/arm/_passes/insert_table_ops.py‎
Lines changed: 118 additions & 0 deletions b/‎backends/arm/_passes/insert_table_ops.py‎
Lines changed: 118 additions & 0 deletions
diff --git a/‎backends/arm/operators/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎backends/arm/operators/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/arm/operators/op_exp.py‎
Lines changed: 7 additions & 47 deletions b/‎backends/arm/operators/op_exp.py‎
Lines changed: 7 additions & 47 deletions
diff --git a/‎backends/arm/operators/op_log.py‎
Lines changed: 6 additions & 47 deletions b/‎backends/arm/operators/op_log.py‎
Lines changed: 6 additions & 47 deletions
diff --git a/‎backends/arm/operators/op_reciprocal.py‎
Lines changed: 7 additions & 50 deletions b/‎backends/arm/operators/op_reciprocal.py‎
Lines changed: 7 additions & 50 deletions
@@ -33,6 +33,7 @@
     FoldAndAnnotateQParamsPass,
     QuantizeFullArgument,
 )
+from executorch.backends.arm._passes.insert_table_ops import InsertTableOpsPass
 from executorch.backends.arm._passes.keep_dims_false_to_squeeze_pass import (
     KeepDimsFalseToSqueezePass,
 )
@@ -94,10 +95,17 @@ def transform_to_backend_pipeline(
                     exir_ops.edge.aten.add.Tensor,
                     exir_ops.edge.aten.avg_pool2d.default,
                     exir_ops.edge.aten.convolution.default,
+                    exir_ops.edge.aten.exp.default,
                     exir_ops.edge.aten.full.default,
+                    exir_ops.edge.aten.log.default,
+                    exir_ops.edge.aten.reciprocal.default,
+                    exir_ops.edge.aten.rsqrt.default,
+                    exir_ops.edge.aten.sigmoid.default,
+                    exir_ops.edge.aten.tanh.default,
                 ]
             )
         )
+        self.add_pass(InsertTableOpsPass(exported_program))
         for spec in compile_spec:
             if spec.key == "permute_memory_format":
                 memory_format = spec.value.decode()
 
@@ -0,0 +1,118 @@
+# Copyright 2024 Arm Limited and/or its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Callable
+
+import torch
+from executorch.backends.arm._passes.arm_pass_utils import create_node
+from executorch.backends.arm.tosa_quant_utils import QuantArgs
+from executorch.exir import ExportedProgram
+
+from executorch.exir.dialects._ops import ops as exir_ops
+
+from executorch.exir.pass_base import ExportPass, PassResult
+from torch.fx import GraphModule
+from torch.library import impl, Library
+
+lib = Library("tosa", "DEF")
+lib.define("_table(Tensor self) -> Tensor")
+
+
+@impl(lib, "_table")
+def _table_impl(*args, **kwargs):
+    return args[0]
+
+
+class InsertTableOpsPass(ExportPass):
+    """
+    Ops in self.table_ops need to be serialized as a TOSA TABLE. This pass replaces these
+    edge ops with a tosa._table(input: Tensor, target_str: str) where target_str == str(node.target).
+    When lowering the _table node, target_str will be used to find the corresponding torch operator,
+    which will be used to produce the table values in operators/op_table.py.
+    """
+
+    table_ops = {
+        exir_ops.edge.aten.exp.default: torch.exp,
+        exir_ops.edge.aten.log.default: torch.log,
+        exir_ops.edge.aten.reciprocal.default: torch.reciprocal,
+        exir_ops.edge.aten.rsqrt.default: torch.rsqrt,
+        exir_ops.edge.aten.sigmoid.default: torch.sigmoid,
+        exir_ops.edge.aten.tanh.default: torch.tanh,
+    }
+
+    def __init__(self, exported_program: ExportedProgram):
+        super().__init__()
+        self.exported_program = exported_program
+
+    def register_buffer(self, buffer_name: str, buffer: torch.Tensor) -> None:
+        """
+        Add buffer to self.exported_program.state_dict
+        """
+        self.exported_program.state_dict[buffer_name] = buffer
+
+    def generate_table_values(
+        self,
+        torch_op: Callable[[torch.Tensor], torch.Tensor],
+        in_quantargs: QuantArgs,
+        out_quantargs: QuantArgs,
+    ) -> torch.Tensor:
+        def f(x: torch.Tensor) -> torch.Tensor:
+            x = in_quantargs.dequantize_value(x)
+            x = torch_op(x)
+            return out_quantargs.quantize_value(x)
+
+        input_dtype = in_quantargs.dtype
+        steps = in_quantargs.qmax - in_quantargs.qmin + 1
+        return f(
+            torch.linspace(
+                start=in_quantargs.qmin,
+                end=in_quantargs.qmax,
+                steps=steps,
+                # use torch.int64 to avoid overflow when dequantizing (subtracting zp).
+                # e.g. torch.tensor(-50, dtype=torch.int8) - 100 == torch.tensor(106, dtype=torch.int8)
+                dtype=torch.int64,
+            )
+        ).to(dtype=input_dtype)
+
+    def call(self, graph_module: GraphModule) -> PassResult:
+        modified = False
+        for node in graph_module.graph.nodes:
+            if node.op != "call_function" or node.target not in self.table_ops:
+                continue
+            input_qparams = node.meta["input_qparams"]
+            output_qparams = node.meta["output_qparams"]
+            if len(input_qparams) == 0 or len(output_qparams) == 0:
+                # We only want to replace the node if it's quantized
+                continue
+            # Create table node
+            with graph_module.graph.inserting_before(node):
+                table_node = create_node(
+                    graph=graph_module.graph,
+                    op_target=torch.ops.tosa._table,
+                    args=(node.args[0],),
+                )
+                assert len(input_qparams) == 1
+                assert len(output_qparams) == 1
+                # Generate table buffer
+                buffer = self.generate_table_values(
+                    torch_op=self.table_ops[node.target],
+                    in_quantargs=input_qparams[0],
+                    out_quantargs=output_qparams[0],
+                )
+                # Register buffer in self.exported_program.state_dict
+                self.register_buffer(buffer_name=table_node.name, buffer=buffer)
+                node.replace_all_uses_with(table_node)
+            graph_module.graph.erase_node(node)
+            table_node.meta["input_qparams"] = input_qparams
+            table_node.meta["output_qparams"] = output_qparams
+            modified = True
+
+        if modified:
+            # retrace the graph to update the fake tensor types
+            graph_module = super().call(graph_module).graph_module
+
+            graph_module.recompile()
+        return PassResult(graph_module, modified)
@@ -37,6 +37,7 @@
     op_squeeze,
     op_sub,
     op_sum,
+    op_table,
     op_tanh,
     op_to_copy,
     op_transpose,
 
@@ -6,30 +6,25 @@
 # pyre-unsafe
 from typing import List
 
-import numpy as np
-
 import serializer.tosa_serializer as ts
 from executorch.backends.arm.operators.node_visitor import (
     NodeVisitor,
     register_node_visitor,
 )
 from executorch.backends.arm.tosa_mapping import TosaArg
+from executorch.backends.arm.tosa_specification import TosaSpecification
 
-from executorch.backends.arm.tosa_quant_utils import (
-    dequantize_value,
-    get_quant_arg_downstream,
-    get_quant_arg_upstream,
-    QuantArgs,
-    quantize_value,
-)
 from serializer.tosa_serializer import TosaOp
 from torch.fx import Node
 
 
 @register_node_visitor
-class ExpVisitor(NodeVisitor):
+class ExpVisitor_0_80_MI(NodeVisitor):
     target = "aten.exp.default"
 
+    # BI case should be handled by op_table
+    tosa_specs = [TosaSpecification.create_from_string("TOSA-0.80+MI")]
+
     def __init__(self, *args):
         super().__init__(*args)
 
@@ -43,41 +38,6 @@ def define_node(
     ) -> None:
 
         assert len(node.all_input_nodes) == 1
+        assert inputs[0].dtype == output.dtype == ts.DType.FP32
 
-        if is_quant_node:
-            # Assume quantized input is 8 bit.
-
-            # Create attribute for 8 bit table lookup.
-            input_node = node.all_input_nodes[0]
-            in_quantargs = get_quant_arg_upstream(input_node)
-            output_node = list(node.users)[0]
-            out_quantargs = get_quant_arg_downstream(output_node)
-
-            table = exp_table_8bit(in_quantargs, out_quantargs)
-            table_attr = ts.TosaSerializerAttribute()
-            table_attr.TableAttribute(table)
-
-            tosa_graph.addOperator(
-                TosaOp.Op().TABLE, [inputs[0].name], [output.name], table_attr
-            )
-        else:
-            tosa_graph.addOperator(TosaOp.Op().EXP, [inputs[0].name], [output.name])
-
-
-def exp_table_8bit(in_quantargs: QuantArgs, out_quantargs: QuantArgs):
-    """
-    Returns a table mapping 256 entries to exp([qmin,qmax])
-    """
-
-    def exp(x):
-        # Convert quantized input to floating point exp input space.
-        v = dequantize_value(x, in_quantargs)
-        # Compute exp.
-        v = np.exp(v)
-        # Convert exp output back to quantized space.
-        return quantize_value(v, out_quantargs)
-
-    return [
-        exp(x)
-        for x in np.linspace(in_quantargs.qmin, in_quantargs.qmax, 256, dtype=np.int8)
-    ]
+        tosa_graph.addOperator(TosaOp.Op().EXP, [inputs[0].name], [output.name])
@@ -6,22 +6,14 @@
 # pyre-unsafe
 from typing import List
 
-import numpy as np
-
 import serializer.tosa_serializer as ts
 from executorch.backends.arm.operators.node_visitor import (
     NodeVisitor,
     register_node_visitor,
 )
 from executorch.backends.arm.tosa_mapping import TosaArg
+from executorch.backends.arm.tosa_specification import TosaSpecification
 
-from executorch.backends.arm.tosa_quant_utils import (
-    dequantize_value,
-    get_quant_arg_downstream,
-    get_quant_arg_upstream,
-    QuantArgs,
-    quantize_value,
-)
 from serializer.tosa_serializer import TosaOp
 from torch.fx import Node
 
@@ -30,6 +22,9 @@
 class LogVisitor(NodeVisitor):
     target = "aten.log.default"
 
+    # BI case should be handled by op_table
+    tosa_specs = [TosaSpecification.create_from_string("TOSA-0.80+MI")]
+
     def __init__(self, *args):
         super().__init__(*args)
 
@@ -41,44 +36,8 @@ def define_node(
         output: TosaArg,
         is_quant_node: bool,
     ) -> None:
-
         assert len(node.all_input_nodes) == 1
         assert len(node.users) == 1
+        assert inputs[0].dtype == output.dtype == ts.DType.FP32
 
-        if is_quant_node:
-            # Assume quantized input is 8 bit.
-
-            # Create attribute for 8 bit table lookup.
-            input_node = node.all_input_nodes[0]
-            in_quantargs = get_quant_arg_upstream(input_node)
-            output_node = list(node.users)[0]
-            out_quantargs = get_quant_arg_downstream(output_node)
-
-            table = log_table_8bit(in_quantargs, out_quantargs)
-            table_attr = ts.TosaSerializerAttribute()
-            table_attr.TableAttribute(table)
-
-            tosa_graph.addOperator(
-                TosaOp.Op().TABLE, [inputs[0].name], [output.name], table_attr
-            )
-        else:
-            tosa_graph.addOperator(TosaOp.Op().LOG, [inputs[0].name], [output.name])
-
-
-def log_table_8bit(in_quantargs: QuantArgs, out_quantargs: QuantArgs):
-    """
-    Returns a table mapping 256 entries to log([qmin,qmax])
-    """
-
-    def log(x):
-        # Convert quantized input to floating point log input space.
-        v = dequantize_value(x, in_quantargs)
-        # Compute log.
-        v = np.log(v)
-        # Convert log output back to quantized space.
-        return quantize_value(v, out_quantargs)
-
-    return [
-        log(x)
-        for x in np.linspace(in_quantargs.qmin, in_quantargs.qmax, 256, dtype=np.int8)
-    ]
+        tosa_graph.addOperator(TosaOp.Op().LOG, [inputs[0].name], [output.name])
@@ -6,29 +6,24 @@
 # pyre-unsafe
 from typing import List
 
-import numpy as np
-
 import serializer.tosa_serializer as ts
 import torch
 from executorch.backends.arm.operators.node_visitor import (
     NodeVisitor,
     register_node_visitor,
 )
 from executorch.backends.arm.tosa_mapping import TosaArg
-from executorch.backends.arm.tosa_quant_utils import (
-    dequantize_value,
-    get_quant_arg_downstream,
-    get_quant_arg_upstream,
-    QuantArgs,
-    quantize_value,
-)
+from executorch.backends.arm.tosa_specification import TosaSpecification
 from serializer.tosa_serializer import TosaOp
 
 
 @register_node_visitor
-class DivVisitor(NodeVisitor):
+class ReciprocalVisitor_080_MI(NodeVisitor):
     target = "aten.reciprocal.default"
 
+    # BI case should be handled by op_table
+    tosa_specs = [TosaSpecification.create_from_string("TOSA-0.80+MI")]
+
     def __init__(self, *args):
         super().__init__(*args)
 
@@ -40,43 +35,5 @@ def define_node(
         output: TosaArg,
         is_quant_node: bool,
     ) -> None:
-        # 1/X
-
-        if is_quant_node:
-            input = inputs[0]
-            input_qargs = get_quant_arg_upstream(node.all_input_nodes[0])
-            output_qargs = get_quant_arg_downstream(list(node.users)[0])
-
-            div_table = div_table_8bit(input_qargs, output_qargs)
-
-            table_attr = ts.TosaSerializerAttribute()
-            table_attr.TableAttribute(div_table)
-            tosa_graph.addOperator(
-                TosaOp.Op().TABLE, [input.name], [output.name], table_attr
-            )
-
-        else:
-            tosa_graph.addOperator(
-                TosaOp.Op().RECIPROCAL, [inputs[0].name], [output.name]
-            )
-
-
-def div_table_8bit(in_quantargs: QuantArgs, out_quantargs: QuantArgs):
-    """
-    Returns a table mapping 256 entries to div([qmin,qmax])
-    """
-
-    def div(x):
-        # Convert quantized input to floating point div input space.
-        v1 = dequantize_value(x, in_quantargs)
-        # Compute div.
-        v2 = 1.0 / v1
-        # Convert div output back to quantized space.
-        v3 = quantize_value(v2, out_quantargs)
-
-        return v3
-
-    return [
-        div(x)
-        for x in np.linspace(in_quantargs.qmin, in_quantargs.qmax, 256, dtype=np.int8)
-    ]
+        assert inputs[0].dtype == output.dtype == ts.DType.FP32
+        tosa_graph.addOperator(TosaOp.Op().RECIPROCAL, [inputs[0].name], [output.name])
Original file line number	Diff line number	Diff line change
`@@ -33,6 +33,7 @@`
`33`	`33`	`FoldAndAnnotateQParamsPass,`
`34`	`34`	`QuantizeFullArgument,`
`35`	`35`	`)`
	`36`	`+from executorch.backends.arm._passes.insert_table_ops import InsertTableOpsPass`
`36`	`37`	`from executorch.backends.arm._passes.keep_dims_false_to_squeeze_pass import (`
`37`	`38`	`KeepDimsFalseToSqueezePass,`
`38`	`39`	`)`
`@@ -94,10 +95,17 @@ def transform_to_backend_pipeline(`
`94`	`95`	`exir_ops.edge.aten.add.Tensor,`
`95`	`96`	`exir_ops.edge.aten.avg_pool2d.default,`
`96`	`97`	`exir_ops.edge.aten.convolution.default,`
	`98`	`+ exir_ops.edge.aten.exp.default,`
`97`	`99`	`exir_ops.edge.aten.full.default,`
	`100`	`+ exir_ops.edge.aten.log.default,`
	`101`	`+ exir_ops.edge.aten.reciprocal.default,`
	`102`	`+ exir_ops.edge.aten.rsqrt.default,`
	`103`	`+ exir_ops.edge.aten.sigmoid.default,`
	`104`	`+ exir_ops.edge.aten.tanh.default,`
`98`	`105`	`]`
`99`	`106`	`)`
`100`	`107`	`)`
	`108`	`+ self.add_pass(InsertTableOpsPass(exported_program))`
`101`	`109`	`for spec in compile_spec:`
`102`	`110`	`if spec.key == "permute_memory_format":`
`103`	`111`	`memory_format = spec.value.decode()`