Skip to content

Commit 27b61b3

Browse files
Author: pytorchbot — committed "2024-11-12 nightly release (dc41596)"
1 parent a453011 commit 27b61b3

File tree

82 files changed

+2724
-256
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

82 files changed

+2724
-256
lines changed

.ci/scripts/test_llama_runner_eager.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,12 @@ run_and_verify() {
4242
-d fp32 \
4343
--max_seq_length 32 \
4444
--temperature 0 \
45+
--show_tokens \
4546
--prompt "Once upon a time," > result.txt
4647

4748
# Verify result.txt
4849
RESULT=$(cat result.txt)
49-
EXPECTED_RESULT="there was a little girl"
50+
EXPECTED_RESULT="727, 471, 263, 2217, 7826, 4257, 365, 2354, 29889, 2296, 18012, 304, 1708, 5377, 297, 278, 6575, 845, 457, 29889, 3118, 2462, 29892, 1183, 4446, 263"
5051
if [[ "${RESULT}" == *"${EXPECTED_RESULT}"* ]]; then
5152
echo "Actual result: ${RESULT}"
5253
echo "Success"

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,6 @@
6464
[submodule "third-party/pybind11"]
6565
path = third-party/pybind11
6666
url = https://github.com/pybind/pybind11.git
67+
[submodule "third-party/ao"]
68+
path = third-party/ao
69+
url = https://github.com/pytorch/ao.git

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,11 @@ We recommend using the latest release tag from the
4343
See [CONTRIBUTING.md](CONTRIBUTING.md) for details about issues, PRs, code
4444
style, CI jobs, and other development topics.
4545

46+
To connect with us and other community members, we invite you to join PyTorch Slack community by filling out this [form](https://docs.google.com/forms/d/e/1FAIpQLSeADnUNW36fjKjYzyHDOzEB_abKQE9b6gqqW9NXse6O0MWh0A/viewform). Once you've joined, you can:
47+
* Head to the `#executorch-general` channel for general questions, discussion, and community support.
48+
* Join the `#executorch-contributors` channel if you're interested in contributing directly to project development.
49+
50+
4651
## Directory Structure
4752

4853
```

backends/arm/TARGETS

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,18 @@ python_library(
7070
],
7171
)
7272

73+
python_library(
74+
name = "tosa_specification",
75+
srcs = [
76+
"tosa_specification.py",
77+
],
78+
typing = True,
79+
deps = [
80+
"fbsource//third-party/pypi/packaging:packaging",
81+
"//executorch/exir/backend:compile_spec_schema",
82+
],
83+
)
84+
7385
python_library(
7486
name = "tosa_utils",
7587
srcs = [

backends/arm/_passes/annotate_channels_last_dim_order_pass.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
get_first_fake_tensor,
1515
insert_q_dq_pair,
1616
)
17-
from executorch.backends.arm.tosa_quant_utils import dq_op, q_op
17+
from executorch.backends.arm.tosa_quant_utils import dq_op, q_op, register_passable_op
1818
from executorch.backends.arm.tosa_utils import is_consumer_node_depthwise_conv2d
1919
from executorch.exir.dialects._ops import ops as exir_ops
2020
from executorch.exir.pass_base import ExportPass, PassResult
@@ -42,6 +42,9 @@ def _transpose_impl(*args, **kwargs):
4242
return args[0]
4343

4444

45+
register_passable_op(torch.ops.passthrough_to_tosa._transpose)
46+
47+
4548
class AnnotateChannelsLastDimOrder(ExportPass):
4649
"""
4750
Annotates each node with a tosa_dim_order. tosa_dim_order can be seen as a channels-last dim-order

backends/arm/_passes/insert_squeeze_after_sum_pass.py

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,7 @@
88

99
import torch
1010
import torch.fx
11-
from executorch.backends.arm._passes.arm_pass_utils import create_node, insert_q_dq_pair
12-
13-
from executorch.backends.arm.tosa_quant_utils import get_quant_node_args, is_quant_node
11+
from executorch.backends.arm._passes.arm_pass_utils import create_node
1412
from executorch.exir.dialects._ops import ops as exir_ops
1513
from executorch.exir.pass_base import ExportPass, PassResult
1614

@@ -28,8 +26,6 @@ class InsertSqueezeAfterSumPass(ExportPass):
2826
sum(dims, keep_dim = False)
2927
After pass:
3028
sum(dims, keep_dim = True)
31-
(q)
32-
(dq)
3329
squeeze(dim = dims)
3430
"""
3531

@@ -45,12 +41,6 @@ def call(self, graph_module: torch.fx.GraphModule):
4541
continue
4642

4743
dim_list = cast(list[int], sum_node.args[1])
48-
quantized = is_quant_node(sum_node)
49-
if quantized:
50-
qparams = get_quant_node_args(sum_node.all_input_nodes[0])
51-
qparams = qparams + (torch.int8,)
52-
else:
53-
qparams = None
5444

5545
# Add keep_dim = True arg to sum node.
5646
sum_node.args = sum_node.args[0:2] + (True,)
@@ -61,8 +51,6 @@ def call(self, graph_module: torch.fx.GraphModule):
6151
)
6252
sum_node.replace_all_uses_with(squeeze_node)
6353
squeeze_node.args = (sum_node, dim_list)
64-
if quantized:
65-
sum_node = insert_q_dq_pair(graph_module.graph, sum_node, qparams)
6654
graph_module.graph.eliminate_dead_code()
6755
graph_module.recompile()
6856
graph_module = super().call(graph_module).graph_module

backends/arm/_passes/size_adjust_conv2d_pass.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from typing import cast, Optional
1010

1111
import torch.fx
12-
from executorch.backends.arm.tosa_quant_utils import is_quant_node
12+
from executorch.backends.arm.tosa_quant_utils import is_node_quantized
1313
from executorch.exir.dialects._ops import ops as exir_ops
1414
from executorch.exir.pass_base import ExportPass, PassResult
1515
from torch._ops import OpOverload
@@ -113,7 +113,7 @@ def call(self, graph_module: torch.fx.GraphModule):
113113
slice_node = graph.create_node(
114114
"call_function", self.slice_op, (last_node,) + args
115115
)
116-
if is_quant_node(last_node):
116+
if is_node_quantized(last_node):
117117
q_params = last_node.args[1:]
118118
dq_node = insert_q_dq_pair(
119119
graph_module.graph, slice_node, q_params

backends/arm/arm_backend.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ def __init__(self):
5252
# TODO MLETORCH-265 Remove permute_nhwc flag
5353
self.permute_nhwc = False
5454
self.quantize_io = False
55+
self.tosa_version = None
5556

5657
def ethosu_compile_spec(
5758
self,

backends/arm/operators/TARGETS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ python_library(
77
typing = True,
88
deps = [
99
"//executorch/backends/arm:tosa_mapping",
10+
"//executorch/backends/arm:tosa_specification",
1011
],
1112
)
1213

backends/arm/operators/op_bmm.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,11 @@
1414
register_node_visitor,
1515
)
1616
from executorch.backends.arm.tosa_mapping import TosaArg
17-
from executorch.backends.arm.tosa_quant_utils import build_rescale, get_quant_node_args
17+
from executorch.backends.arm.tosa_quant_utils import (
18+
build_rescale,
19+
get_quant_arg_downstream,
20+
get_quant_arg_upstream,
21+
)
1822
from executorch.backends.arm.tosa_utils import get_two_inputs
1923
from serializer.tosa_serializer import TosaOp
2024

@@ -42,8 +46,10 @@ def define_node(
4246
# For INT8, we need to get the zero points and add an intermediate tensor
4347
# for a later rescale.
4448
if is_quant_node:
45-
input0_zp = get_quant_node_args(input0).zp
46-
input1_zp = get_quant_node_args(input1).zp
49+
input0_q_params = get_quant_arg_upstream(input0)
50+
input1_q_params = get_quant_arg_upstream(input1)
51+
input0_zp = input0_q_params.zp
52+
input1_zp = input1_q_params.zp
4753
bmm_result = tosa_graph.addIntermediate(output.shape, ts.DType.INT32)
4854
bmm_output_name = bmm_result.name
4955
else:
@@ -63,9 +69,7 @@ def define_node(
6369

6470
# As INT8 accumulates into INT32, we need to rescale it back to INT8
6571
if is_quant_node:
66-
input0_q_params = get_quant_node_args(input0)
67-
input1_q_params = get_quant_node_args(input1)
68-
output_q_params = get_quant_node_args(list(node.users)[0])
72+
output_q_params = get_quant_arg_downstream(list(node.users)[0])
6973

7074
final_output_scale = (
7175
input0_q_params.scale * input1_q_params.scale

0 commit comments

Comments (0)