pytorch
diff --git a/.ci/scripts/test_model.sh b/.ci/scripts/test_model.sh
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
Lines changed: 1 addition & 1 deletion
diff --git a/CMakeLists.txt b/CMakeLists.txt
Lines changed: 1 addition & 3 deletions
diff --git a/backends/apple/coreml/compiler/torch_ops.py b/backends/apple/coreml/compiler/torch_ops.py
Lines changed: 0 additions & 1 deletion
diff --git a/backends/apple/coreml/test/test_torch_ops.py b/backends/apple/coreml/test/test_torch_ops.py
Lines changed: 30 additions & 2 deletions
diff --git a/backends/arm/README.md b/backends/arm/README.md
Lines changed: 1 addition & 1 deletion
diff --git a/backends/arm/TARGETS b/backends/arm/TARGETS
Lines changed: 6 additions & 53 deletions
diff --git a/backends/arm/_passes/TARGETS b/backends/arm/_passes/TARGETS
Lines changed: 2 additions & 2 deletions
diff --git a/backends/arm/_passes/__init__.py b/backends/arm/_passes/__init__.py
Lines changed: 1 addition & 0 deletions
diff --git a/backends/arm/_passes/arm_pass_manager.py b/backends/arm/_passes/arm_pass_manager.py
Lines changed: 3 additions & 0 deletions
@@ -97,7 +97,7 @@ test_model() {
     bash examples/models/llava/install_requirements.sh
     STRICT="--no-strict"
   fi
-  if [[ "${MODEL_NAME}" == "qwen2_5" ]]; then
+  if [[ "${MODEL_NAME}" == "qwen2_5_1_5b" ]]; then
       # Install requirements for export_llama
       bash examples/models/llama/install_requirements.sh
       # Test export_llm script: python3 -m extension.llm.export.export_llm.
 
@@ -176,7 +176,7 @@ jobs:
           - model: phi_4_mini
             backend: portable
             runner: linux.arm64.m7g.4xlarge
-          - model: qwen2_5
+          - model: qwen2_5_1_5b
             backend: portable
             runner: linux.arm64.2xlarge
           - model: llama3_2_vision_encoder
 
@@ -699,9 +699,7 @@ if(EXECUTORCH_BUILD_KERNELS_TORCHAO)
       ${EXECUTORCH_ROOT}/backends/xnnpack/third-party/pthreadpool/include
       ${EXECUTORCH_ROOT}/backends/xnnpack/third-party/cpuinfo/include
   )
-  add_subdirectory(
-    ${CMAKE_CURRENT_SOURCE_DIR}/third-party/ao/torchao/experimental
-  )
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third-party/ao/torchao/csrc/cpu)
   unset(EXECUTORCH_INCLUDE_DIRS)
 
   executorch_target_link_options_shared_lib(torchao_ops_executorch)
 
@@ -175,7 +175,6 @@ def dequantize_affine(context, node):
         int_data.astype(quantized_np_dtype),
         zero_point,
         scale,
-        axis=-1,
         name=node.name,
     )
     context.add(output, node.name)
 
@@ -27,9 +27,9 @@
 class TestTorchOps(unittest.TestCase):
     edge_compile_config = executorch.exir.EdgeCompileConfig()
 
-    def _coreml_partitioner(self):
+    def _coreml_partitioner(self, *, minimum_deployment_target=ct.target.iOS18):
         compile_specs = CoreMLBackend.generate_compile_specs(
-            minimum_deployment_target=ct.target.iOS18
+            minimum_deployment_target=minimum_deployment_target
         )
         return CoreMLPartitioner(compile_specs=compile_specs)
 
@@ -158,6 +158,33 @@ def test_dequantize_affine_c8w_embedding_b4w_linear(self):
         et_prog = delegated_program.to_executorch()
         self._compare_outputs(et_prog, model, example_inputs)
 
+    def test_dequantize_affine_c8w_embedding_c8w_linear_ios16(self):
+        model, example_inputs = self._get_test_model()
+        quantize_(
+            model,
+            IntxWeightOnlyConfig(weight_dtype=torch.int8, granularity=PerAxis(0)),
+            lambda m, fqn: isinstance(m, torch.nn.Embedding),
+        )
+        quantize_(
+            model,
+            IntxWeightOnlyConfig(weight_dtype=torch.int8, granularity=PerAxis(0)),
+        )
+        ep = torch.export.export(model, example_inputs)
+        delegated_program = executorch.exir.to_edge_transform_and_lower(
+            ep,
+            partitioner=[
+                self._coreml_partitioner(minimum_deployment_target=ct.target.iOS16)
+            ],
+        )
+        for node in delegated_program.exported_program().graph.nodes:
+            if node.op == "call_function":
+                assert node.target.__name__ in [
+                    "executorch_call_delegate",
+                    "getitem",
+                ], f"Got unexpected node target after delegation: {node.target.__name__}"
+        et_prog = delegated_program.to_executorch()
+        self._compare_outputs(et_prog, model, example_inputs)
+
     def test_dequantize_codebook_linear_per_grouped_col(self):
         model, example_inputs = self._get_test_model()
         quantize_(
@@ -298,6 +325,7 @@ def forward(self, x):
     test_runner.test_dequantize_affine_c4w_embedding()
     test_runner.test_dequantize_affine_c4w_linear()
     test_runner.test_dequantize_affine_c8w_embedding_b4w_linear()
+    test_runner.test_dequantize_affine_c8w_embedding_c8w_linear_ios16()
     test_runner.test_dequantize_codebook_linear_per_grouped_col()
     test_runner.test_dequantize_codebook_linear_per_grouped_row()
     test_runner.test_dequantize_codebook_embedding_per_grouped_col()
 
@@ -34,7 +34,7 @@ For more information on TOSA see https://www.mlplatform.org/tosa/tosa_spec.html
 ## Layout of key components
 
 Export:
-* `tosa_backend.py` - The TOSA conversion flow all other backends rely on.
+* `tosa/backend.py` - The TOSA conversion flow all other backends rely on.
 * `ethosu/backend.py` - Main entrypoint for the EthosUBackend.
 * `vgf_backend.py` - Main entrypoint for VgfBackend.
   * For more information see the section on [Arm Backend Architecture](#arm-backend-architecture).
 
@@ -37,14 +37,15 @@ python_library(
 python_library(
     name = "arm_partitioner",
     srcs = [
-        "tosa_backend.py",
-        "tosa_partitioner.py",
+        "tosa/backend.py",
+        "tosa/partitioner.py",
         "vgf_backend.py",
         "vgf_partitioner.py",
     ],
     deps = [
         ":arm_backend",
         ":constants",
+        "//executorch/backends/arm/debug:schema",
         "//executorch/backends/arm/operator_support:operator_support",
         "//executorch/backends/arm/_passes:passes",
         "//executorch/exir:lib",
@@ -76,9 +77,9 @@ python_library(
         "fbsource//third-party/tosa_tools/v0.80/serialization_lib/python/tosa:tosa",
         "fbsource//third-party/tosa_tools/v1.00/serialization_lib/python/tosa:tosa",
         "//executorch/backends/arm/operators:node_visitor",
-        "//executorch/backends/arm:tosa_mapping",
-        "//executorch/backends/arm:tosa_quant_utils",
-        "//executorch/backends/arm:tosa_utils",
+        "//executorch/backends/arm/tosa:mapping",
+        "//executorch/backends/arm/tosa:quant_utils",
+        "//executorch/backends/arm/tosa:utils",
         "//executorch/exir:lib",
     ],
 )
@@ -91,54 +92,6 @@ python_library(
         "fbsource//third-party/pypi/ethos-u-vela:ethos-u-vela",
     ],
 )
-python_library(
-    name = "tosa_mapping",
-    srcs = [
-        "tosa_mapping.py",
-    ],
-    deps = [
-        "fbsource//third-party/tosa_tools/v0.80/serialization_lib/python/serializer:serializer",
-        "fbsource//third-party/tosa_tools/v1.00/serialization_lib/python/serializer:serializer",
-        "//caffe2:torch",
-    ],
-)
-python_library(
-    name = "tosa_quant_utils",
-    srcs = [
-        "tosa_quant_utils.py",
-    ],
-    deps = [
-        "fbsource//third-party/pypi/numpy:numpy",
-        "fbsource//third-party/tosa_tools/v0.80/serialization_lib/python/serializer:serializer",
-        "fbsource//third-party/tosa_tools/v1.00/serialization_lib/python/serializer:serializer",
-        "fbsource//third-party/tosa_tools/v0.80/serialization_lib/python/tosa:tosa",
-        "fbsource//third-party/tosa_tools/v1.00/serialization_lib/python/tosa:tosa",
-        ":constants",
-        ":tosa_mapping",
-        "//executorch/exir/dialects:lib",
-    ],
-)
-python_library(
-    name = "tosa_specification",
-    srcs = [
-        "tosa_specification.py",
-    ],
-    deps = [
-        "fbsource//third-party/pypi/packaging:packaging",
-        "//executorch/exir/backend:compile_spec_schema",
-    ],
-)
-python_library(
-    name = "tosa_utils",
-    srcs = [
-        "tosa_utils.py",
-    ],
-    deps = [
-        "fbsource//third-party/tosa_tools/v0.80/serialization_lib/python/serializer:serializer",
-        ":tosa_quant_utils",
-        "//executorch/backends/arm/operators:node_visitor",
-    ],
-)
 python_library(
     name = "arm_model_evaluator",
     srcs = [
 
@@ -6,8 +6,8 @@ python_library(
     deps = [
         "//executorch/backends/arm:common",
         "//executorch/backends/arm:constants",
-        "//executorch/backends/arm:tosa_quant_utils",
-        "//executorch/backends/arm:tosa_utils",
+        "//executorch/backends/arm/tosa:quant_utils",
+        "//executorch/backends/arm/tosa:utils",
         "//executorch/backends/arm/tosa/dialect:lib",
         "//executorch/backends/transforms:fuse_view_copy",
         "//executorch/backends/transforms:remove_getitem_op",
 
@@ -37,6 +37,7 @@
 from .decompose_cosine_similarity_pass import DecomposeCosineSimilarityPass  # noqa
 from .decompose_cumsum_pass import DecomposeCumsumPass  # noqa
 from .decompose_div_pass import DecomposeDivPass  # noqa
+from .decompose_div_tensor_mode import DecomposeDivTensorModePass  # noqa
 from .decompose_elu_pass import DecomposeEluPass  # noqa
 from .decompose_embedding_pass import DecomposeEmbeddingPass  # noqa  # noqa
 from .decompose_expm1_pass import DecomposeExpm1Pass  # noqa
 
@@ -42,6 +42,7 @@
     DecomposeCosineSimilarityPass,
     DecomposeCumsumPass,
     DecomposeDivPass,
+    DecomposeDivTensorModePass,
     DecomposeEluPass,
     DecomposeEmbeddingPass,
     DecomposeExpm1Pass,
@@ -211,6 +212,7 @@ def _tosa_FP_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
             DecomposeMeanDimPass(exported_program.graph_module, self.tosa_spec)
         )
         self.add_pass(DecomposeNotEqualPass())
+        self.add_pass(DecomposeDivTensorModePass())
         self.add_pass(DecomposeDivPass())
         self.add_pass(DecomposeSoftmaxPass())
         self.add_pass(DecomposeGeluPass())
@@ -289,6 +291,7 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
         self.add_pass(DecomposeNotEqualPass())
         self.add_pass(DecomposeCosineSimilarityPass())
         self.add_pass(DecomposeGluPass())
+        self.add_pass(DecomposeDivTensorModePass())
         self.add_pass(DecomposeDivPass())
         self.add_pass(DecomposeLeakyReLUPass())
         self.add_pass(DecomposeLinearVectorNormPass())
Original file line number	Diff line number	Diff line change
`@@ -175,7 +175,6 @@ def dequantize_affine(context, node):`
`175`	`175`	`int_data.astype(quantized_np_dtype),`
`176`	`176`	`zero_point,`
`177`	`177`	`scale,`
`178`		`- axis=-1,`
`179`	`178`	`name=node.name,`
`180`	`179`	`)`
`181`	`180`	`context.add(output, node.name)`