Commit c480e1e

Merge branch 'main' into gh/jackzhxng/12/orig
2 parents c638c09 + 77f16dc

157 files changed (+7334, -1612 lines)

.ci/scripts/unittest-buck2.sh

Lines changed: 6 additions & 4 deletions
@@ -15,7 +15,7 @@ buck2 query "//backends/apple/... + //backends/example/... + \
   //kernels/optimized/... + //kernels/portable/... + //kernels/quantized/... + \
   //kernels/test/... + //runtime/... + //schema/... + //test/... + //util/..."
 
-UNBUILDABLE_OPTIMIZED_OPS_REGEX="gelu|fft_r2c|log_softmax"
+UNBUILDABLE_OPTIMIZED_OPS_REGEX="_elu|gelu|fft|log_softmax"
 BUILDABLE_OPTIMIZED_OPS=$(buck2 query //kernels/optimized/cpu/... | grep -E -v $UNBUILDABLE_OPTIMIZED_OPS_REGEX)
 
 # TODO: build prim_ops_test_cpp again once supported_features works in
@@ -24,6 +24,8 @@ BUILDABLE_KERNELS_PRIM_OPS_TARGETS=$(buck2 query //kernels/prim_ops/... | grep -
 # TODO: expand the covered scope of Buck targets.
 # //runtime/kernel/... is failing because //third-party:torchgen_files's shell script can't find python on PATH.
 # //runtime/test/... requires Python torch, which we don't have in our OSS buck setup.
-buck2 test $BUILDABLE_OPTIMIZED_OPS //kernels/portable/... \
-  $BUILDABLE_KERNELS_PRIM_OPS_TARGETS //runtime/backend/... //runtime/core/... \
-  //runtime/executor: //runtime/kernel/... //runtime/platform/...
+for op in "build" "test"; do
+  buck2 $op $BUILDABLE_OPTIMIZED_OPS //kernels/portable/... \
+    $BUILDABLE_KERNELS_PRIM_OPS_TARGETS //runtime/backend/... //runtime/core/... \
+    //runtime/executor: //runtime/kernel/... //runtime/platform/...
+done

.lintrunner.toml

Lines changed: 4 additions & 0 deletions
@@ -271,6 +271,10 @@ exclude_patterns = [
     'examples/**',
     'exir/verification/bindings.cpp',
     'extension/**',
+    # Uses properly-gated (ET_USE_PYTORCH_HEADERS) ATen include.
+    'kernels/portable/cpu/util/elementwise_util.h',
+    'kernels/portable/cpu/util/math_util.h',
+    'kernels/portable/cpu/util/vectorized_math.h',
     'kernels/optimized/**',
     'runtime/core/exec_aten/**',
     # Want to be able to keep c10 in sync with PyTorch core.
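Note on the new exclusion comment: the ATen include in these headers is wrapped in an ET_USE_PYTORCH_HEADERS guard, so they still build without PyTorch. A minimal sketch of that gating pattern (the specific header and function below are illustrative assumptions, not the contents of these files):

// Hypothetical sketch of a properly-gated ATen include: the ATen-banning
// lint rule can safely skip this file because the include only takes
// effect when the build defines ET_USE_PYTORCH_HEADERS.
#ifdef ET_USE_PYTORCH_HEADERS
#include <ATen/cpu/vec/vec.h> // optional vectorized fast path
#endif

template <typename T>
T add_one(T x) {
#ifdef ET_USE_PYTORCH_HEADERS
  // ... could dispatch to at::vec::Vectorized<T> here ...
#endif
  return x + T(1); // portable scalar fallback always compiles
}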

CMakeLists.txt

Lines changed: 0 additions & 8 deletions
@@ -430,14 +430,6 @@ endif()
 
 add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/configurations)
 
-#
-# gflags: Commandline flag host library.
-#
-
-if(EXECUTORCH_BUILD_GFLAGS)
-  add_subdirectory(third-party/gflags)
-endif()
-
 # Install `executorch` library as well as `executorch-config.cmake` under
 # ${CMAKE_INSTALL_PREFIX}/
 install(

backends/apple/coreml/scripts/build_tests.sh

Lines changed: 1 addition & 2 deletions
@@ -33,8 +33,7 @@ cmake "$EXECUTORCH_ROOT_PATH" -B"$CMAKE_EXECUTORCH_BUILD_DIR_PATH" \
   -DPLATFORM=MAC_UNIVERSAL \
   -DDEPLOYMENT_TARGET=13.0 \
   -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=OFF \
-  -DEXECUTORCH_BUILD_XNNPACK=OFF \
-  -DEXECUTORCH_BUILD_GFLAGS=OFF
+  -DEXECUTORCH_BUILD_XNNPACK=OFF
 
 cmake --build "$CMAKE_EXECUTORCH_BUILD_DIR_PATH" -j9 -t executorch

backends/arm/_passes/scalars_to_attribute_pass.py

Lines changed: 1 addition & 1 deletion
@@ -12,8 +12,8 @@
 from executorch.backends.arm._passes.arm_pass_utils import get_first_fake_tensor
 
 from executorch.exir.pass_base import ExportPass, PassResult
-from torch.ao.quantization.fx.utils import get_new_attr_name_with_prefix
 from torch.fx import GraphModule, Node
+from torchao.quantization.pt2e.utils import get_new_attr_name_with_prefix
 
 
 class ScalarsToAttributePass(ExportPass):

backends/arm/quantizer/quantization_annotator.py

Lines changed: 5 additions & 0 deletions
@@ -221,10 +221,12 @@ def _match_pattern(
     torch.ops.aten.squeeze_copy.dim,
     torch.ops.aten.squeeze.dim,
     torch.ops.aten.squeeze.dims,
+    torch.ops.aten.unbind.int,
     torch.ops.aten.unsqueeze.default,
     torch.ops.aten.unsqueeze_copy.default,
     torch.ops.aten.reshape.default,
     torch.ops.aten.repeat.default,
+    torch.ops.aten.repeat_interleave.self_int,
     torch.ops.aten.expand_copy.default,
     torch.ops.aten.expand.default,
     # Disabling these as there seems to be an issue with support for complex
@@ -256,6 +258,7 @@ def _match_pattern(
     torch.ops.aten.amin.default,
     torch.ops.aten.clamp.default,
     torch.ops.aten.clamp.Tensor,
+    torch.ops.aten.unflatten.int,
 ]
 
 _one_to_one_shared_input_or_input_act_qspec = [
@@ -271,6 +274,7 @@ def _match_pattern(
     torch.ops.aten.avg_pool2d.default,
     torch.ops.aten.max_pool2d.default,
     torch.ops.aten.full.default,
+    torch.ops.aten.full,
     torch.ops.aten.flatten.using_ints,
     torch.ops.aten.dropout.default,
     torch.ops.aten.dropout_.default,
@@ -539,6 +543,7 @@ def annotate_graph(  # type: ignore[return]
         if node.target in [
             torch.ops.aten.full_like.default,
             torch.ops.aten.full.default,
+            torch.ops.aten.full,
             torch.ops.aten.scalar_tensor.default,
         ]:
             node.kwargs = {}

backends/arm/runtime/EthosUBackend.cpp

Lines changed: 42 additions & 10 deletions
@@ -261,12 +261,24 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
           event_tracer,
           "+EthosUBackend::execute()handles.input.permute_CHW_to_HWC()");
       // permuted byte copy CHW to HWC
+      int c, h, w;
+      if (tensor_in.dim() == 4) {
+        c = tensor_in.size(1);
+        h = tensor_in.size(2);
+        w = tensor_in.size(3);
+      } else if (tensor_in.dim() == 5) {
+        c = tensor_in.size(2);
+        h = tensor_in.size(3);
+        w = tensor_in.size(4);
+      } else {
+        ET_LOG(
+            Error,
+            "Unsupported input tensor dimension %d, expected 4 or 5",
+            tensor_in.dim());
+        return Error::InvalidProgram;
+      }
       permute_CHW_to_HWC(
-          tensor_in.mutable_data_ptr<char>(),
-          scratch_addr,
-          tensor_in.size(1),
-          tensor_in.size(2),
-          tensor_in.size(3));
+          tensor_in.mutable_data_ptr<char>(), scratch_addr, c, h, w);
     } else if (both_char or both_int or both_short) {
       EXECUTORCH_PROF_SCOPE(
           event_tracer, "+EthosUBackend::execute()handles.input.memcpy()");
@@ -364,12 +376,24 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
           "+EthosUBackend::execute()handles.output.permute_HWC_to_CHW()");
 
       char* output_address = (char*)output_addr;
+      int c, h, w;
+      if (tensor_out.dim() == 4) {
+        c = tensor_out.size(1);
+        h = tensor_out.size(2);
+        w = tensor_out.size(3);
+      } else if (tensor_out.dim() == 5) {
+        c = tensor_out.size(2);
+        h = tensor_out.size(3);
+        w = tensor_out.size(4);
+      } else {
+        ET_LOG(
+            Error,
+            "Unsupported output tensor dimension %d, expected 4 or 5",
+            tensor_out.dim());
+        return Error::InvalidProgram;
+      }
       permute_HWC_to_CHW(
-          output_address,
-          tensor_out.mutable_data_ptr<char>(),
-          tensor_out.size(1),
-          tensor_out.size(2),
-          tensor_out.size(3));
+          output_address, tensor_out.mutable_data_ptr<char>(), c, h, w);
     } else {
       EXECUTORCH_PROF_SCOPE(
           event_tracer, "+EthosUBackend::execute()handles.output.move()");
@@ -430,6 +454,14 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
       if (permuted_shape) {
         ET_LOG(Debug, "Tensor input/output %d will be permuted", index);
       }
+    } else if (tensor.dim() == 5) {
+      // Same as above, but for 5D tensors.
+      permuted_shape = tensor.size(0) == io->shape[0] &&
+          tensor.size(1) == io->shape[1] && tensor.size(2) == io->shape[4] &&
+          tensor.size(3) == io->shape[2] && tensor.size(4) == io->shape[3];
+      if (permuted_shape) {
+        ET_LOG(Debug, "Tensor input/output %d will be permuted", index);
+      }
     }
     *is_permuted = permuted_shape;
     return Error::Ok;
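For reference, the permute helpers are plain byte-wise transposes; a minimal sketch of what a CHW-to-HWC permuted copy of this shape typically does (an assumed illustration, not the actual executorch helper):

// Sketch of a CHW -> HWC permuted byte copy (assumed behavior of
// permute_CHW_to_HWC; the real helper may differ in details).
// Element (ch, y, x) in CHW layout lands at (y, x, ch) in HWC layout.
void permute_CHW_to_HWC_sketch(const char* in, char* out, int c, int h, int w) {
  for (int ch = 0; ch < c; ++ch) {
    for (int y = 0; y < h; ++y) {
      for (int x = 0; x < w; ++x) {
        out[(y * w + x) * c + ch] = in[(ch * h + y) * w + x];
      }
    }
  }
}

The 4D/5D branching in the diff only changes where c, h, and w are read from (sizes 1-3 for 4D inputs, sizes 2-4 for 5D inputs); the copy itself is unchanged.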

backends/arm/scripts/parse_test_names.py

Lines changed: 2 additions & 0 deletions
@@ -17,6 +17,8 @@
     "bitwise_right_shift.Tensor",
     "bitwise_left_shift.Tensor",
     "native_group_norm.default",
+    "unbind.int",
+    "unflatten.int",
     "_native_batch_norm_legit_no_training.default",
     "_native_batch_norm_legit.no_stats",
 ]

backends/arm/test/models/test_deit_tiny_arm.py

Lines changed: 1 addition & 1 deletion
@@ -52,7 +52,7 @@ def test_deit_tiny_tosa_BI():
         aten_op=[],
         exir_op=[],
         use_to_edge_transform_and_lower=True,
-        atol=2.5,  # This needs to go down: MLETORCH-956
+        atol=1,
         qtol=1,
     )
     pipeline.run()

backends/arm/test/models/test_llama.py

Lines changed: 0 additions & 6 deletions
@@ -126,10 +126,4 @@ def test_llama_tosa_BI():
         exir_op=[],
         use_to_edge_transform_and_lower=True,
     )
-    pipeline.change_args(
-        "run_method_and_compare_outputs",
-        atol=9.9,
-        rtol=1.5,  # TODO: Tolerance needs to be updated after MLETORCH-907
-        inputs=llama_inputs,
-    )
     pipeline.run()
