Skip to content

Commit 6693a02

Browse files
committed
Update on "add attention_sink.py"
This PR adds `KVCacheWithAttentionSink`, which is required for `AttentionSink`. It keeps the first `sink_size` tokens as attention sinks and maintains a sliding window of `window_size` for new tokens. Note: I am trying to implement and verify `AttentionSink` in eager mode first, so the current implementation may still have some correctness errors or performance issues. For example, it does not support the case when dynamic shape is disabled. I will leave these problems to be resolved when we are ready to deploy `AttentionSink` to edge. Differential Revision: [D65235798](https://our.internmc.facebook.com/intern/diff/D65235798/) [ghstack-poisoned]
2 parents dbbaa85 + 2c9df8e commit 6693a02

File tree

82 files changed

+2096
-948
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

82 files changed

+2096
-948
lines changed

CONTRIBUTING.md

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -283,10 +283,15 @@ for basics.
283283
- If the reviewers have requests or questions, follow up with them.
284284
- The goal of the reviewer is to ensure that the code in the `main` branch of
285285
the repo is consistent, maintainable, and of high quality.
286-
1. Once approved, your reviewer will import the PR into Meta's internal system
287-
and merge it from there.
288-
- If the PR is approved and not merged within a few business days, please
289-
comment on the PR to ask about its status.
286+
1. Once the PR has been approved,
287+
- If you have the "write permission" in this repo, you can merge it yourself
288+
by clicking the "Squash and merge" button once it is green and all CI
289+
signals are passing.
290+
- If you don't have "write permission" in this repo, the reviewer will take
291+
care of the PR. The reviewer may import the PR into Meta's internal system
292+
to validate it against internal CI.
293+
- If the PR is approved but not merged within 5 business days, please comment
294+
on the PR to ask about its status.
290295
- Note that if the `main` [CI](#continuous-integration) jobs are broken, we
291296
will only merge PRs that fix the broken jobs until all critical jobs are
292297
fixed.

backends/apple/coreml/runtime/delegate/ETCoreMLModelCompiler.mm

Lines changed: 29 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -26,25 +26,38 @@ + (nullable NSURL *)compileModelAtURL:(NSURL *)modelURL
2626
#else
2727
__block NSError *localError = nil;
2828
__block NSURL *result = nil;
29-
30-
dispatch_semaphore_t sema = dispatch_semaphore_create(0);
31-
[MLModel compileModelAtURL:modelURL completionHandler:^(NSURL * _Nullable tempURL, NSError * _Nullable compilationError) {
32-
result = [tempURL copy];
33-
localError = compilationError;
34-
dispatch_semaphore_signal(sema);
35-
}];
36-
37-
long status = dispatch_semaphore_wait(sema, dispatch_time(DISPATCH_TIME_NOW, (int64_t)(maxWaitTimeInSeconds * NSEC_PER_SEC)));
38-
if (status != 0) {
29+
30+
if (@available(iOS 16, macOS 13, watchOS 9, tvOS 16, *)) {
31+
dispatch_semaphore_t sema = dispatch_semaphore_create(0);
32+
[MLModel compileModelAtURL:modelURL completionHandler:^(NSURL * _Nullable tempURL, NSError * _Nullable compilationError) {
33+
result = [tempURL copy];
34+
localError = compilationError;
35+
dispatch_semaphore_signal(sema);
36+
}];
37+
38+
long status = dispatch_semaphore_wait(sema, dispatch_time(DISPATCH_TIME_NOW, (int64_t)(maxWaitTimeInSeconds * NSEC_PER_SEC)));
39+
if (status != 0) {
40+
ETCoreMLLogErrorAndSetNSError(error,
41+
ETCoreMLErrorCompilationFailed,
42+
"%@: Failed to compile model in %f seconds.",
43+
NSStringFromClass(ETCoreMLModelCompiler.class),
44+
maxWaitTimeInSeconds);
45+
return nil;
46+
}
47+
} else {
48+
result = [MLModel compileModelAtURL:modelURL error:&localError];
49+
}
50+
51+
if (localError) {
3952
ETCoreMLLogErrorAndSetNSError(error,
40-
ETCoreMLErrorCompilationFailed,
41-
"%@: Failed to compile model in %f seconds.",
42-
NSStringFromClass(ETCoreMLModelCompiler.class),
43-
maxWaitTimeInSeconds);
53+
ETCoreMLErrorCompilationFailed,
54+
"%@: Failed to compile model, error: %@",
55+
NSStringFromClass(ETCoreMLModelCompiler.class),
56+
localError);
4457
return nil;
58+
} else {
59+
return result;
4560
}
46-
47-
return result;
4861
#endif
4962
}
5063

backends/apple/coreml/scripts/install_requirements.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ rm -rf "$COREML_DIR_PATH/third-party"
2424
mkdir "$COREML_DIR_PATH/third-party"
2525

2626
echo "${green}ExecuTorch: Cloning coremltools."
27-
git clone --depth 1 --branch 8.0 "https://github.com/apple/coremltools.git" $COREMLTOOLS_DIR_PATH
27+
git clone --depth 1 --branch 8.1 "https://github.com/apple/coremltools.git" $COREMLTOOLS_DIR_PATH
2828
cd $COREMLTOOLS_DIR_PATH
2929

3030
STATUS=$?

backends/apple/coreml/test/test_coreml_partitioner.py

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -71,23 +71,15 @@ def test_vit_skip_conv(self):
7171
)
7272
)
7373

74-
conv_block = ["aten.convolution.default", "executorch_call_delegate"]
75-
safe_softmax_block = [
76-
"getitem",
77-
"getitem",
78-
"getitem",
79-
"getitem",
80-
"aten.any.dim",
81-
"executorch_call_delegate",
82-
]
83-
final_block = ["getitem"]
84-
total = conv_block + 12 * safe_softmax_block + final_block
85-
8674
assert [
8775
node.target.__name__
8876
for node in delegated_program_manager.exported_program().graph.nodes
8977
if node.op == "call_function"
90-
] == total
78+
] == [
79+
"aten.convolution.default",
80+
"executorch_call_delegate",
81+
"getitem",
82+
]
9183

9284
def test_buffer(self):
9385
embedding_dim = 3

backends/arm/test/common.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ class arm_test_options(Enum):
2929
corstone300 = auto()
3030
dump_path = auto()
3131
date_format = auto()
32+
fast_fvp = auto()
3233

3334

3435
_test_options: dict[arm_test_options, Any] = {}
@@ -41,6 +42,7 @@ def pytest_addoption(parser):
4142
parser.addoption("--arm_run_corstone300", action="store_true")
4243
parser.addoption("--default_dump_path", default=None)
4344
parser.addoption("--date_format", default="%d-%b-%H:%M:%S")
45+
parser.addoption("--fast_fvp", action="store_true")
4446

4547

4648
def pytest_configure(config):
@@ -63,6 +65,7 @@ def pytest_configure(config):
6365
f"Supplied argument 'default_dump_path={dump_path}' that does not exist or is not a directory."
6466
)
6567
_test_options[arm_test_options.date_format] = config.option.date_format
68+
_test_options[arm_test_options.fast_fvp] = config.option.fast_fvp
6669
logging.basicConfig(level=logging.INFO, stream=sys.stdout)
6770

6871

backends/arm/test/runner_utils.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
import numpy as np
1818
import torch
1919

20+
from executorch.backends.arm.test.common import arm_test_options, is_option_enabled
21+
2022
from torch.export import ExportedProgram
2123
from torch.fx.node import Node
2224

@@ -249,6 +251,10 @@ def run_corstone(
249251
for input_path in input_paths:
250252
cmd_line += f" -i {input_path}"
251253

254+
ethos_u_extra_args = ""
255+
if is_option_enabled(arm_test_options.fast_fvp):
256+
ethos_u_extra_args = ethos_u_extra_args + "--fast"
257+
252258
command_args = {
253259
"corstone-300": [
254260
"FVP_Corstone_SSE-300_Ethos-U55",
@@ -267,6 +273,8 @@ def run_corstone(
267273
"-C",
268274
"cpu0.semihosting-stack_base=0",
269275
"-C",
276+
f"ethosu.extra_args='{ethos_u_extra_args}'",
277+
"-C",
270278
"cpu0.semihosting-heap_limit=0",
271279
"-C",
272280
f"cpu0.semihosting-cmd_line='{cmd_line}'",
@@ -282,6 +290,8 @@ def run_corstone(
282290
"-C",
283291
"mps4_board.visualisation.disable-visualisation=1",
284292
"-C",
293+
"vis_hdlcd.disable_visualisation=1",
294+
"-C",
285295
"mps4_board.telnetterminal0.start_telnet=0",
286296
"-C",
287297
"mps4_board.uart0.out_file='-'",
@@ -296,6 +306,8 @@ def run_corstone(
296306
"-C",
297307
"mps4_board.subsystem.cpu0.semihosting-heap_limit=0",
298308
"-C",
309+
f"mps4_board.subsystem.ethosu.extra_args='{ethos_u_extra_args}'",
310+
"-C",
299311
f"mps4_board.subsystem.cpu0.semihosting-cmd_line='{cmd_line}'",
300312
"-a",
301313
elf_path,

backends/cadence/aot/compiler.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -235,12 +235,12 @@ def quantize_and_export_to_cadence(
235235
def export_to_executorch_gen_etrecord(
236236
model: torch.nn.Module,
237237
inputs: tuple[object, ...],
238-
dump_graphs: bool = False,
239238
output_dir: Optional[str] = None,
240239
opt_level: int = 1,
240+
dump_graphs: bool = False,
241241
) -> ExecutorchProgramManager:
242-
edge_prog_manager = export_to_edge(model, inputs)
243242
cadence_passes = get_cadence_passes(opt_level)
243+
edge_prog_manager = export_to_edge(model, inputs, dump_graphs)
244244

245245
# Run a couple required passes for quant/dequant ops
246246
cadence_prog_manager = edge_prog_manager.transform(

backends/cadence/aot/fuse_ops.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,9 @@ def fuse_quantized_batch_norm_with_conv(
426426
# Note: there is a quantized.conv2d.new operator in the resulting graph
427427
# that takes a torch.classes.quantized.Conv2dPackedParamsBase as one of the input
428428
# this prevents us to directly call graph_module.recompile().
429+
# pyre-fixme[16]: `GraphModule` has no attribute `_code`.
430+
# pyre-fixme[16]: Item `Tensor` of `Tensor | Module` has no attribute
431+
# `python_code`.
429432
graph_module._code = graph_module._graph.python_code(root_module="self").src
430433

431434
def __init__(self):

backends/cadence/aot/quantizer/patterns.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ def partition_types(self) -> List[OpOverload]:
7575
def get_anchors(
7676
self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
7777
) -> PartitionAnchors:
78+
# pyre-fixme[29]: `Union[BoundMethod[typing.Callable(torch._C.TensorBase.__ge...
7879
addmm_node = fused_partition[0].nodes[-1]
7980

8081
bias_qspec = DerivedQuantizationSpec(
@@ -107,6 +108,7 @@ def partition_types(self) -> List[OpOverload]:
107108
def get_anchors(
108109
self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
109110
) -> PartitionAnchors:
111+
# pyre-fixme[29]: `Union[BoundMethod[typing.Callable(torch._C.TensorBase.__ge...
110112
bmm_node = fused_partition[0].nodes[-1]
111113

112114
return PartitionAnchors(
@@ -127,6 +129,7 @@ def partition_types(self) -> List[OpOverload]:
127129
def get_anchors(
128130
self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
129131
) -> PartitionAnchors:
132+
# pyre-fixme[29]: `Union[BoundMethod[typing.Callable(torch._C.TensorBase.__ge...
130133
conv1d_node = fused_partition[0].nodes[-1]
131134

132135
bias_qspec = DerivedQuantizationSpec(
@@ -165,6 +168,7 @@ def partition_types(self) -> List[OpOverload]:
165168
def get_anchors(
166169
self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
167170
) -> PartitionAnchors:
171+
# pyre-fixme[29]: `Union[BoundMethod[typing.Callable(torch._C.TensorBase.__ge...
168172
conv2d_node = fused_partition[0].nodes[-1]
169173

170174
bias_qspec = DerivedQuantizationSpec(
@@ -203,6 +207,7 @@ def partition_types(self) -> List[OpOverload]:
203207
def get_anchors(
204208
self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
205209
) -> PartitionAnchors:
210+
# pyre-fixme[29]: `Union[BoundMethod[typing.Callable(torch._C.TensorBase.__ge...
206211
layer_norm_node = fused_partition[0].nodes[-1]
207212

208213
others = [(layer_norm_node, 1)]
@@ -237,6 +242,7 @@ def partition_types(self) -> List[OpOverload]:
237242
def get_anchors(
238243
self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
239244
) -> PartitionAnchors:
245+
# pyre-fixme[29]: `Union[BoundMethod[typing.Callable(torch._C.TensorBase.__ge...
240246
linear_node = fused_partition[0].nodes[-1]
241247

242248
bias_qspec = DerivedQuantizationSpec(
@@ -275,6 +281,7 @@ def partition_types(self) -> List[OpOverload]:
275281
def get_anchors(
276282
self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
277283
) -> PartitionAnchors:
284+
# pyre-fixme[29]: `Union[BoundMethod[typing.Callable(torch._C.TensorBase.__ge...
278285
matmul_node = fused_partition[0].nodes[-1]
279286

280287
return PartitionAnchors(
@@ -297,6 +304,7 @@ def partition_types(self) -> List[OpOverload]:
297304
def get_anchors(
298305
self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
299306
) -> PartitionAnchors:
307+
# pyre-fixme[29]: `Union[BoundMethod[typing.Callable(torch._C.TensorBase.__ge...
300308
relu_node = fused_partition[0].nodes[-1]
301309

302310
return PartitionAnchors(

backends/cadence/aot/remove_ops.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -561,6 +561,7 @@ class Subgraph:
561561
exir_ops.edge.aten.mul.Tensor,
562562
exir_ops.edge.aten.mean.dim,
563563
exir_ops.edge.aten.cat.default,
564+
exir_ops.edge.aten.hardtanh.default,
564565
exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
565566
exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
566567
}

0 commit comments

Comments (0)