Skip to content

Commit 3595425

Browse files
author
pytorchbot
committed
2025-07-24 nightly release (9236a68)
1 parent 18412f8 commit 3595425

File tree

110 files changed

+3826
-596
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

110 files changed

+3826
-596
lines changed

.ci/scripts/setup-qnn-deps.sh

Lines changed: 2 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -7,47 +7,7 @@
77

88
set -ex
99

10-
verify_pkg_installed() {
11-
echo $(dpkg-query -W --showformat='${Status}\n' $1|grep "install ok installed")
12-
}
10+
source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/install_qnn_sdk.sh"
1311

14-
install_qnn() {
15-
echo "Start installing qnn."
16-
QNN_INSTALLATION_DIR=/tmp/qnn
17-
mkdir -p "${QNN_INSTALLATION_DIR}"
18-
19-
curl -Lo /tmp/v2.28.0.24.10.29.zip "https://softwarecenter.qualcomm.com/api/download/software/qualcomm_neural_processing_sdk/v2.28.0.241029.zip"
20-
echo "Finishing downloading qnn sdk."
21-
unzip -qo /tmp/v2.28.0.24.10.29.zip -d /tmp
22-
echo "Finishing unzip qnn sdk."
23-
24-
25-
# Print the content for manual verification
26-
ls -lah "/tmp/qairt"
27-
mv "/tmp/qairt"/* "${QNN_INSTALLATION_DIR}"
28-
echo "Finishing installing qnn '${QNN_INSTALLATION_DIR}' ."
29-
30-
ls -lah "${QNN_INSTALLATION_DIR}"
31-
}
32-
33-
setup_libc++() {
34-
clang_version=$1
35-
sudo apt-get update
36-
pkgs_to_check=("libc++-${clang_version}-dev")
37-
j=0
38-
while [ $j -lt ${#pkgs_to_check[*]} ]; do
39-
install_status=$(verify_pkg_installed ${pkgs_to_check[$j]})
40-
if [ "$install_status" == "" ]; then
41-
sudo apt-get install -y ${pkgs_to_check[$j]}
42-
if [[ $? -ne 0 ]]; then
43-
echo "ERROR: Failed to install required packages for libc++"
44-
exit 1
45-
fi
46-
fi
47-
j=$(( $j +1));
48-
done
49-
}
50-
51-
# This needs to match with the clang version from the Docker image
52-
setup_libc++ 12
12+
setup_libcpp 12
5313
install_qnn

.ci/scripts/test_ane_static_llama.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,6 @@ pushd $EXECUTORCH_ROOT/examples/apple/coreml/llama
2828
# Download stories llama110m artifacts
2929
download_stories_model_artifacts
3030

31-
python export.py -n model.pte -p params.json -c stories110M.pt --seq_length 32 --max_seq_length 64 --dtype fp16 --coreml-quantize c4w
31+
python export.py -n model.pte -p params.json -c stories110M.pt --seq_length 32 --max_seq_length 64 --dtype fp16 --coreml-quantize c4w --embedding-quantize 4,32
3232

3333
popd

CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -416,7 +416,7 @@ target_link_libraries(executorch PRIVATE executorch_core)
416416
target_include_directories(executorch PUBLIC ${_common_include_directories})
417417
target_compile_definitions(executorch PUBLIC C10_USING_CUSTOM_GENERATED_MACROS)
418418
target_compile_options(executorch PUBLIC ${_common_compile_options})
419-
target_link_options_shared_lib(executorch)
419+
executorch_target_link_options_shared_lib(executorch)
420420

421421
#
422422
# portable_ops_lib: A library to register core ATen ops using portable kernels,
@@ -690,7 +690,7 @@ endif()
690690

691691
if(EXECUTORCH_BUILD_KERNELS_QUANTIZED)
692692
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/quantized)
693-
target_link_options_shared_lib(quantized_ops_lib)
693+
executorch_target_link_options_shared_lib(quantized_ops_lib)
694694
endif()
695695

696696
if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)

backends/apple/coreml/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ if(APPLE)
199199
${CMAKE_CURRENT_SOURCE_DIR}/third-party/coremltools/deps/protobuf/cmake
200200
)
201201

202-
target_link_options_shared_lib(libprotobuf-lite)
202+
executorch_target_link_options_shared_lib(libprotobuf-lite)
203203
target_link_libraries(coremldelegate PRIVATE libprotobuf-lite)
204204
endif()
205205

@@ -210,7 +210,7 @@ if(APPLE)
210210
${FOUNDATION_FRAMEWORK} ${SQLITE_LIBRARY}
211211
)
212212

213-
target_link_options_shared_lib(coremldelegate)
213+
executorch_target_link_options_shared_lib(coremldelegate)
214214

215215
if(EXECUTORCH_COREML_BUILD_EXECUTOR_RUNNER)
216216
target_link_libraries(

backends/apple/coreml/partition/coreml_partitioner.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,25 +23,27 @@
2323
from torch.fx.passes.operator_support import OperatorSupportBase
2424

2525
logger = logging.getLogger(__name__)
26-
logger.setLevel(logging.WARNING)
26+
logger.setLevel(logging.INFO)
2727

2828

29-
class OperatorsSupportedForCoreMLBackend(OperatorSupportBase):
29+
class _OperatorsSupportedForCoreMLBackend(OperatorSupportBase):
3030
def __init__(
3131
self,
3232
skip_ops_for_coreml_delegation: Optional[List[str]] = None,
3333
lower_full_graph: bool = False,
34+
log: bool = False,
3435
) -> None:
3536
if skip_ops_for_coreml_delegation is None:
3637
skip_ops_for_coreml_delegation = []
3738
super().__init__()
3839
self.skip_ops_for_coreml_delegation = skip_ops_for_coreml_delegation
3940
self.lower_full_graph = lower_full_graph
4041
self._logged_msgs = set()
42+
self._log = log
4143

4244
def log_once(self, msg: str) -> None:
43-
if msg not in self._logged_msgs:
44-
logging.info(msg)
45+
if self._log and msg not in self._logged_msgs:
46+
logger.info(msg)
4547
self._logged_msgs.add(msg)
4648

4749
def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
@@ -154,8 +156,10 @@ def partition(self, exported_program: ExportedProgram) -> PartitionResult:
154156

155157
capability_partitioner = CapabilityBasedPartitioner(
156158
exported_program.graph_module,
157-
OperatorsSupportedForCoreMLBackend(
158-
self.skip_ops_for_coreml_delegation, self.lower_full_graph
159+
_OperatorsSupportedForCoreMLBackend(
160+
self.skip_ops_for_coreml_delegation,
161+
self.lower_full_graph,
162+
log=True,
159163
),
160164
allows_single_node_partition=True,
161165
)
@@ -191,8 +195,10 @@ def ops_to_not_decompose(
191195
self, ep: ExportedProgram
192196
) -> Tuple[List[torch._ops.OpOverload], Optional[Callable[[torch.fx.Node], bool]]]:
193197
do_not_decompose = []
194-
op_support = OperatorsSupportedForCoreMLBackend(
195-
self.skip_ops_for_coreml_delegation, self.lower_full_graph
198+
op_support = _OperatorsSupportedForCoreMLBackend(
199+
self.skip_ops_for_coreml_delegation,
200+
self.lower_full_graph,
201+
log=False,
196202
)
197203

198204
# CoreML prevents certain ops (like triu) from lowering to CoreML when put in the ExecuTorch op namespace

backends/apple/coreml/test/test_coreml_partitioner.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
from executorch.backends.apple.coreml.compiler import CoreMLBackend
1717
from executorch.backends.apple.coreml.partition import CoreMLPartitioner
1818
from executorch.exir.backend.utils import format_delegated_graph
19-
from executorch.runtime import Runtime
2019

2120

2221
@torch.library.custom_op("unsupported::linear", mutates_args=())
@@ -37,7 +36,13 @@ def _(
3736
return torch.ops.aten.linear.default(x, w, b)
3837

3938

40-
_TEST_RUNTIME = sys.platform == "darwin"
39+
def is_fbcode():
40+
return not hasattr(torch.version, "git_version")
41+
42+
43+
_TEST_RUNTIME = (sys.platform == "darwin") and not is_fbcode()
44+
if _TEST_RUNTIME:
45+
from executorch.runtime import Runtime
4146

4247

4348
class TestCoreMLPartitioner(unittest.TestCase):

backends/apple/coreml/test/test_torch_ops.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,20 @@
1414

1515
from executorch.backends.apple.coreml.compiler import CoreMLBackend
1616
from executorch.backends.apple.coreml.partition import CoreMLPartitioner
17-
from executorch.runtime import Runtime
1817
from torchao.quantization import IntxWeightOnlyConfig, PerAxis, PerGroup, quantize_
1918

20-
_TEST_RUNTIME = sys.platform == "darwin" and tuple(
21-
map(int, platform.mac_ver()[0].split("."))
22-
) >= (15, 0)
19+
20+
def is_fbcode():
21+
return not hasattr(torch.version, "git_version")
22+
23+
24+
_TEST_RUNTIME = (
25+
(sys.platform == "darwin")
26+
and not is_fbcode()
27+
and tuple(map(int, platform.mac_ver()[0].split("."))) >= (15, 0)
28+
)
29+
if _TEST_RUNTIME:
30+
from executorch.runtime import Runtime
2331

2432

2533
class TestTorchOps(unittest.TestCase):

backends/apple/mps/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ target_link_libraries(
7070
${MPS_FRAMEWORK} ${MPS_GRAPH_FRAMEWORK}
7171
)
7272

73-
target_link_options_shared_lib(mpsdelegate)
73+
executorch_target_link_options_shared_lib(mpsdelegate)
7474
target_compile_options(mpsdelegate PUBLIC ${_common_compile_options})
7575
target_compile_options(mpsdelegate PRIVATE "-fno-objc-arc")
7676

backends/arm/_passes/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
from .decompose_leaky_relu_pass import DecomposeLeakyReLUPass # noqa
4141
from .decompose_linalg_vector_norm_pass import DecomposeLinearVectorNormPass # noqa
4242
from .decompose_linear_pass import DecomposeLinearPass # noqa
43+
from .decompose_masked_fill import DecomposeMaskedFill # noqa
4344
from .decompose_maxpool2d_with_dilation import DecomposeMaxPool2DPass # noqa
4445
from .decompose_meandim_pass import DecomposeMeanDimPass # noqa
4546
from .decompose_ne_pass import DecomposeNotEqualPass # noqa

backends/arm/_passes/arm_pass_manager.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
DecomposeLeakyReLUPass,
4646
DecomposeLinearPass,
4747
DecomposeLinearVectorNormPass,
48+
DecomposeMaskedFill,
4849
DecomposeMaxPool2DPass,
4950
DecomposeMeanDimPass,
5051
DecomposeNotEqualPass,
@@ -113,6 +114,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
113114
self.add_pass(
114115
DecomposeMeanDimPass(exported_program.graph_module, self.tosa_spec)
115116
)
117+
116118
self.add_pass(ConvertFullLikeToFullPass())
117119
self.add_pass(ConvertToClampPass())
118120
self.add_pass(ConvertMinMaxPass())
@@ -146,6 +148,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
146148
self.add_pass(DecomposeMaxPool2DPass())
147149
self.add_pass(SizeAdjustInputPass())
148150
self.add_pass(DecomposeSelectPass())
151+
149152
self.add_pass(ConvertSqueezesToViewPass())
150153

151154
self.add_pass(FuseViewCopyTransform())
@@ -160,6 +163,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
160163
return self._transform(exported_program.graph_module)
161164

162165
def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
166+
self.add_pass(DecomposeMaskedFill())
163167
self.add_pass(DecomposeRoundPass())
164168
self.add_pass(DecomposeAcoshPass())
165169
self.add_pass(DecomposeAsinPass())
@@ -285,4 +289,8 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
285289
self.add_pass(ReplaceInfValues())
286290
self.add_pass(DecomposeSumPass())
287291

292+
if not self.tosa_spec.is_U55_subset:
293+
# Uses where which is not supported on Ethos-U55
294+
self.add_pass(DecomposeMaskedFill())
295+
288296
return self._transform(graph_module)

0 commit comments

Comments (0)