pytorch
diff --git a/‎.ci/scripts/test_model.sh‎
Lines changed: 3 additions & 3 deletions b/‎.ci/scripts/test_model.sh‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎.ci/scripts/test_wheel_package_qnn.sh‎
Lines changed: 1 addition & 0 deletions b/‎.ci/scripts/test_wheel_package_qnn.sh‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.ci/scripts/wheel/test_base.py‎
Lines changed: 12 additions & 0 deletions b/‎.ci/scripts/wheel/test_base.py‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎CMakeLists.txt‎
Lines changed: 15 additions & 0 deletions b/‎CMakeLists.txt‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎backends/arm/_passes/add_bias_pass.py‎
Lines changed: 5 additions & 1 deletion b/‎backends/arm/_passes/add_bias_pass.py‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎backends/arm/_passes/annotate_decomposed_matmul.py‎
Lines changed: 3 additions & 1 deletion b/‎backends/arm/_passes/annotate_decomposed_matmul.py‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎backends/arm/_passes/annotate_output_dim_order_pass.py‎
Lines changed: 6 additions & 1 deletion b/‎backends/arm/_passes/annotate_output_dim_order_pass.py‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎backends/arm/_passes/arm_pass.py‎
Lines changed: 32 additions & 1 deletion b/‎backends/arm/_passes/arm_pass.py‎
Lines changed: 32 additions & 1 deletion
diff --git a/‎backends/arm/_passes/arm_pass_manager.py‎
Lines changed: 32 additions & 1 deletion b/‎backends/arm/_passes/arm_pass_manager.py‎
Lines changed: 32 additions & 1 deletion
diff --git a/‎backends/arm/_passes/broadcast_args_pass.py‎
Lines changed: 5 additions & 1 deletion b/‎backends/arm/_passes/broadcast_args_pass.py‎
Lines changed: 5 additions & 1 deletion
@@ -131,13 +131,13 @@ test_model_with_xnnpack() {
     return 0
   fi
 
-  # Delegation
+  # Delegation and test with pybindings
   if [[ ${WITH_QUANTIZATION} == true ]]; then
     SUFFIX="q8"
-    "${PYTHON_EXECUTABLE}" -m examples.xnnpack.aot_compiler --model_name="${MODEL_NAME}" --delegate --quantize
+    "${PYTHON_EXECUTABLE}" -m examples.xnnpack.aot_compiler --model_name="${MODEL_NAME}" --delegate --quantize --test_after_export
   else
     SUFFIX="fp32"
-    "${PYTHON_EXECUTABLE}" -m examples.xnnpack.aot_compiler --model_name="${MODEL_NAME}" --delegate
+    "${PYTHON_EXECUTABLE}" -m examples.xnnpack.aot_compiler --model_name="${MODEL_NAME}" --delegate --test_after_export
   fi
 
   OUTPUT_MODEL_PATH="${MODEL_NAME}_xnnpack_${SUFFIX}.pte"
 
@@ -145,6 +145,7 @@ run_core_tests () {
   echo "=== [$LABEL] Import smoke tests ==="
   "$PYBIN" -c "import executorch; print('executorch imported successfully')"
   "$PYBIN" -c "import executorch.backends.qualcomm; print('executorch.backends.qualcomm imported successfully')"
+  "$PYBIN" -c "from executorch.export.target_recipes import get_android_recipe; recipe = get_android_recipe('android-arm64-snapdragon-fp16'); print(f'executorch.export.target_recipes imported successfully: {recipe}')"
 
   echo "=== [$LABEL] List installed executorch/backends/qualcomm/python ==="
   local SITE_DIR
 
@@ -41,6 +41,18 @@ class ModelTest:
 
 
 def run_tests(model_tests: List[ModelTest]) -> None:
+    # Test that we can import the portable_lib module - verifies RPATH is correct
+    print("Testing portable_lib import...")
+    try:
+        from executorch.extension.pybindings._portable_lib import (  # noqa: F401
+            _load_for_executorch,
+        )
+
+        print("✓ Successfully imported _load_for_executorch from portable_lib")
+    except ImportError as e:
+        print(f"✗ Failed to import portable_lib: {e}")
+        raise
+
     # Why are we doing these envvar shenanigans? Since we build the testers, which
     # use buck, we cannot run as root. This is a sneaky way of getting around that
     # test.
 
@@ -869,6 +869,21 @@ if(EXECUTORCH_BUILD_PYBIND)
   target_compile_options(portable_lib PUBLIC ${_pybind_compile_options})
   target_link_libraries(portable_lib PRIVATE ${_dep_libs})
 
+  # Set RPATH to find PyTorch libraries relative to the installation location
+  # This goes from executorch/extension/pybindings up to site-packages, then to
+  # torch/lib
+  if(APPLE)
+    set_target_properties(
+      portable_lib PROPERTIES BUILD_RPATH "@loader_path/../../../torch/lib"
+                              INSTALL_RPATH "@loader_path/../../../torch/lib"
+    )
+  else()
+    set_target_properties(
+      portable_lib PROPERTIES BUILD_RPATH "$ORIGIN/../../../torch/lib"
+                              INSTALL_RPATH "$ORIGIN/../../../torch/lib"
+    )
+  endif()
+
   install(
     TARGETS portable_lib
     EXPORT ExecuTorchTargets
 
@@ -3,13 +3,15 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+from typing import Set, Type
+
 import torch
 from executorch.backends.arm._passes import ArmPass
 from executorch.backends.arm._passes.arm_pass_utils import get_first_fake_tensor
 from executorch.backends.transforms.utils import create_constant_placeholder
 
 from executorch.exir.dialects._ops import ops as exir_ops
-from executorch.exir.pass_base import PassResult
+from executorch.exir.pass_base import ExportPass, PassResult
 from torch.export.graph_signature import InputKind
 
 
@@ -19,6 +21,8 @@ class AddBiasPass(ArmPass):
     The bias is set to zero.
     """
 
+    _passes_required_after: Set[Type[ExportPass]] = set()
+
     targeted_ops = (exir_ops.edge.aten.convolution.default,)
 
     def call(self, graph_module):
 
@@ -7,7 +7,7 @@
 
 import itertools
 import operator
-from typing import cast, List
+from typing import cast, List, Set, Type
 
 import torch
 from executorch.backends.arm._passes.arm_pass_utils import create_node
@@ -29,6 +29,8 @@ class AnnotateDecomposedMatmulPass(ExportPass):
     matmul-op (can be mm or bmm).
     """
 
+    _passes_required_after: Set[Type[ExportPass]] = set()
+
     def _match_partition_to_node(
         self, node: torch.fx.Node, partitioned_inputs: List[torch.fx.Node]
     ) -> torch.fx.Node:
 
@@ -3,9 +3,12 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+
+from typing import Set, Type
+
 from executorch.backends.arm._passes import ArmPass
 from executorch.backends.arm._passes.arm_pass_utils import get_output_dim_orders
-from executorch.exir.pass_base import PassResult
+from executorch.exir.pass_base import ExportPass, PassResult
 
 
 class AnnotateOutputDimOrderPass(ArmPass):
@@ -14,6 +17,8 @@ class AnnotateOutputDimOrderPass(ArmPass):
     for verifying that the dim order does not change unexpectedly in later passes.
     """
 
+    _passes_required_after: Set[Type[ExportPass]] = set()
+
     def call(self, graph_module):
         output_node = graph_module.graph.output_node()
         output_node.meta["original_dim_orders"] = get_output_dim_orders(graph_module)
 
@@ -6,7 +6,8 @@
 # pyre-unsafe
 
 import traceback
-from typing import Optional
+from abc import abstractmethod
+from typing import List, Optional, Set, Type
 
 import torch
 from executorch.exir.pass_base import ExportPass, NodeMetadata
@@ -19,6 +20,36 @@ def __init__(self, exported_program: Optional[torch.export.ExportedProgram] = No
         super(ArmPass, self).__init__()
         self.exported_program = exported_program
 
+    @property
+    @abstractmethod
+    def _passes_required_after(self) -> Set[Type[ExportPass]]:
+        """
+        Pass types that have to be scheduled somewhere after this pass.
+
+        Declared abstract here; each subclass is expected to supply its own set
+        (an empty set when nothing is required).
+        """
+
+    @staticmethod
+    def get_required_passes(pass_) -> List[str]:
+        """
+        Collect the names of the passes that `pass_` needs to have run after it.
+
+        The names are resolved with `ArmPass.get_name` and returned in sorted
+        order. An object without a `_passes_required_after` attribute yields an
+        empty list.
+        """
+        # Objects that do not declare any requirement contribute nothing.
+        if not hasattr(pass_, "_passes_required_after"):
+            return []
+
+        required_names = [
+            ArmPass.get_name(required) for required in pass_._passes_required_after
+        ]
+        # Sort so the result does not depend on set iteration order.
+        required_names.sort()
+        return required_names
+
+    @staticmethod
+    def get_name(pass_) -> str:
+        """
+        Resolve a human-readable name for a pass.
+
+        An `ExportPass` instance is named after its class; any other object is
+        named by its own `__name__` attribute (e.g. a class or a function).
+
+        Raises:
+            ValueError: if `pass_` is neither an `ExportPass` instance nor
+                carries a `__name__` attribute.
+        """
+        # Instances are checked first, so they are always named by their class.
+        if isinstance(pass_, ExportPass):
+            return pass_.__class__.__name__
+
+        if not hasattr(pass_, "__name__"):
+            raise ValueError(
+                f"Cannot get name for pass: {pass_}. It must be an instance of ExportPass or have a __name__ attribute."
+            )
+
+        return pass_.__name__
+
     def call_operator(self, op, args, kwargs, meta, updated: Optional[bool] = False):
         if not updated:
             return super().call_operator(op, args, kwargs, meta)
 
@@ -7,6 +7,9 @@
 
 # pyre-unsafe
 
+
+from collections import defaultdict
+
 import executorch.backends.arm.tosa.dialect  # noqa: unused
 from executorch.backends.arm._passes import (
     AddBiasPass,
@@ -94,6 +97,7 @@
     UnsqueezeScalarPlaceholdersPass,
 )
 
+from executorch.backends.arm._passes.arm_pass import ArmPass
 from executorch.backends.arm.tosa.specification import (
     TosaLoweringContext,
     TosaSpecification,
@@ -115,6 +119,32 @@ def __init__(self, tosa_spec: TosaSpecification) -> None:
         self.tosa_spec = tosa_spec
         super().__init__()
 
+    def validate_constraints_mandatory(self):
+        """
+        Check that every pass required after another pass is actually scheduled
+        later in `self.passes`.
+
+        Note that this differs from the original validate_constraints function, which
+        only checks the order of passes.
+
+        Raises:
+            RuntimeError: listing each "required pass must run after requiring
+                pass" constraint that is still unmet at the end of the pass list.
+        """
+        # Maps the name of a still-missing required pass to the names of the
+        # passes that asked for it.
+        pending = defaultdict(list)
+
+        for scheduled in self.passes:
+            scheduled_name = ArmPass.get_name(scheduled)
+            for needed_name in ArmPass.get_required_passes(scheduled):
+                pending[needed_name].append(scheduled_name)
+
+            # Reaching this pass satisfies every requirement recorded for it so
+            # far (including one it may have just registered on itself).
+            pending.pop(scheduled_name, None)
+
+        if not pending:
+            return
+
+        violations = [
+            f"  - {required_pass} must run after {requiring_pass}\n"
+            for required_pass, requiring_passes in pending.items()
+            for requiring_pass in requiring_passes
+        ]
+        raise RuntimeError(
+            "The following constraints for passes are not met:\n" + "".join(violations)
+        )
+
+
     def _transform(self, graph_module: GraphModule):
         with TosaLoweringContext(self.tosa_spec):
             return self(graph_module).graph_module
@@ -125,7 +155,6 @@ def _tosa_INT_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
         self.add_pass(RemoveGetItemPass())
         self.add_pass(ConvertSplitToSlicePass())
         self.add_pass(ConvertMmToBmmPass())
-        self.add_pass(DecomposeLinearVectorNormPass())
         self.add_pass(
             DecomposeMeanDimPass(exported_program.graph_module, self.tosa_spec)
         )
@@ -175,6 +204,7 @@ def _tosa_INT_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
         self.add_pass(RemoveNoopPass())
         self.add_pass(InsertRescalePass())
 
+        self.validate_constraints_mandatory()
         return self._transform(exported_program.graph_module)
 
     def _tosa_FP_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
@@ -258,6 +288,7 @@ def _tosa_FP_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
         self.add_pass(RemoveNoopPass())
         self.add_pass(InsertRescalePass())
 
+        self.validate_constraints_mandatory()
         return self._transform(exported_program.graph_module)
 
     def transform_to_backend_pipeline(self, exported_program: ExportedProgram):
 
@@ -3,6 +3,8 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+from typing import Set, Type
+
 from executorch.backends.arm._passes import ArmPass
 
 from executorch.backends.arm._passes.arm_pass_utils import (
@@ -12,7 +14,7 @@
 
 from executorch.exir.dialects._ops import ops as exir_ops
 
-from executorch.exir.pass_base import PassResult
+from executorch.exir.pass_base import ExportPass, PassResult
 from torch.fx import GraphModule, Node
 
 
@@ -22,6 +24,8 @@ class BroadcastArgsPass(ArmPass):
     This is done when more than one arg needs broadcasting.
     """
 
+    _passes_required_after: Set[Type[ExportPass]] = set()
+
     targeted_ops = {
         exir_ops.edge.aten.add.Tensor,
         exir_ops.edge.aten.sub.Tensor,