Skip to content

Commit efc641d

Browse files
authored
Merge branch 'pytorch:main' into main
2 parents f776870 + 4bf3869 commit efc641d

File tree

161 files changed

+8861
-2073
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

161 files changed

+8861
-2073
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
7a064ed3eafa43f17412d434b395240c727b3000
1+
7a79b41e29a790ebb4b530eb98a89381e2d7de29

.ci/scripts/setup-samsung-linux-deps.sh

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,9 @@ install_devicefarm_cli() {
101101
}
102102

103103
reserve_if_needed() {
104+
# Set default value
105+
export DEVICE_RESERVED=0
106+
104107
if ! command -v devicefarm-cli >/dev/null 2>&1; then
105108
echo "[WARN] devicefarm-cli is not installed." >&2
106109
return 1
@@ -144,7 +147,11 @@ reserve_if_needed() {
144147

145148
if (( any_below_threshold )); then
146149
echo "[INFO] Reserving now."
147-
devicefarm-cli -R
150+
if ! devicefarm-cli -R; then
151+
echo "::warning::Failed to reserve a device. No devices are currently available." >&2
152+
echo "[WARN] Device reservation failed - continuing without device." >&2
153+
return 0
154+
fi
148155
else
149156
echo "[INFO] Don't need to be reserved."
150157
fi
@@ -174,7 +181,10 @@ reserve_if_needed() {
174181
if [[ -n "$reservation_id" ]]; then
175182
devicefarm-cli -C "$reservation_id"
176183
devicefarm-cli -E "ls /"
184+
export DEVICE_RESERVED=1
185+
echo "[INFO] Device successfully reserved and connected."
177186
else
187+
echo "::warning::No available devices found." >&2
178188
echo "[WARN] There are no available devices."
179189
fi
180190
}

.github/workflows/add-unanswered-to-project.yml

Lines changed: 36 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,20 @@ name: Add Open External Contributor PRs and Issues to PyTorch Org Project 136
22

33
on:
44
workflow_dispatch:
5-
pull_request:
6-
paths:
7-
.github/workflows/add-unanswered-to-project.yml
5+
schedule:
6+
# GitHub Actions cron uses UTC. These run at:
7+
# - 14:00 UTC -> 08:00 CST (UTC-6)
8+
# - 19:00 UTC -> 13:00 CST (UTC-6)
9+
- cron: "0 14 * * *"
10+
- cron: "0 19 * * *"
11+
pull_request:
12+
paths:
13+
- .github/workflows/add-unanswered-to-project.yml
814
jobs:
915
add_to_project:
1016
runs-on: ubuntu-latest
1117
steps:
12-
- name: Add open issues and open, non-draft PRs to org project (excluding certain authors)
18+
- name: Add open issues and open, non-draft PRs to org project (excluding certain authors and bots)
1319
uses: actions/github-script@v7
1420
with:
1521
github-token: ${{ secrets.ET_EXT_CONTRIB }}
@@ -41,13 +47,26 @@ jobs:
4147
"app/dependabot", "Erik-Lundell", "zingo", "AdrianLundell", "oscarandersson8218", "per", "Sebastian-Larsson", "SaoirseARM",
4248
"robell", "mansnils", "martinlsm", "freddan80", "YufengShi-dudu", "tom-arm", "perheld", "Jerry-Ge", "gggekov", "fumchin",
4349
"wwwind", "benkli01", "Tessil", "maddun01", "Michiel-Olieslagers", "armwaheed", "agrima1304", "emmakujala", "annietllnd",
44-
"MatthiasHertel80", "AlexTawseArm", "jmahbs", "morgolock", "Christoffer-JL", "ArmRyan", "xingguo01", "haowhsu-quic",
45-
"shewu-quic", "winskuo-quic", "chunit-quic", "DannyYuyang-quic", "chuntl", "thchenqti", "jethroqti", "chenweng-quic",
46-
"cymbalrush", "DenisVieriu97", "billmguo", "StrycekSimon", "jirioc", "robert-kalmar", "skywall", "MartinPavella",
47-
"roman-janik-nxp", "novak-vaclav ", "neuropilot-captain", "dijopaul", "cad-rlc", "cad-audio", "ynimmaga", "daniil-lyakhov",
48-
"emmanuel-ferdman", "cavusmustafa", "anzr299", "Jiseong-oh", "alexdean08"
50+
"MatthiasHertel80", "AlexTawseArm", "jmahbs", "morgolock", "Christoffer-JL", "ArmRyan", "xingguo01", "tgonzalezorlandoarm",
51+
"haowhsu-quic", "shewu-quic", "winskuo-quic", "chunit-quic", "DannyYuyang-quic", "chuntl", "thchenqti", "jethroqti",
52+
"chenweng-quic", "cymbalrush", "DenisVieriu97", "billmguo", "StrycekSimon", "jirioc", "robert-kalmar", "skywall",
53+
"MartinPavella", "roman-janik-nxp", "novak-vaclav", "neuropilot-captain", "dijopaul", "cad-rlc", "cad-audio", "ynimmaga",
54+
"daniil-lyakhov", "emmanuel-ferdman", "cavusmustafa", "anzr299", "Jiseong-oh", "alexdean08",
55+
// explicitly include the dependabot bot login seen in PRs
56+
"dependabot[bot]"
4957
]);
5058
59+
function isBotOrExcluded(user) {
60+
if (!user) return false;
61+
// GitHub sometimes marks bots with user.type === "Bot"
62+
if (user.type && user.type.toLowerCase() === "bot") return true;
63+
// Some bots use logins that end with [bot], e.g. dependabot[bot]
64+
if (user.login && user.login.endsWith("[bot]")) return true;
65+
// Explicit excluded list
66+
if (excludedAuthors.has(user.login)) return true;
67+
return false;
68+
}
69+
5170
async function addItem(contentId, type, number) {
5271
try {
5372
await github.graphql(`
@@ -69,7 +88,7 @@ jobs:
6988
}
7089
7190
try {
72-
// Add open issues (not PRs) and exclude by author
91+
// Add open issues (not PRs) and exclude by author/bots
7392
const issues = await github.paginate(
7493
github.rest.issues.listForRepo,
7594
{
@@ -80,12 +99,14 @@ jobs:
8099
}
81100
);
82101
for (const issue of issues) {
83-
if (!issue.pull_request && !excludedAuthors.has(issue.user.login)) {
102+
if (!issue.pull_request && !isBotOrExcluded(issue.user)) {
84103
await addItem(issue.node_id, 'issue', issue.number);
104+
} else {
105+
console.log(`Skipping issue #${issue.number} by ${issue.user && issue.user.login}`);
85106
}
86107
}
87108
88-
// Add open, non-draft PRs (regardless of review state), exclude by author
109+
// Add open, non-draft PRs (regardless of review state), exclude by author/bots
89110
const prs = await github.paginate(
90111
github.rest.pulls.list,
91112
{
@@ -95,8 +116,10 @@ jobs:
95116
}
96117
);
97118
for (const pr of prs) {
98-
if (!pr.draft && !excludedAuthors.has(pr.user.login)) {
119+
if (!pr.draft && !isBotOrExcluded(pr.user)) {
99120
await addItem(pr.node_id, 'pr', pr.number);
121+
} else {
122+
console.log(`Skipping PR #${pr.number} by ${pr.user && pr.user.login}`);
100123
}
101124
}
102125
} catch (error) {

.github/workflows/cuda.yml

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,8 @@ jobs:
8787
export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
8888
PYTHON_EXECUTABLE=python source .ci/scripts/test_model.sh "${{ matrix.model }}" cmake cuda
8989
90-
test-cuda-shims:
91-
name: test-cuda-shims
90+
unittest-cuda:
91+
name: unittest-cuda
9292
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
9393
permissions:
9494
id-token: write
@@ -103,17 +103,20 @@ jobs:
103103
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
104104
script: |
105105
set -eux
106-
# Install requirements
107-
bash ./install_requirements.sh
106+
# Install executorch in editable mode so custom op libs land in-tree
107+
bash ./install_executorch.sh
108108
109109
# Build ExecuTorch with CUDA support
110110
cmake --workflow --preset llm-release-cuda
111111
112-
# Build and run CUDA shim tests
112+
# Build and run CUDA shim tests (C++)
113113
pushd backends/cuda/runtime/shims/tests
114114
cmake --workflow --preset default
115115
popd
116116
117+
# Run CUDA backend Python tests, overrides addopts so that we don't run all tests in pytest.ini
118+
python -m pytest backends/cuda/tests backends/cuda/passes/tests -v -o "addopts="
119+
117120
export-model-cuda-artifact:
118121
name: export-model-cuda-artifact
119122
# Skip this job if the pull request is from a fork (HuggingFace secrets are not available)

.github/workflows/pull.yml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -945,6 +945,12 @@ jobs:
945945
export SAMSUNG_AI_LITECORE_KEY=$SECRET_SAMSUNG_AI_LITECORE_KEY
946946
source .ci/scripts/setup-samsung-linux-deps.sh
947947
948+
# Check if device was reserved
949+
if [[ "${DEVICE_RESERVED:-0}" != "1" ]]; then
950+
echo "::warning::Skipping tests - no Samsung device available"
951+
exit 0
952+
fi
953+
948954
# Test quant models
949955
model_scripts="deeplab_v3 edsr inception_v3 inception_v4 mobilenet_v2 mobilenet_v3 resnet18 resnet50 vit wav2letter"
950956
for m_script in $model_scripts; do
@@ -981,6 +987,12 @@ jobs:
981987
export SAMSUNG_AI_LITECORE_KEY=$SECRET_SAMSUNG_AI_LITECORE_KEY
982988
source .ci/scripts/setup-samsung-linux-deps.sh
983989
990+
# Check if device was reserved
991+
if [[ "${DEVICE_RESERVED:-0}" != "1" ]]; then
992+
echo "::warning::Skipping tests - no Samsung device available"
993+
exit 0
994+
fi
995+
984996
# Test models
985997
python -m unittest discover -s backends/samsung/test/models -p "test_*.py"
986998

backends/aoti/aoti_backend.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,10 @@ def preprocess(
156156
# Apply custom backend-specific passes
157157
custom_passes = cls.get_custom_passes(compile_specs)
158158
for custom_pass in custom_passes:
159-
custom_pass(device_edge_program.graph_module)
159+
if getattr(custom_pass, "requires_exported_program", False):
160+
custom_pass(device_edge_program)
161+
else:
162+
custom_pass(device_edge_program.graph_module)
160163

161164
# Run decompositions if any
162165
if decomposition_table:

backends/arm/MODELS.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# The following file contains all models that have been confirmed to be functional and tested for the Arm backend:
2+
- Conformer
3+
- Deit Tiny
4+
- DeepLab v3 (DL3)
5+
- Inception v3 (IC3)
6+
- Llama
7+
- Long Short-Term Memory (LSTM)
8+
- MobileNet v2 (MV2)
9+
- MobileNet v3 (MV3)
10+
- Some popular torch.nn.functional models (NN functional)
11+
- Some popular torch.nn.modules models (NN modules)
12+
- Some popular torch ops (Torch Functions)
13+
- Neural Super Sampler (NSS)
14+
- ResNet 18
15+
- Wav2Letter (W2L)
16+
- Stable Diffusion:
17+
* CLIP Text Encoder (CLIP Text with Projection)
18+
* Stable Diffusion 3 Transformer (SD3 Transformer)
19+
* T5 Encoder
20+
* VAE Encoder/Decoder (VAE)

backends/arm/_passes/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,9 @@
113113
from .replace_scalar_with_tensor_pass import ( # noqa
114114
ReplaceScalarWithTensorByProfilePass,
115115
)
116+
from .rewrite_bool_bitwise_not_to_logical_not_pass import ( # noqa
117+
RewriteBoolBitwiseNotToLogicalNotPass,
118+
)
116119
from .rewrite_bool_to_fp32_cast_via_int8_pass import ( # noqa
117120
RewriteBoolToFp32CastViaInt8Pass,
118121
)

backends/arm/_passes/arm_pass.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from abc import abstractmethod
99
from typing import Any, List, Optional, Set, Type
1010

11+
from executorch.backends.arm.constants import DISALLOW_TFA_META_KEY
1112
from executorch.exir.pass_base import ExportPass, NodeMetadata
1213
from torch.fx import GraphModule
1314
from torch.fx.passes.infra.pass_base import PassResult
@@ -16,9 +17,23 @@
1617
class ArmPass(ExportPass):
1718
"""Base class for Arm passes"""
1819

19-
def __init__(self) -> None:
20-
super().__init__()
20+
def __init__(self, tfa_pass: bool = False, *args, **kwargs) -> None:
21+
super().__init__(*args, **kwargs)
2122
self.submodule_depth = 0
23+
self.is_tfa_pass = tfa_pass
24+
25+
def allowed_to_transform(self, meta: NodeMetadata | dict[str, Any]) -> bool:
26+
if not self.is_tfa_pass:
27+
return True
28+
29+
if isinstance(meta, NodeMetadata):
30+
meta_dict = meta.data
31+
else:
32+
meta_dict = meta
33+
34+
disallow_tfa = meta_dict.get(DISALLOW_TFA_META_KEY, False)
35+
36+
return not disallow_tfa
2237

2338
@property
2439
@abstractmethod

backends/arm/_passes/arm_pass_manager.py

Lines changed: 39 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@
103103
RemoveNoopPass,
104104
ReplaceInfAndLimitValuesPass,
105105
ReplaceScalarWithTensorByProfilePass,
106+
RewriteBoolBitwiseNotToLogicalNotPass,
106107
RewriteBoolToFp32CastViaInt8Pass,
107108
RewriteConvPass,
108109
RewriteMatmulPass,
@@ -222,6 +223,7 @@ def _tosa_pipeline(
222223
self.add_passes(
223224
[
224225
FuseQuantizedActivationPass(),
226+
RewriteBoolBitwiseNotToLogicalNotPass(),
225227
RewriteBoolToFp32CastViaInt8Pass(),
226228
ConvertToClampPass(),
227229
DecomposeTOSAUnsupportedClampPass(),
@@ -376,65 +378,65 @@ def transform_to_backend_pipeline(
376378

377379
def transform_for_annotation_pipeline(self, graph_module: GraphModule):
378380
# Preprocessing passes
379-
self.add_pass(RemoveGraphAssertsPass())
381+
self.add_pass(RemoveGraphAssertsPass(tfa_pass=True))
380382

381383
# Transformation passes (pre scalar -> tensor)
382384
self.add_passes(
383385
[
384-
DecomposeSelectScatterPass(),
385-
ConvertInt64ConstOpsToInt32Pass(),
386-
ConvertInt64OutputOpsToInt32Pass(),
387-
InsertInt32CastsAfterInt64PlaceholdersPass(),
388-
DecomposeEmbeddingPass(),
389-
DecomposeScaledDotProductAttentionPass(),
390-
DecomposeRoundPass(),
391-
DecomposeLogitPass(),
392-
PromoteBoolOperandsPass(),
393-
DecomposeSignPass(),
394-
DecomposeAddmmPass(),
395-
DecomposeRemainderPass(),
396-
DecomposeFloorDividePass(),
397-
DecomposeDivTensorModePass(),
386+
DecomposeSelectScatterPass(tfa_pass=True),
387+
ConvertInt64ConstOpsToInt32Pass(tfa_pass=True),
388+
ConvertInt64OutputOpsToInt32Pass(tfa_pass=True),
389+
InsertInt32CastsAfterInt64PlaceholdersPass(tfa_pass=True),
390+
DecomposeEmbeddingPass(tfa_pass=True),
391+
DecomposeScaledDotProductAttentionPass(tfa_pass=True),
392+
DecomposeRoundPass(tfa_pass=True),
393+
DecomposeLogitPass(tfa_pass=True),
394+
PromoteBoolOperandsPass(tfa_pass=True),
395+
DecomposeSignPass(tfa_pass=True),
396+
DecomposeAddmmPass(tfa_pass=True),
397+
DecomposeRemainderPass(tfa_pass=True),
398+
DecomposeFloorDividePass(tfa_pass=True),
399+
DecomposeDivTensorModePass(tfa_pass=True),
398400
]
399401
)
400402

401403
# Scalars -> tensors
402404
self.add_passes(
403405
[
404-
ReplaceScalarWithTensorByProfilePass(),
405-
ScalarsToAttributePass(),
406+
ReplaceScalarWithTensorByProfilePass(tfa_pass=True),
407+
ScalarsToAttributePass(tfa_pass=True),
406408
]
407409
)
408410

409411
# Transformation passes (post scalar removal)
410412
self.add_passes(
411413
[
412-
NormalizeWhileInitialArgsPass(use_exir_clone=False),
413-
DecomposeAddSubAlphaPass(),
414-
DecomposeGroupNormPass(),
415-
DecomposeLayerNormPass(),
416-
DecomposeVarPass(),
417-
DecomposeMeanDimPass(graph_module, self.tosa_spec),
418-
DecomposeNotEqualPass(),
419-
DecomposeCosineSimilarityPass(),
420-
DecomposeGluPass(),
421-
DecomposeDivPass(),
422-
DecomposeLeakyReLUPass(),
423-
DecomposeLinalgVectorNormPass(),
424-
DecomposeSqrtPass(),
425-
DecomposeSiluPass(),
426-
DecomposeAvgPool2dPass(),
427-
DecomposeSoftmaxUnstablePass(),
428-
DecomposeSoftmaxPass(),
429-
ConvertMinMaxPass(),
414+
NormalizeWhileInitialArgsPass(use_exir_clone=False, tfa_pass=True),
415+
DecomposeAddSubAlphaPass(tfa_pass=True),
416+
DecomposeGroupNormPass(tfa_pass=True),
417+
DecomposeLayerNormPass(tfa_pass=True),
418+
DecomposeVarPass(tfa_pass=True),
419+
DecomposeMeanDimPass(graph_module, self.tosa_spec, tfa_pass=True),
420+
DecomposeNotEqualPass(tfa_pass=True),
421+
DecomposeCosineSimilarityPass(tfa_pass=True),
422+
DecomposeGluPass(tfa_pass=True),
423+
DecomposeDivPass(tfa_pass=True),
424+
DecomposeLeakyReLUPass(tfa_pass=True),
425+
DecomposeLinalgVectorNormPass(tfa_pass=True),
426+
DecomposeSqrtPass(tfa_pass=True),
427+
DecomposeSiluPass(tfa_pass=True),
428+
DecomposeAvgPool2dPass(tfa_pass=True),
429+
DecomposeSoftmaxUnstablePass(tfa_pass=True),
430+
DecomposeSoftmaxPass(tfa_pass=True),
431+
ConvertMinMaxPass(tfa_pass=True),
430432
]
431433
)
432434

433435
# Postprocessing passes
434436
self.add_passes(
435437
[
436-
ReplaceInfAndLimitValuesPass(),
437-
DecomposeMaskedFillPass(),
438+
ReplaceInfAndLimitValuesPass(tfa_pass=True),
439+
DecomposeMaskedFillPass(tfa_pass=True),
438440
]
439441
)
440442

0 commit comments

Comments
 (0)