
Commit 9ce87e3

Update base for Update on "[ET-VK] Using a single GPU buffer for all tensor uniforms."
This diff changes the Tensor class to store all uniforms in a single uniform buffer. Entities stored in uniforms, i.e. sizes, strides, numel, and logical limits, are now stored in one buffer, and their offsets are stored as unsigned ints in the Tensor class.

Other changes include:
- a new ctor for the ParamsBuffer class to allow allocation by size, without a data ptr;
- an offset input to the Buffer::data function;
- an offset parameter to the BufferBindInfo ctor, so an additional offset can be supplied when binding a buffer.

Differential Revision: [D65841750](https://our.internmc.facebook.com/intern/diff/D65841750/)

[ghstack-poisoned]
2 parents 40b1f5d + b9a1762 commit 9ce87e3
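To make the single-buffer layout concrete, here is a minimal, self-contained C++ sketch of the idea. The names `UniformBuffer`, `TensorUniforms`, and `kAlign` are hypothetical stand-ins (plain host memory, no Vulkan); they only loosely mirror the ParamsBuffer, Buffer::data, and BufferBindInfo changes described in the summary and are not the backend's actual API:

```cpp
// Hypothetical sketch: one allocation holds all per-tensor uniforms,
// and each field is addressed by an offset into that allocation.
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

// Stand-in for a GPU-visible buffer allocated by size, with no initial data ptr.
struct UniformBuffer {
    std::vector<uint8_t> storage;
    explicit UniformBuffer(size_t size) : storage(size) {}
    // data() takes an offset so callers can address a sub-range.
    uint8_t* data(size_t offset = 0) { return storage.data() + offset; }
};

// Stand-in for a bind description: which buffer, at which extra offset.
struct BufferBindInfo {
    UniformBuffer* buffer;
    size_t offset;
};

struct TensorUniforms {
    // Uniform-buffer offsets typically need alignment (64B is a common bound).
    static constexpr size_t kAlign = 64;
    UniformBuffer ubo;
    size_t sizes_offset, strides_offset, numel_offset, limits_offset;

    TensorUniforms()
        : ubo(4 * kAlign),
          sizes_offset(0 * kAlign),
          strides_offset(1 * kAlign),
          numel_offset(2 * kAlign),
          limits_offset(3 * kAlign) {}

    template <typename T>
    void write(size_t offset, const T& value) {
        std::memcpy(ubo.data(offset), &value, sizeof(T));
    }

    BufferBindInfo bind_numel() { return {&ubo, numel_offset}; }
};

int main() {
    TensorUniforms t;
    t.write(t.numel_offset, int32_t{24});

    // Binding a field means binding a sub-range of the shared buffer.
    BufferBindInfo info = t.bind_numel();
    int32_t numel = 0;
    std::memcpy(&numel, info.buffer->data(info.offset), sizeof(numel));
    std::cout << "numel bound at offset " << info.offset << ": " << numel << "\n";
    return 0;
}
```

The design point is that one allocation plus per-field offsets replaces several small per-uniform buffers, so binding a field becomes binding a (buffer, offset) sub-range of the shared buffer rather than managing a separate descriptor per uniform.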

File tree

26 files changed: +249 −176 lines changed


.ci/scripts/gather_test_models.py

Lines changed: 11 additions & 9 deletions
@@ -20,16 +20,16 @@
 CUSTOM_RUNNERS = {
     "linux": {
         # This one runs OOM on smaller runner, the root cause is unclear (T163016365)
-        "w2l": "linux.12xlarge",
-        "ic4": "linux.12xlarge",
-        "resnet50": "linux.12xlarge",
-        "llava": "linux.12xlarge",
-        "llama3_2_vision_encoder": "linux.12xlarge",
-        # "llama3_2_text_decoder": "linux.12xlarge", # TODO: re-enable test when Huy's change is in / model gets smaller.
+        "w2l": "linux.4xlarge.memory",
+        "ic4": "linux.4xlarge.memory",
+        "resnet50": "linux.4xlarge.memory",
+        "llava": "linux.4xllarge.memory",
+        "llama3_2_vision_encoder": "linux.4xlarge.memory",
+        "llama3_2_text_decoder": "linux.4xlarge.memory",
         # This one causes timeout on smaller runner, the root cause is unclear (T161064121)
-        "dl3": "linux.12xlarge",
-        "emformer_join": "linux.12xlarge",
-        "emformer_predict": "linux.12xlarge",
+        "dl3": "linux.4xlarge.memory",
+        "emformer_join": "linux.4xlarge.memory",
+        "emformer_predict": "linux.4xlarge.memory",
     }
 }

@@ -39,10 +39,12 @@
     "linux": {
         "mobilebert": 90,
         "emformer_predict": 360,
+        "llama3_2_text_decoder": 360,
     },
     "macos": {
         "mobilebert": 90,
         "emformer_predict": 360,
+        "llama3_2_text_decoder": 360,
     },
 }

.ci/scripts/setup-macos.sh

Lines changed: 3 additions & 0 deletions
@@ -49,6 +49,9 @@ install_buck() {

   rm "${BUCK2}"
   popd
+
+  # Kill all running buck2 daemon for a fresh start
+  buck2 killall || true
 }

 function write_sccache_stub() {

.github/workflows/apple.yml

Lines changed: 6 additions & 0 deletions
@@ -42,6 +42,8 @@ jobs:

   build-demo-ios:
     name: build-demo-ios
+    # NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway
+    if: ${{ !github.event.pull_request.head.repo.fork }}
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     secrets: inherit
     with:

@@ -190,6 +192,8 @@
       ) done

   upload-frameworks-ios:
+    # NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway
+    if: ${{ !github.event.pull_request.head.repo.fork }}
     runs-on: ubuntu-22.04
     needs: [build-frameworks-ios, set-version]
     timeout-minutes: 30

@@ -278,6 +282,8 @@

   build-benchmark-app:
     name: build-benchmark-app
+    # NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway
+    if: ${{ !github.event.pull_request.head.repo.fork }}
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     secrets: inherit
     with:

.github/workflows/pull.yml

Lines changed: 1 addition & 1 deletion
@@ -332,7 +332,7 @@ jobs:
       docker-image: executorch-ubuntu-22.04-clang12

   unittest-arm:
-    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.2xlarge
       docker-image: executorch-ubuntu-22.04-arm-sdk

.github/workflows/trunk.yml

Lines changed: 4 additions & 2 deletions
@@ -131,7 +131,7 @@ jobs:

   test-arm-backend-delegation:
     name: test-arm-backend-delegation
-    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.2xlarge
       docker-image: executorch-ubuntu-22.04-arm-sdk

@@ -157,7 +157,7 @@

   test-arm-reference-delegation:
     name: test-arm-reference-delegation
-    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.2xlarge
       docker-image: executorch-ubuntu-22.04-arm-sdk

@@ -351,6 +351,8 @@
     done

   test-huggingface-transformers:
+    # NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway
+    if: ${{ !github.event.pull_request.head.repo.fork }}
     name: test-huggingface-transformers
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     secrets: inherit

backends/apple/coreml/runtime/test/ETCoreMLModelDebuggerTests.mm

Lines changed: 0 additions & 1 deletion
@@ -151,7 +151,6 @@ - (void)testMV3ProgramDebugging {
     XCTAssertNotNil(debuggingResults[make_path_with_output_name("aten__native_batch_norm_legit_no_training_default_13_cast_fp16")]);
     XCTAssertNotNil(debuggingResults[make_path_with_output_name("_inversed_aten_div_tensor_24_cast_fp16")]);
     XCTAssertNotNil(debuggingResults[make_path_with_output_name("aten_mean_dim_7_cast_fp16")]);
-    XCTAssertNotNil(debuggingResults[make_path_with_output_name("aten_clamp_default_54_cast_fp16")]);
     XCTAssertNotNil(debuggingResults[make_path_with_output_name("aten__native_batch_norm_legit_no_training_default_22_cast_fp16")]);
     XCTAssertNotNil(debuggingResults[make_path_with_output_name("aten_mul_tensor_27_cast_fp16")]);
 }

backends/apple/coreml/runtime/test/ETCoreMLModelProfilerTests.mm

Lines changed: 0 additions & 1 deletion
@@ -146,7 +146,6 @@ - (void)testMV3ProgramProfiling {
     XCTAssertNotNil(profilingResult[make_path_with_output_name("aten__native_batch_norm_legit_no_training_default_13_cast_fp16")]);
     XCTAssertNotNil(profilingResult[make_path_with_output_name("_inversed_aten_div_tensor_24_cast_fp16")]);
     XCTAssertNotNil(profilingResult[make_path_with_output_name("aten_mean_dim_7_cast_fp16")]);
-    XCTAssertNotNil(profilingResult[make_path_with_output_name("aten_clamp_default_54_cast_fp16")]);
     XCTAssertNotNil(profilingResult[make_path_with_output_name("aten__native_batch_norm_legit_no_training_default_22_cast_fp16")]);
     XCTAssertNotNil(profilingResult[make_path_with_output_name("aten_mul_tensor_27_cast_fp16")]);
 };

backends/arm/_passes/cast_int64_pass.py

Lines changed: 32 additions & 11 deletions
@@ -5,8 +5,15 @@

 # pyre-unsafe

+import logging
+
 import torch
+from executorch.backends.arm._passes.arm_pass_utils import is_param_node
 from executorch.exir.pass_base import ExportPass, PassResult
+from torch._export.utils import is_buffer
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.WARNING)


 class CastInt64ToInt32Pass(ExportPass):
@@ -18,17 +25,31 @@ def _to_int32(self, graph_module: torch.fx.GraphModule):
         for node in graph_module.graph.nodes:
             fake_tensor = node.meta["val"]
             if isinstance(fake_tensor, torch._subclasses.fake_tensor.FakeTensor):
-                if node.meta["val"].dtype == torch.int64:
-                    node.meta["val"] = node.meta["val"].to(torch.int32)
-                    buffer_name = (
-                        self.exported_program.graph_signature.inputs_to_buffers[
-                            node.name
-                        ]
-                    )
-                    new_tensor = self.exported_program.state_dict[buffer_name].to(
-                        torch.int32
-                    )
-                    self.exported_program.state_dict[buffer_name] = new_tensor
+                if node.meta["val"].dtype == torch.int64 and is_param_node(
+                    self.exported_program, node
+                ):
+                    if is_buffer(self.exported_program, node):
+                        node.meta["val"] = node.meta["val"].to(torch.int32)
+                        buffer_name = (
+                            self.exported_program.graph_signature.inputs_to_buffers[
+                                node.name
+                            ]
+                        )
+                        buffer = self.exported_program.state_dict[node.name]
+                        logger.warning(
+                            f"Casting buffer {node.name} from torch.int64 to torch.int32"
+                            f" defined in {node.meta['stack_trace']}"
+                        )
+                        if torch.min(buffer) < torch.iinfo(torch.int32).min:
+                            raise RuntimeError(
+                                f"Buffer {node.name} has value < {torch.iinfo(torch.int32).min}"
+                            )
+                        if torch.max(buffer) > torch.iinfo(torch.int32).max:
+                            raise RuntimeError(
+                                f"Buffer {node.name} has value > {torch.iinfo(torch.int32).max}"
+                            )
+                        buffer_int32 = buffer.to(torch.int32)
+                        self.exported_program.state_dict[buffer_name] = buffer_int32

     def call(self, graph_module: torch.fx.GraphModule):
         self._to_int32(graph_module)

backends/arm/_passes/scalars_to_attribute_pass.py

Lines changed: 5 additions & 0 deletions
@@ -51,6 +51,11 @@ def call(self, graph_module: GraphModule) -> PassResult:
             if isinstance(arg, Node):
                 new_args.append(arg)
                 continue
+            if isinstance(arg, int) and not torch.is_floating_point(
+                get_first_fake_tensor(n)
+            ):
+                new_args.append(arg)
+                continue

             prefix = "_tensor_constant_"
             get_new_attr_name = get_new_attr_name_with_prefix(prefix)

backends/arm/test/ops/test_scalars.py

Lines changed: 9 additions & 0 deletions
@@ -75,6 +75,12 @@ def forward(self, x):
             x = 1.0 + x
             return x

+    class ShiftInplaceSub(torch.nn.Module):
+        def forward(self, x):
+            x = x >> 4
+            x -= 10
+            return x
+
     # Inplace ops end with '_' (from aten naming)
     ops = [
         ("Add", Add()),

@@ -160,3 +166,6 @@ def test_MI_const(self, test_name: str, op: torch.nn.Module, x):
     @parameterized.expand(tensor_scalar_tests)
     def test_BI(self, test_name: str, op: torch.nn.Module, x, y):
         self._test_add_tosa_BI_pipeline(op, (x, y))
+
+    def test_shift_sub_inplace_tosa_MI(self):
+        self._test_add_tosa_MI_pipeline(self.ShiftInplaceSub(), (torch.IntTensor(5),))
