
Commit f53e004

Update on "[ExecuTorch][#10447] Extend PyBundledModule with extension.BundledModule"
#10447

# Context

This is one step of #9638. In #9638, we want `extension.Module` to be the single source of implementation in `pybindings`, which means that `pybindings.PyModule` should use `extension.Module` rather than its own `pybindings.Module`.

# Proposal

Now that `extension.BundledModule` is ready, we want to test it out by having the existing `PyBundledModule` extend it, and by letting `verify_result_with_bundled_expected_output` use it, so that the whole flow can be exercised end to end with https://github.com/pytorch/executorch/blob/fb45e19055a92d2a91a4d4b7008e135232cbb14b/devtools/bundled_program/test/test_end2end.py

Differential Revision: [D73564127](https://our.internmc.facebook.com/intern/diff/D73564127/)

[ghstack-poisoned]
2 parents 580d013 + 7e42657 commit f53e004
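In rough terms, the flow under test looks like the following minimal Python sketch. It is hedged: the pybindings entry points and the method signatures (`load_bundled_input`, `plan_execute`, `verify_result_with_bundled_expected_output`) are assumptions modeled on the test_end2end.py file referenced above, not confirmed by this diff.

```python
# Hedged sketch; names are assumptions modeled on test_end2end.py.
from executorch.extension.pybindings.portable_lib import (
    _load_bundled_program_from_buffer,
    _load_for_executorch_from_bundled_program,
)

with open("bundled_program.bpte", "rb") as f:
    bundled_buffer = f.read()

# PyBundledModule (now extending extension.BundledModule) wraps the buffer.
bundled = _load_bundled_program_from_buffer(bundled_buffer)
module = _load_for_executorch_from_bundled_program(bundled)

method, testset_idx = "forward", 0
module.load_bundled_input(bundled, method, testset_idx)
module.plan_execute(method)
# The method this stack routes through extension.BundledModule:
# compare actual outputs against the bundled expected outputs.
module.verify_result_with_bundled_expected_output(bundled, method, testset_idx)
```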


61 files changed: +784, -93 lines

.github/workflows/_link_check.yml

Lines changed: 41 additions & 0 deletions
@@ -0,0 +1,41 @@
+on:
+  workflow_call:
+    inputs:
+      ref:
+        type: string
+        required: true
+
+jobs:
+  lint-urls:
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    with:
+      runner: linux.2xlarge
+      docker-image: executorch-ubuntu-22.04-linter
+      submodules: 'none'
+      fetch-depth: 0
+      ref: ${{ inputs.ref }}
+      timeout: 90
+      script: |
+        ./scripts/lint_urls.sh $(
+          [ "${{ github.event_name }}" = "pull_request" ] \
+            && git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} \
+            || [ "${{ github.event_name }}" = "push" ] \
+            && git diff --name-only ${{ github.event.before }} ${{ github.sha }}
+        )
+
+  lint-xrefs:
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    with:
+      runner: linux.2xlarge
+      docker-image: executorch-ubuntu-22.04-linter
+      submodules: 'none'
+      fetch-depth: 0
+      ref: ${{ inputs.ref }}
+      timeout: 90
+      script: |
+        ./scripts/lint_xrefs.sh $(
+          [ "${{ github.event_name }}" = "pull_request" ] \
+            && git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} \
+            || [ "${{ github.event_name }}" = "push" ] \
+            && git diff --name-only ${{ github.event.before }} ${{ github.sha }}
+        )
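The `$( ... )` command substitution selects which files to pass to the linter: on `pull_request` events it diffs the PR head against its base, on `push` events it diffs against the previous commit, and for any other event it expands to nothing. A Python paraphrase of the intended if/elif selection (hedged: `diff_pr`/`diff_push` stand in for the `git diff --name-only` calls, and the assumption that the lint scripts fall back to scanning everything when given no arguments is not shown in this diff):

```python
from typing import Callable

def files_to_lint(event: str,
                  diff_pr: Callable[[], list[str]],
                  diff_push: Callable[[], list[str]]) -> list[str]:
    # Mirrors the shell chain `[ pr ] && git diff ... || [ push ] && git diff ...`:
    # exactly one branch fires per event; any other event yields no arguments.
    if event == "pull_request":
        return diff_pr()
    if event == "push":
        return diff_push()
    return []

# Example: a push event diffs against the previous commit.
print(files_to_lint("push", lambda: [], lambda: ["README.md"]))  # ['README.md']
```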

.github/workflows/lint.yml

Lines changed: 2 additions & 21 deletions
@@ -64,29 +64,10 @@ jobs:

         exit $RC

-  lint-urls:
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+  link-check:
+    uses: ./.github/workflows/_link_check.yml
     with:
-      runner: linux.2xlarge
-      docker-image: executorch-ubuntu-22.04-linter
-      submodules: 'none'
-      fetch-depth: 0
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        ./scripts/lint_urls.sh
-
-  lint-xrefs:
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    with:
-      runner: linux.2xlarge
-      docker-image: executorch-ubuntu-22.04-linter
-      submodules: 'none'
-      fetch-depth: 0
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        ./scripts/lint_xrefs.sh

   android-java-format:
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main

.github/workflows/nightly.yml

Lines changed: 6 additions & 0 deletions
@@ -30,3 +30,9 @@ jobs:
       test-infra-ref: main
       updatebot-token: ${{ secrets.UPDATEBOT_TOKEN }}
       pytorchbot-token: ${{ secrets.GH_PYTORCHBOT_TOKEN }}
+
+  link-check:
+    needs: update-pytorch-commit-hash
+    uses: ./.github/workflows/_link_check.yml
+    with:
+      ref: ${{ github.sha }}

README.md

Lines changed: 1 addition & 1 deletion
@@ -51,7 +51,7 @@ To get started you can:

 - Visit the [Step by Step Tutorial](https://pytorch.org/executorch/stable/getting-started.html) to get things running locally and deploy a model to a device
 - Use this [Colab Notebook](https://colab.research.google.com/drive/1qpxrXC3YdJQzly3mRg-4ayYiOjC6rue3?usp=sharing) to start playing around right away
-- Jump straight into LLM use cases by following specific instructions for [Llama](examples/models/llama/README.md) and [Llava](examples/models/llava/README.md)
+- Jump straight into LLM use cases by following specific instructions for popular open-source models such as [Llama](examples/models/llama/README.md), [Qwen 3](examples/models/qwen3/README.md), [Phi-4-mini](examples/models/phi_4_mini/README.md), and [Llava](examples/models/llava/README.md)

 ## Feedback and Engagement

backends/apple/coreml/runtime/test/ETCoreMLModelManagerTests.mm

Lines changed: 76 additions & 0 deletions
@@ -15,6 +15,9 @@
 #import <XCTest/XCTest.h>
 #import <executorch/runtime/platform/runtime.h>
 #import <model_logging_options.h>
+#import <multiarray.h>
+
+using namespace executorchcoreml;

 @interface ETCoreMLModelManagerTests : XCTestCase

@@ -148,4 +151,77 @@ - (void)testMulModelExecution {
     }
 }

+// See https://github.com/pytorch/executorch/pull/10465
+- (void)testAutoreleasepoolError {
+    NSURL *modelURL = [self.class bundledResourceWithName:@"add_coreml_all" extension:@"bin"];
+    NSError *localError = nil;
+    XCTAssertNotNil(modelURL);
+
+    NSData *modelData = [NSData dataWithContentsOfURL:modelURL];
+    MLModelConfiguration *configuration = [[MLModelConfiguration alloc] init];
+    configuration.computeUnits = MLComputeUnitsAll;
+    ModelHandle *modelHandle = [self.modelManager loadModelFromAOTData:modelData
+                                                         configuration:configuration
+                                                                 error:&localError];
+    XCTAssert(modelHandle);
+
+    ETCoreMLModel *model = [self.modelManager modelWithHandle:modelHandle];
+    XCTAssert(model);
+
+    NSArray<MLMultiArray *> *inputArrays =
+        [ETCoreMLTestUtils inputsForModel:model repeatedValues:@[@(2), @(3)] error:&localError];
+    XCTAssert(inputArrays);
+
+    std::vector<MultiArray> multiArrays;
+    multiArrays.reserve(inputArrays.count + model.orderedOutputNames.count);
+    for (MLMultiArray *array in inputArrays) {
+        auto dataTypeOpt = to_multiarray_data_type(array.dataType);
+        XCTAssert(dataTypeOpt.has_value());
+        auto dataType = dataTypeOpt.value();
+
+        std::vector<size_t> dims;
+        for (NSNumber *n in array.shape) {
+            dims.push_back(n.unsignedLongValue);
+        }
+
+        std::vector<ssize_t> strides(dims.size());
+        ssize_t currentStride = 1;
+        for (NSInteger i = dims.size() - 1; i >= 0; --i) {
+            strides[i] = currentStride;
+            currentStride *= dims[i];
+        }
+
+        multiArrays.emplace_back(array.dataPointer,
+                                 MultiArray::MemoryLayout(dataType, dims, strides));
+    }
+
+    auto inputLayout = multiArrays[0].layout();
+    size_t bufferSize = inputLayout.num_bytes();
+    for (NSUInteger i = 0; i < model.orderedOutputNames.count; ++i) {
+        multiArrays.emplace_back(calloc(1, bufferSize), inputLayout);
+    }
+    // corrupt first input shape to force error
+    {
+        auto originalLayout = multiArrays[0].layout();
+        auto corruptedDims = originalLayout.shape();
+        corruptedDims[0] += 1;
+        multiArrays[0] = MultiArray(multiArrays[0].data(),
+                                    MultiArray::MemoryLayout(originalLayout.dataType(),
+                                                             corruptedDims,
+                                                             originalLayout.strides()));
+    }
+
+    BOOL success = [self.modelManager executeModelWithHandle:modelHandle
+                                                     argsVec:multiArrays
+                                              loggingOptions:ModelLoggingOptions()
+                                                 eventLogger:nullptr
+                                                       error:&localError];
+    XCTAssertFalse(success);
+    XCTAssertNotNil(localError);
+
+    for (size_t i = inputArrays.count; i < multiArrays.size(); ++i) {
+        free(multiArrays[i].data());
+    }
+}
+
 @end
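The stride loop in the new test builds C-contiguous (row-major) strides from the shape: the innermost dimension gets stride 1, and each outer stride is the product of all dimensions after it. A minimal Python sketch of the same computation (the helper name is hypothetical):

```python
def contiguous_strides(dims: list[int]) -> list[int]:
    # Row-major strides: innermost dimension varies fastest (stride 1);
    # each outer stride is the product of all inner dimensions.
    strides = [1] * len(dims)
    for i in range(len(dims) - 2, -1, -1):
        strides[i] = strides[i + 1] * dims[i + 1]
    return strides

assert contiguous_strides([2, 3, 4]) == [12, 4, 1]
```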

backends/arm/_passes/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -39,6 +39,7 @@
 )
 from .fuse_batchnorm2d_pass import FuseBatchnorm2DPass  # noqa
 from .fuse_constant_ops_pass import ComputeConstantOpsAOT, FuseConstantArgsPass  # noqa
+from .fuse_equal_placeholders_pass import FuseEqualPlaceholdersPass  # noqa
 from .fuse_quantized_activation_pass import FuseQuantizedActivationPass  # noqa
 from .insert_rescales_pass import InsertRescalePass  # noqa
 from .insert_table_ops import InsertTableOpsPass  # noqa

backends/arm/_passes/arm_pass_manager.py

Lines changed: 3 additions & 0 deletions
@@ -40,6 +40,7 @@
     FoldAndAnnotateQParamsPass,
     FuseBatchnorm2DPass,
     FuseConstantArgsPass,
+    FuseEqualPlaceholdersPass,
     FuseQuantizedActivationPass,
     InsertRescalePass,
     InsertTableOpsPass,
@@ -113,6 +114,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(FuseConstantArgsPass(exported_program))

         self.add_pass(InsertTableOpsPass(exported_program))
+        self.add_pass(FuseEqualPlaceholdersPass(exported_program))
         self.add_pass(AnnotateChannelsLastDimOrder())
         self.add_pass(InsertRescalePass())

@@ -164,6 +166,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(FuseViewCopyTransform())
         self.add_pass(FuseConstantArgsPass(exported_program))
         self.add_pass(InsertTableOpsPass(exported_program))
+        self.add_pass(FuseEqualPlaceholdersPass(exported_program))
         self.add_pass(AnnotateChannelsLastDimOrder())
         self.add_pass(InsertRescalePass())

backends/arm/_passes/fuse_equal_placeholders_pass.py

Lines changed: 83 additions & 0 deletions
@@ -0,0 +1,83 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from executorch.backends.arm._passes.arm_pass_utils import (
+    get_constant_placeholder_kind,
+    get_param_tensor,
+    is_param_node,
+)
+from executorch.backends.transforms.utils import (
+    create_constant_placeholder,
+    delete_constant_placeholder,
+)
+from executorch.exir import ExportedProgram
+from executorch.exir.pass_base import ExportPass, PassResult
+
+
+class FuseEqualPlaceholdersPass(ExportPass):
+    """
+    This pass optimizes memory usage by finding constant placeholders
+    pointing to identical tensors and fusing them to one single placeholder
+    with multiple users.
+    """
+
+    def __init__(self, exported_program: ExportedProgram):
+        self.exported_program = exported_program
+        super().__init__()
+
+    def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
+        modified = False
+        const_placeholder_nodes = []
+        for node in graph_module.graph.nodes:
+            if is_param_node(self.exported_program, node):
+                const_placeholder_nodes.append(node)
+
+        while const_placeholder_nodes:
+
+            # Find equal tensors
+            node1 = const_placeholder_nodes.pop()
+            eq_nodes = [node1]
+            tensor1 = get_param_tensor(self.exported_program, node1)
+            if tensor1 is None:
+                continue
+
+            for node2 in const_placeholder_nodes:
+                tensor2 = get_param_tensor(self.exported_program, node2)
+                if tensor2 is None:
+                    continue
+
+                if torch.equal(tensor1, tensor2):
+                    eq_nodes.append(node2)
+
+            if len(eq_nodes) > 1:
+                common_name = node1.name + "_common"
+                common_kind = get_constant_placeholder_kind(
+                    self.exported_program, node1
+                )
+                common_persistent_buffer = True
+
+                with graph_module.graph.inserting_before(node1):
+                    common_node = create_constant_placeholder(
+                        self.exported_program,
+                        graph_module.graph,
+                        common_name,
+                        common_kind,
+                        tensor1,
+                        common_persistent_buffer,
+                    )
+
+                for eq_node in eq_nodes:
+                    eq_node.replace_all_uses_with(common_node)
+                    delete_constant_placeholder(self.exported_program, eq_node)
+                    if eq_node != node1:
+                        const_placeholder_nodes.remove(eq_node)
+
+                modified = True
+
+        if modified:
+            graph_module.recompile()
+            graph_module = super().call(graph_module).graph_module
+        return PassResult(graph_module=graph_module, modified=modified)
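The core of the pass is the `torch.equal` deduplication loop. A standalone sketch of the same idea on plain named tensors (hypothetical names, no FX graph plumbing):

```python
import torch

# Hypothetical constants as a name -> tensor map; the real pass walks
# FX placeholder nodes and rewires users instead.
constants = {
    "w1": torch.ones(2, 2),
    "w2": torch.ones(2, 2),   # bit-identical to w1, should fuse
    "b1": torch.zeros(2),
}

canonical: dict[str, str] = {}   # name -> name of the surviving tensor
survivors: list[str] = []
for name, tensor in constants.items():
    for kept in survivors:
        # torch.equal checks shape, dtype, and values, mirroring the
        # equality test in FuseEqualPlaceholdersPass.
        if torch.equal(constants[kept], tensor):
            canonical[name] = kept
            break
    else:
        survivors.append(name)
        canonical[name] = name

print(canonical)  # {'w1': 'w1', 'w2': 'w1', 'b1': 'b1'}
```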

backends/arm/scripts/build_executorch.sh

Lines changed: 1 addition & 1 deletion
@@ -137,7 +137,7 @@ cmake \

 echo "[$(basename $0)] Configured CMAKE"

-cmake --build ${et_build_dir} --parallel --target install --config ${build_type} --
+cmake --build ${et_build_dir} -j$(nproc) --target install --config ${build_type} --

 set +x
