
Commit f53e004

Update on "[ExecuTorch][#10447] Extend PyBundledModule with extension.BundledModule"
#10447

# Context

This is one step of #9638. In #9638, we want `extension.Module` to be the single source of implementation in `pybindings`, which means that `pybindings.PyModule` should use `extension.Module` rather than its own `pybindings.Module`.

# Proposal

Now that `extension.BundledModule` is ready, we want to test it out by having the existing `PyBundledModule` extend it, and by letting `verify_result_with_bundled_expected_output` use it, so that the whole flow can be exercised end to end with https://github.com/pytorch/executorch/blob/fb45e19055a92d2a91a4d4b7008e135232cbb14b/devtools/bundled_program/test/test_end2end.py

Differential Revision: [D73564127](https://our.internmc.facebook.com/intern/diff/D73564127/)

[ghstack-poisoned]
2 parents 580d013 + 7e42657 commit f53e004
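In rough terms, the flow under test looks like the following minimal Python sketch. It is hedged: the pybindings entry points and the method signatures (`load_bundled_input`, `plan_execute`, `verify_result_with_bundled_expected_output`) are assumptions modeled on the test_end2end.py file referenced above, not confirmed by this diff.

```python
# Hedged sketch; names are assumptions modeled on test_end2end.py.
from executorch.extension.pybindings.portable_lib import (
    _load_bundled_program_from_buffer,
    _load_for_executorch_from_bundled_program,
)

with open("bundled_program.bpte", "rb") as f:
    bundled_buffer = f.read()

# PyBundledModule (now extending extension.BundledModule) wraps the buffer.
bundled = _load_bundled_program_from_buffer(bundled_buffer)
module = _load_for_executorch_from_bundled_program(bundled)

method, testset_idx = "forward", 0
module.load_bundled_input(bundled, method, testset_idx)
module.plan_execute(method)
# The method this stack routes through extension.BundledModule:
# compare actual outputs against the bundled expected outputs.
module.verify_result_with_bundled_expected_output(bundled, method, testset_idx)
```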


61 files changed: +784, -93 lines

.github/workflows/_link_check.yml

Lines changed: 41 additions & 0 deletions
@@ -0,0 +1,41 @@
+on:
+  workflow_call:
+    inputs:
+      ref:
+        type: string
+        required: true
+
+jobs:
+  lint-urls:
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    with:
+      runner: linux.2xlarge
+      docker-image: executorch-ubuntu-22.04-linter
+      submodules: 'none'
+      fetch-depth: 0
+      ref: ${{ inputs.ref }}
+      timeout: 90
+      script: |
+        ./scripts/lint_urls.sh $(
+          [ "${{ github.event_name }}" = "pull_request" ] \
+            && git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} \
+            || [ "${{ github.event_name }}" = "push" ] \
+            && git diff --name-only ${{ github.event.before }} ${{ github.sha }}
+        )
+
+  lint-xrefs:
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    with:
+      runner: linux.2xlarge
+      docker-image: executorch-ubuntu-22.04-linter
+      submodules: 'none'
+      fetch-depth: 0
+      ref: ${{ inputs.ref }}
+      timeout: 90
+      script: |
+        ./scripts/lint_xrefs.sh $(
+          [ "${{ github.event_name }}" = "pull_request" ] \
+            && git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} \
+            || [ "${{ github.event_name }}" = "push" ] \
+            && git diff --name-only ${{ github.event.before }} ${{ github.sha }}
+        )
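The `$( ... )` command substitution selects which files to pass to the linter: on `pull_request` events it diffs the PR head against its base, on `push` events it diffs against the previous commit, and for any other event it expands to nothing. A Python paraphrase of the intended if/elif selection (hedged: `diff_pr`/`diff_push` stand in for the `git diff --name-only` calls, and the assumption that the lint scripts fall back to scanning everything when given no arguments is not shown in this diff):

```python
from typing import Callable

def files_to_lint(event: str,
                  diff_pr: Callable[[], list[str]],
                  diff_push: Callable[[], list[str]]) -> list[str]:
    # Mirrors the shell chain `[ pr ] && git diff ... || [ push ] && git diff ...`:
    # exactly one branch fires per event; any other event yields no arguments.
    if event == "pull_request":
        return diff_pr()
    if event == "push":
        return diff_push()
    return []

# Example: a push event diffs against the previous commit.
print(files_to_lint("push", lambda: [], lambda: ["README.md"]))  # ['README.md']
```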

.github/workflows/lint.yml

Lines changed: 2 additions & 21 deletions
@@ -64,29 +64,10 @@ jobs:

         exit $RC

-  lint-urls:
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+  link-check:
+    uses: ./.github/workflows/_link_check.yml
     with:
-      runner: linux.2xlarge
-      docker-image: executorch-ubuntu-22.04-linter
-      submodules: 'none'
-      fetch-depth: 0
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        ./scripts/lint_urls.sh
-
-  lint-xrefs:
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    with:
-      runner: linux.2xlarge
-      docker-image: executorch-ubuntu-22.04-linter
-      submodules: 'none'
-      fetch-depth: 0
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        ./scripts/lint_xrefs.sh

   android-java-format:
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main

.github/workflows/nightly.yml

Lines changed: 6 additions & 0 deletions
@@ -30,3 +30,9 @@ jobs:
       test-infra-ref: main
       updatebot-token: ${{ secrets.UPDATEBOT_TOKEN }}
       pytorchbot-token: ${{ secrets.GH_PYTORCHBOT_TOKEN }}
+
+  link-check:
+    needs: update-pytorch-commit-hash
+    uses: ./.github/workflows/_link_check.yml
+    with:
+      ref: ${{ github.sha }}

README.md

Lines changed: 1 addition & 1 deletion
@@ -51,7 +51,7 @@ To get started you can:

 - Visit the [Step by Step Tutorial](https://pytorch.org/executorch/stable/getting-started.html) to get things running locally and deploy a model to a device
 - Use this [Colab Notebook](https://colab.research.google.com/drive/1qpxrXC3YdJQzly3mRg-4ayYiOjC6rue3?usp=sharing) to start playing around right away
-- Jump straight into LLM use cases by following specific instructions for [Llama](examples/models/llama/README.md) and [Llava](examples/models/llava/README.md)
+- Jump straight into LLM use cases by following specific instructions for popular open-source models such as [Llama](examples/models/llama/README.md), [Qwen 3](examples/models/qwen3/README.md), [Phi-4-mini](examples/models/phi_4_mini/README.md), and [Llava](examples/models/llava/README.md)

 ## Feedback and Engagement

backends/apple/coreml/runtime/test/ETCoreMLModelManagerTests.mm

Lines changed: 76 additions & 0 deletions
@@ -15,6 +15,9 @@
 #import <XCTest/XCTest.h>
 #import <executorch/runtime/platform/runtime.h>
 #import <model_logging_options.h>
+#import <multiarray.h>
+
+using namespace executorchcoreml;

 @interface ETCoreMLModelManagerTests : XCTestCase

@@ -148,4 +151,77 @@ - (void)testMulModelExecution {
     }
 }

+// See https://github.com/pytorch/executorch/pull/10465
+- (void)testAutoreleasepoolError {
+    NSURL *modelURL = [self.class bundledResourceWithName:@"add_coreml_all" extension:@"bin"];
+    NSError *localError = nil;
+    XCTAssertNotNil(modelURL);
+
+    NSData *modelData = [NSData dataWithContentsOfURL:modelURL];
+    MLModelConfiguration *configuration = [[MLModelConfiguration alloc] init];
+    configuration.computeUnits = MLComputeUnitsAll;
+    ModelHandle *modelHandle = [self.modelManager loadModelFromAOTData:modelData
+                                                         configuration:configuration
+                                                                 error:&localError];
+    XCTAssert(modelHandle);
+
+    ETCoreMLModel *model = [self.modelManager modelWithHandle:modelHandle];
+    XCTAssert(model);
+
+    NSArray<MLMultiArray *> *inputArrays =
+        [ETCoreMLTestUtils inputsForModel:model repeatedValues:@[@(2), @(3)] error:&localError];
+    XCTAssert(inputArrays);
+
+    std::vector<MultiArray> multiArrays;
+    multiArrays.reserve(inputArrays.count + model.orderedOutputNames.count);
+    for (MLMultiArray *array in inputArrays) {
+        auto dataTypeOpt = to_multiarray_data_type(array.dataType);
+        XCTAssert(dataTypeOpt.has_value());
+        auto dataType = dataTypeOpt.value();
+
+        std::vector<size_t> dims;
+        for (NSNumber *n in array.shape) {
+            dims.push_back(n.unsignedLongValue);
+        }
+
+        std::vector<ssize_t> strides(dims.size());
+        ssize_t currentStride = 1;
+        for (NSInteger i = dims.size() - 1; i >= 0; --i) {
+            strides[i] = currentStride;
+            currentStride *= dims[i];
+        }
+
+        multiArrays.emplace_back(array.dataPointer,
+                                 MultiArray::MemoryLayout(dataType, dims, strides));
+    }
+
+    auto inputLayout = multiArrays[0].layout();
+    size_t bufferSize = inputLayout.num_bytes();
+    for (NSUInteger i = 0; i < model.orderedOutputNames.count; ++i) {
+        multiArrays.emplace_back(calloc(1, bufferSize), inputLayout);
+    }
+    // corrupt first input shape to force error
+    {
+        auto originalLayout = multiArrays[0].layout();
+        auto corruptedDims = originalLayout.shape();
+        corruptedDims[0] += 1;
+        multiArrays[0] = MultiArray(multiArrays[0].data(),
+                                    MultiArray::MemoryLayout(originalLayout.dataType(),
+                                                             corruptedDims,
+                                                             originalLayout.strides()));
+    }
+
+    BOOL success = [self.modelManager executeModelWithHandle:modelHandle
+                                                     argsVec:multiArrays
+                                              loggingOptions:ModelLoggingOptions()
+                                                 eventLogger:nullptr
+                                                       error:&localError];
+    XCTAssertFalse(success);
+    XCTAssertNotNil(localError);
+
+    for (size_t i = inputArrays.count; i < multiArrays.size(); ++i) {
+        free(multiArrays[i].data());
+    }
+}
+
 @end
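The stride loop in the new test builds C-contiguous (row-major) strides from the shape: the innermost dimension gets stride 1, and each outer stride is the product of all dimensions after it. A minimal Python sketch of the same computation (the helper name is hypothetical):

```python
def contiguous_strides(dims: list[int]) -> list[int]:
    # Row-major strides: innermost dimension varies fastest (stride 1);
    # each outer stride is the product of all inner dimensions.
    strides = [1] * len(dims)
    for i in range(len(dims) - 2, -1, -1):
        strides[i] = strides[i + 1] * dims[i + 1]
    return strides

assert contiguous_strides([2, 3, 4]) == [12, 4, 1]
```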

backends/arm/_passes/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -39,6 +39,7 @@
 )
 from .fuse_batchnorm2d_pass import FuseBatchnorm2DPass  # noqa
 from .fuse_constant_ops_pass import ComputeConstantOpsAOT, FuseConstantArgsPass  # noqa
+from .fuse_equal_placeholders_pass import FuseEqualPlaceholdersPass  # noqa
 from .fuse_quantized_activation_pass import FuseQuantizedActivationPass  # noqa
 from .insert_rescales_pass import InsertRescalePass  # noqa
 from .insert_table_ops import InsertTableOpsPass  # noqa

backends/arm/_passes/arm_pass_manager.py

Lines changed: 3 additions & 0 deletions
@@ -40,6 +40,7 @@
     FoldAndAnnotateQParamsPass,
     FuseBatchnorm2DPass,
     FuseConstantArgsPass,
+    FuseEqualPlaceholdersPass,
     FuseQuantizedActivationPass,
     InsertRescalePass,
     InsertTableOpsPass,
@@ -113,6 +114,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(FuseConstantArgsPass(exported_program))

         self.add_pass(InsertTableOpsPass(exported_program))
+        self.add_pass(FuseEqualPlaceholdersPass(exported_program))
         self.add_pass(AnnotateChannelsLastDimOrder())
         self.add_pass(InsertRescalePass())

@@ -164,6 +166,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(FuseViewCopyTransform())
         self.add_pass(FuseConstantArgsPass(exported_program))
         self.add_pass(InsertTableOpsPass(exported_program))
+        self.add_pass(FuseEqualPlaceholdersPass(exported_program))
         self.add_pass(AnnotateChannelsLastDimOrder())
         self.add_pass(InsertRescalePass())

backends/arm/_passes/fuse_equal_placeholders_pass.py

Lines changed: 83 additions & 0 deletions
@@ -0,0 +1,83 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from executorch.backends.arm._passes.arm_pass_utils import (
+    get_constant_placeholder_kind,
+    get_param_tensor,
+    is_param_node,
+)
+from executorch.backends.transforms.utils import (
+    create_constant_placeholder,
+    delete_constant_placeholder,
+)
+from executorch.exir import ExportedProgram
+from executorch.exir.pass_base import ExportPass, PassResult
+
+
+class FuseEqualPlaceholdersPass(ExportPass):
+    """
+    This pass optimizes memory usage by finding constant placeholders
+    pointing to identical tensors and fusing them to one single placeholder
+    with multiple users.
+    """
+
+    def __init__(self, exported_program: ExportedProgram):
+        self.exported_program = exported_program
+        super().__init__()
+
+    def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
+        modified = False
+        const_placeholder_nodes = []
+        for node in graph_module.graph.nodes:
+            if is_param_node(self.exported_program, node):
+                const_placeholder_nodes.append(node)
+
+        while const_placeholder_nodes:
+
+            # Find equal tensors
+            node1 = const_placeholder_nodes.pop()
+            eq_nodes = [node1]
+            tensor1 = get_param_tensor(self.exported_program, node1)
+            if tensor1 is None:
+                continue
+
+            for node2 in const_placeholder_nodes:
+                tensor2 = get_param_tensor(self.exported_program, node2)
+                if tensor2 is None:
+                    continue
+
+                if torch.equal(tensor1, tensor2):
+                    eq_nodes.append(node2)
+
+            if len(eq_nodes) > 1:
+                common_name = node1.name + "_common"
+                common_kind = get_constant_placeholder_kind(
+                    self.exported_program, node1
+                )
+                common_persistent_buffer = True
+
+                with graph_module.graph.inserting_before(node1):
+                    common_node = create_constant_placeholder(
+                        self.exported_program,
+                        graph_module.graph,
+                        common_name,
+                        common_kind,
+                        tensor1,
+                        common_persistent_buffer,
+                    )
+
+                for eq_node in eq_nodes:
+                    eq_node.replace_all_uses_with(common_node)
+                    delete_constant_placeholder(self.exported_program, eq_node)
+                    if eq_node != node1:
+                        const_placeholder_nodes.remove(eq_node)
+
+                modified = True
+
+        if modified:
+            graph_module.recompile()
+            graph_module = super().call(graph_module).graph_module
+        return PassResult(graph_module=graph_module, modified=modified)
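The core of the pass is the `torch.equal` deduplication loop. A standalone sketch of the same idea on plain named tensors (hypothetical names, no FX graph plumbing):

```python
import torch

# Hypothetical constants as a name -> tensor map; the real pass walks
# FX placeholder nodes and rewires users instead.
constants = {
    "w1": torch.ones(2, 2),
    "w2": torch.ones(2, 2),   # bit-identical to w1, should fuse
    "b1": torch.zeros(2),
}

canonical: dict[str, str] = {}   # name -> name of the surviving tensor
survivors: list[str] = []
for name, tensor in constants.items():
    for kept in survivors:
        # torch.equal checks shape, dtype, and values, mirroring the
        # equality test in FuseEqualPlaceholdersPass.
        if torch.equal(constants[kept], tensor):
            canonical[name] = kept
            break
    else:
        survivors.append(name)
        canonical[name] = name

print(canonical)  # {'w1': 'w1', 'w2': 'w1', 'b1': 'b1'}
```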

backends/arm/scripts/build_executorch.sh

Lines changed: 1 addition & 1 deletion
@@ -137,7 +137,7 @@ cmake \

 echo "[$(basename $0)] Configured CMAKE"

-cmake --build ${et_build_dir} --parallel --target install --config ${build_type} --
+cmake --build ${et_build_dir} -j$(nproc) --target install --config ${build_type} --

 set +x
