Skip to content

Commit d19e0db

Browse files
committed
Update
[ghstack-poisoned]
2 parents 5fc71bd + 7cb8295 commit d19e0db

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+847
-1002
lines changed

.ci/scripts/zephyr-utils.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
# LICENSE file in the root directory of this source tree.
77

88
download_arm_zephyr_sdk () {
9-
wget https://github.com/zephyrproject-rtos/sdk-ng/releases/download/v0.16.0/zephyr-sdk-0.16.0_linux-x86_64.tar.xz
10-
tar -xf zephyr-sdk-0.16.0_linux-x86_64.tar.xz
11-
rm -f zephyr-sdk-0.16.0_linux-x86_64.tar.xz
9+
wget https://github.com/zephyrproject-rtos/sdk-ng/releases/download/v0.17.2/zephyr-sdk-0.17.2_linux-x86_64.tar.xz
10+
tar -xf zephyr-sdk-0.17.2_linux-x86_64.tar.xz
11+
rm -f zephyr-sdk-0.17.2_linux-x86_64.tar.xz
1212
}
1313

1414
setup_zephyr_et_module () {

.github/workflows/trunk.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ jobs:
9292
9393
# TODO @Bujji: Should see if this can be moved into the docker image itself
9494
download_arm_zephyr_sdk
95-
./zephyr-sdk-0.16.0/setup.sh -c -t arm-zephyr-eabi
95+
./zephyr-sdk-0.17.2/setup.sh -c -t arm-zephyr-eabi
9696
cd $ZEPHYR_PROJ_ROOT
9797
setup_zephyr_et_module
9898

CMakePresets.json

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,36 @@
66
"hidden": true,
77
"binaryDir": "${sourceDir}/cmake-out"
88
},
9+
{
10+
"name": "android-arm64-v8a",
11+
"displayName": "Build executorch core and JNI bindings on android arm64-v8a",
12+
"inherits": ["common"],
13+
"binaryDir": "${sourceDir}/cmake-out-android-arm64-v8a",
14+
"cacheVariables": {
15+
"EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/android.cmake",
16+
"ANDROID_ABI": "arm64-v8a"
17+
},
18+
"condition": {
19+
"type": "inList",
20+
"string": "${hostSystemName}",
21+
"list": ["Darwin", "Linux", "Windows"]
22+
}
23+
},
24+
{
25+
"name": "android-x86_64",
26+
"displayName": "Build executorch core and JNI bindings on android x86_64",
27+
"inherits": ["common"],
28+
"binaryDir": "${sourceDir}/cmake-out-android-x86_64",
29+
"cacheVariables": {
30+
"EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/android.cmake",
31+
"ANDROID_ABI": "x86_64"
32+
},
33+
"condition": {
34+
"type": "inList",
35+
"string": "${hostSystemName}",
36+
"list": ["Darwin", "Linux", "Windows"]
37+
}
38+
},
939
{
1040
"name": "macos",
1141
"displayName": "Build ExecuTorch for macOS",
Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
# Copyright 2025 NXP
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
import torch
7+
8+
from executorch.backends.nxp.edge_passes.neutron_edge_pass import NeutronEdgePass
9+
from executorch.backends.nxp.neutron_partitioner import QDQClusterRecognizer
10+
from executorch.exir.dialects._ops import ops as exir_ops
11+
from torch.fx import Node
12+
from torch.fx.passes.infra.pass_base import PassResult
13+
14+
15+
def insert_qdq_pair_after_node(
    graph: torch.fx.Graph, anchor: torch.fx.Node, q_params: tuple
):
    """Insert a `quantize -> dequantize` pair directly after `anchor`.

    Every existing user of `anchor` is rewired to consume the new dequantize node, so the result is
    `anchor -> quantize -> dequantize -> <former users of anchor>`.

    :param graph: Graph that owns `anchor`; it is modified in place.
    :param anchor: Node whose output gets wrapped by the new quantize/dequantize pair.
    :param q_params: Arguments that follow the input tensor in both the quantize and the dequantize call. They are
                      passed to both new nodes unchanged.
    """
    # Accept any sequence of parameters; the concatenations below require a tuple.
    q_params = tuple(q_params)
    quantized_decomposed = exir_ops.edge.quantized_decomposed

    # Insert a Quantize node.
    with graph.inserting_after(anchor):
        quantize_op = graph.create_node(
            op="call_function",
            target=quantized_decomposed.quantize_per_tensor.default,
            args=(),  # Will be added later.
        )
    # Give the new node its own (shallow) copy of the metadata. Assigning `anchor.meta` directly would make the nodes
    # share a single dict, so a later update of one node's metadata would silently change the others as well.
    quantize_op.meta = anchor.meta.copy()

    # Insert a Dequantize node.
    with graph.inserting_after(quantize_op):
        dequantize_op = graph.create_node(
            op="call_function",
            target=quantized_decomposed.dequantize_per_tensor.default,
            args=(quantize_op,) + q_params,
        )
    dequantize_op.meta = anchor.meta.copy()
    anchor.replace_all_uses_with(dequantize_op)

    # Add this at the end, so the `anchor.replace_all_uses_with(dequantize_op)` does not replace the first use of the
    # `quantize_op`.
    quantize_op.args = (anchor,) + q_params
40+
41+
42+
def _is_dequantize(node_: Node) -> bool:
    """Return whether `node_` is a call of the edge `dequantize_per_tensor.default` operator."""
    if node_.op != "call_function":
        return False

    dequantize_target = (
        exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default
    )
    return node_.target == dequantize_target
48+
49+
50+
def _is_quantize(node_: Node) -> bool:
    """Return whether `node_` is a call of the edge `quantize_per_tensor.default` operator."""
    if node_.op != "call_function":
        return False

    quantize_target = exir_ops.edge.quantized_decomposed.quantize_per_tensor.default
    return node_.target == quantize_target
56+
57+
58+
class MoveLeadingAuxiliaryOperatorIntoSeparateQDQClusterPass(NeutronEdgePass):
    """Separate a leading auxiliary operator from the QDQ cluster of the node it feeds.

    A new quantize/dequantize pair is inserted between the `<aux_node>` and the `<main_cluster_node>`, reusing the
    quantization parameters of the dequantize node that feeds the `<aux_node>`.


                                                            ┌─────▼──────┐
                │                                           │ dequantize │
          ┌─────▼──────┐                                    └─────┬──────┘
          │ dequantize │                                    ┌─────▼──────┐
          └─────┬──────┘                                    │ <aux_node> │
          ┌─────▼──────┐                                    └─────┬──────┘
          │ <aux_node> │                                     ┌────▼─────┐           ┐
          └─────┬──────┘                                     │ quantize │           │
     ┌──────────▼──────────┐       replaced with             └────┬─────┘           │
    ⋯┤ <main_cluster_node> ├⋯     ──────────────►                 │                 │ newly added nodes
     └──────────┬──────────┘                                ┌─────▼──────┐          │
                ▼                                           │ dequantize │          │
                ⋮                                           └─────┬──────┘          ┘
           ┌────▼─────┐                                ┌──────────▼──────────┐
           │ quantize │                               ⋯┤ <main_cluster_node> ├⋯
           └────┬─────┘                                └──────────┬──────────┘
                ▼                                                 ▼

                                                             ┌────▼─────┐
                                                             │ quantize │
                                                             └────┬─────┘

    """

    # Operators which are treated as the `<aux_node>`.
    allowed_auxiliary_nodes = [exir_ops.edge.aten.view_copy.default]

    # Operators the `<aux_node>` has to feed into for the pass to modify the graph.
    allowed_main_cluster_nodes = [
        exir_ops.edge.aten.addmm.default,
        exir_ops.edge.aten.mm.default,
    ]

    def run(self, graph_module: torch.fx.GraphModule) -> PassResult:
        """Apply the transformation to the first matching `<aux_node>`, if there is one.

        :param graph_module: Module whose graph is searched and modified in place.
        :return: `PassResult` flagged as modified right after the first change, or flagged as unmodified when no node
                  matches the pattern.
        """
        graph = graph_module.graph

        for candidate in graph.nodes:
            is_auxiliary = (
                candidate.op == "call_function"
                and candidate.target in self.allowed_auxiliary_nodes
            )
            if not is_auxiliary:
                continue

            # The auxiliary operator must be fed directly by a dequantize node.
            leading_dequantize = candidate.args[0]
            if not _is_dequantize(leading_dequantize):
                continue

            # The auxiliary operator must have exactly one consumer.
            consumers = list(candidate.users)
            if len(consumers) != 1:
                continue
            (consumer,) = consumers

            is_supported_consumer = (
                consumer.op == "call_function"
                and consumer.target in self.allowed_main_cluster_nodes
            )
            if not is_supported_consumer:
                continue

            # All three nodes have to belong to the QDQ cluster of the consumer.
            cluster = QDQClusterRecognizer().get_qdq_cluster(consumer)
            pattern_nodes = (leading_dequantize, candidate, consumer)
            if not all(member in cluster for member in pattern_nodes):
                continue

            # ---- The pattern from the class docstring was found. ----

            # Reuse every dequantize argument except its input tensor.
            quantization_params = leading_dequantize.args[1:]
            insert_qdq_pair_after_node(graph, candidate, quantization_params)

            # Stop here instead of iterating over a graph which has just been modified. The parent class calls the
            # pass again to handle any remaining matches.
            return PassResult(graph_module, True)

        # No node matched the pattern.
        return PassResult(graph_module, False)
138+
139+
140+
class MoveTrailingAuxiliaryOperatorIntoSeparateQDQClusterPass(NeutronEdgePass):
    """Separate a trailing auxiliary operator from the QDQ cluster of the node which feeds it.

    A new quantize/dequantize pair is inserted between the `<main_cluster_node>` and the `<aux_node>`, reusing the
    quantization parameters of the quantize node that consumes the `<aux_node>`.


                                                            ┌─────▼──────┐
                │                                           │ dequantize │
          ┌─────▼──────┐                                    └─────┬──────┘
          │ dequantize │                                          ⋮
          └─────┬──────┘                               ┌──────────▼──────────┐
                ▼                                     ⋯┤ <main_cluster_node> ├⋯
                ⋮                                      └──────────┬──────────┘
     ┌──────────▼──────────┐       replaced with             ┌────▼─────┐           ┐
    ⋯┤ <main_cluster_node> ├⋯     ──────────────►            │ quantize │           │
     └──────────┬──────────┘                                 └────┬─────┘           │
          ┌─────▼──────┐                                          │                 │ newly added nodes
          │ <aux_node> │                                    ┌─────▼──────┐          │
          └─────┬──────┘                                    │ dequantize │          │
           ┌────▼─────┐                                     └─────┬──────┘          ┘
           │ quantize │                                     ┌─────▼──────┐
           └────┬─────┘                                     │ <aux_node> │
                ▼                                           └─────┬──────┘
                                                             ┌────▼─────┐
                                                             │ quantize │
                                                             └────┬─────┘

    """

    # Operators which are treated as the `<aux_node>`.
    allowed_auxiliary_nodes = [exir_ops.edge.aten.view_copy.default]

    # Operators which have to feed the `<aux_node>` for the pass to modify the graph.
    allowed_main_cluster_nodes = [
        exir_ops.edge.aten.addmm.default,
        exir_ops.edge.aten.mm.default,
    ]

    def run(self, graph_module: torch.fx.GraphModule) -> PassResult:
        """Apply the transformation to the first matching `<aux_node>`, if there is one.

        :param graph_module: Module whose graph is searched and modified in place.
        :return: `PassResult` flagged as modified right after the first change, or flagged as unmodified when no node
                  matches the pattern.
        """
        graph = graph_module.graph

        for candidate in graph.nodes:
            is_auxiliary = (
                candidate.op == "call_function"
                and candidate.target in self.allowed_auxiliary_nodes
            )
            if not is_auxiliary:
                continue

            # The auxiliary operator must be fed directly by a supported main node.
            producer = candidate.args[0]
            is_supported_producer = (
                producer.op == "call_function"
                and producer.target in self.allowed_main_cluster_nodes
            )
            if not is_supported_producer:
                continue

            # The auxiliary operator must have exactly one consumer, and it has to be a quantize node.
            consumers = list(candidate.users)
            if len(consumers) != 1:
                continue
            (trailing_quantize,) = consumers
            if not _is_quantize(trailing_quantize):
                continue

            # All three nodes have to belong to the QDQ cluster of the producer.
            cluster = QDQClusterRecognizer().get_qdq_cluster(producer)
            pattern_nodes = (trailing_quantize, candidate, producer)
            if not all(member in cluster for member in pattern_nodes):
                continue

            # ---- The pattern from the class docstring was found. ----

            # Reuse every quantize argument except its input tensor.
            quantization_params = trailing_quantize.args[1:]
            insert_qdq_pair_after_node(graph, producer, quantization_params)

            # Stop here instead of iterating over a graph which has just been modified. The parent class calls the
            # pass again to handle any remaining matches.
            return PassResult(graph_module, True)

        # No node matched the pattern.
        return PassResult(graph_module, False)
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# Copyright 2025 NXP
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
import logging
7+
from abc import abstractmethod
8+
9+
import torch
10+
11+
from executorch.exir.pass_base import ExportPass
12+
from torch.fx.passes.infra.pass_base import PassResult
13+
14+
15+
class NeutronEdgePass(ExportPass):
    """Base class for pre-processing passes which operate on the edge dialect.

    Subclasses implement `run()`, which makes at most one modification per invocation. `call()` repeats `run()` until
    it reports no change.
    """

    def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
        """Invoke `self.run()` repeatedly until it stops modifying the graph.

        A pass returns as soon as it changes the graph, so that it never traverses a graph while modifying it. Calling
        it in a loop lets it cover the whole model. The number of invocations is capped by the number of nodes the
        graph has when this method is entered.

        :param graph_module: Module to transform.
        :return: `PassResult` holding the recompiled module and a flag telling whether any invocation of `run()`
                  modified the graph.
        """
        remaining_runs = len(graph_module.graph.nodes)
        anything_changed = False

        while remaining_runs > 0:
            remaining_runs -= 1

            outcome = self.run(graph_module)
            if not outcome.modified:
                # No more changes have been made.
                return PassResult(
                    self.recompile_module(graph_module), anything_changed
                )

            anything_changed = True
            graph_module = outcome.graph_module

        # Every allowed invocation modified the graph, so the iteration limit was reached.
        logging.warning(
            f"The NeutronEdgePass `{self.__class__.__name__}` reached the iteration limit."
        )
        return PassResult(self.recompile_module(graph_module), anything_changed)

    @abstractmethod
    def run(self, graph_module: torch.fx.GraphModule) -> PassResult:
        """Perform the graph modification. Child classes provide the implementation.

        The implementation is expected to return right after its first change to the graph, with the `modified` flag
        of the result set accordingly.
        """

    def recompile_module(
        self, graph_module: torch.fx.GraphModule
    ) -> torch.fx.GraphModule:
        """Recompile `graph_module` and pass it through `ExportPass.call()`.

        The parent `call()` is relied on to re-trace the metadata, which should ensure that the datatypes and shapes
        are correct.

        :param graph_module: Module to recompile.
        :return: The graph module produced by `ExportPass.call()`.
        """
        graph_module.recompile()
        retraced = super().call(graph_module)
        return retraced.graph_module

0 commit comments

Comments
 (0)