
Commit f520003

Merge branch 'main' into android-preset
2 parents: 3866941 + 72ef7b1

File tree: 101 files changed (+3564, -819 lines)

Note: large commits have some content hidden by default, so the two new files below are shown without their paths.

.ci/scripts/test_huggingface_optimum_model.py

Lines changed: 9 additions & 3 deletions

@@ -262,14 +262,20 @@ def test_vit(model_id, model_dir, recipe, *, quantize=False, run_only=False):
     assert torch.allclose(
         eager_output.logits, et_output, atol=1e-02, rtol=1e-02
-    ), "CoreML output does not match eager"
+    ), "Model output does not match eager"


 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--model", type=str, required=True)
     parser.add_argument("--recipe", type=str, required=True)
     parser.add_argument("--quantize", action="store_true", help="Enable quantization")
+    parser.add_argument(
+        "--model_dir",
+        type=str,
+        required=False,
+        help="When provided, write the pte file to this directory. Otherwise, a temporary directory is created for the test.",
+    )
     args = parser.parse_args()

     model_to_model_id_and_test_function = {
@@ -294,11 +300,11 @@ def test_vit(model_id, model_dir, recipe, *, quantize=False, run_only=False):
             f"Unknown model name: {args.model}. Available models: {model_to_model_id_and_test_function.keys()}"
         )

+    model_id, test_fn = model_to_model_id_and_test_function[args.model]
     with tempfile.TemporaryDirectory() as tmp_dir:
-        model_id, test_fn = model_to_model_id_and_test_function[args.model]
         test_fn(
             model_id=model_id,
-            model_dir=tmp_dir,
+            model_dir=tmp_dir if args.model_dir is None else args.model_dir,
             recipe=args.recipe,
             quantize=args.quantize,
         )
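A note on the pattern above: the script keeps its temporary-directory default and only overrides it when --model_dir is passed. A minimal runnable sketch of the same fallback, where run_test is a hypothetical stand-in for the real test function:

    import argparse
    import tempfile

    def run_test(model_dir: str) -> None:
        # Stand-in for the real test; it would write the .pte file into model_dir.
        print(f"writing artifacts to {model_dir}")

    parser = argparse.ArgumentParser()
    parser.add_argument("--model_dir", type=str, required=False)
    args = parser.parse_args()

    # Same fallback as in the diff: use a fresh temporary directory
    # unless the caller supplied a directory of their own.
    with tempfile.TemporaryDirectory() as tmp_dir:
        run_test(model_dir=tmp_dir if args.model_dir is None else args.model_dir)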

.ci/scripts/test_model.sh

Lines changed: 4 additions & 1 deletion

@@ -199,6 +199,9 @@ test_model_with_qnn() {
     EXPORT_SCRIPT=albert
   elif [[ "${MODEL_NAME}" == "bert" ]]; then
     EXPORT_SCRIPT=bert
+  elif [[ "${MODEL_NAME}" == "conv_former" ]]; then
+    EXPORT_SCRIPT=conv_former
+    EXTRA_FLAGS="--dataset imagenet-mini/val"
   elif [[ "${MODEL_NAME}" == "cvt" ]]; then
     EXPORT_SCRIPT=cvt
   elif [[ "${MODEL_NAME}" == "distilbert" ]]; then
@@ -238,7 +241,7 @@ test_model_with_qnn() {
     "cvt"|"dit"|"focalnet"|"mobilevit_v2"|"pvt"|"swin")
       SCRIPT_FOLDER=oss_scripts
       ;;
-    "albert"|"bert"|"distilbert"|"roberta"|"efficientnet"|"mobilevit_v1")
+    "albert"|"bert"|"conv_former"|"distilbert"|"roberta"|"efficientnet"|"mobilevit_v1")
      pip install evaluate
      SCRIPT_FOLDER=oss_scripts
      # 16bit models will encounter op validation fail on some operations,
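The shell dispatch above maps a model name to an export script plus optional extra flags. A hypothetical Python mirror of that lookup, just to make the structure explicit (table contents abbreviated):

    # Hypothetical mirror of the elif chain in test_model_with_qnn():
    # each model name maps to (EXPORT_SCRIPT, EXTRA_FLAGS).
    EXPORT_TABLE = {
        "albert": ("albert", ""),
        "bert": ("bert", ""),
        "conv_former": ("conv_former", "--dataset imagenet-mini/val"),
        "cvt": ("cvt", ""),
    }

    def resolve(model_name: str) -> tuple[str, str]:
        # Unknown names raise KeyError here.
        return EXPORT_TABLE[model_name]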

.github/workflows/trunk.yml

Lines changed: 4 additions & 1 deletion

@@ -568,7 +568,7 @@ jobs:
     strategy:
       matrix:
         dtype: [fp32]
-        model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l]
+        model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l, conv_former]
       fail-fast: false
     with:
       runner: linux.2xlarge
@@ -815,6 +815,9 @@ jobs:
           smollm|coreml_fp32_gpu|--quantize,
           llama3|coreml_fp32_gpu|--quantize,
           olmo|coreml_fp32_gpu|--quantize,
+          # roberta|coreml_fp32_gpu|--quantize, roberta requires special HF access
+          bert|coreml_fp32_gpu|--quantize,
+          distilbert|coreml_fp32_gpu|--quantize,
         ]
         fail-fast: false
     with:
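Each new matrix entry packs three fields into one pipe-delimited string, which is presumably split downstream before the test script runs. A sketch of that split, assuming the exact three-field model|recipe|flags layout:

    entry = "bert|coreml_fp32_gpu|--quantize"
    model, recipe, flags = entry.split("|")
    assert (model, recipe, flags) == ("bert", "coreml_fp32_gpu", "--quantize")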
(new file; path hidden in the large-commit view)

Lines changed: 5 additions & 0 deletions

@@ -0,0 +1,5 @@
+#pragma once
+
+namespace executorch::core_ml_backend_delegate {
+void register_backend_coreml();
+} // namespace executorch::core_ml_backend_delegate
(new file; path hidden in the large-commit view)

Lines changed: 29 additions & 0 deletions

@@ -0,0 +1,29 @@
+#pragma once
+
+#include "executorch_operations.h"
+#import <coreml_backend/delegate.h>
+#import "ETCoreMLStrings.h"
+#import "backend_delegate.h"
+
+#import <executorch/runtime/core/evalue.h>
+#import <executorch/runtime/platform/log.h>
+#import <executorch/runtime/backend/interface.h>
+
+#include <array>
+#import <memory>
+
+namespace executorch::core_ml_backend_delegate {
+using executorch::runtime::get_backend_class;
+
+static std::unique_ptr<executorch::backends::coreml::CoreMLBackendDelegate> backendInterfaceLazy_;
+
+void register_backend_coreml() {
+  auto backendInterface = executorch::runtime::get_backend_class(ETCoreMLStrings.delegateIdentifier.UTF8String);
+  if (backendInterface == nullptr) {
+    backendInterfaceLazy_ = std::make_unique<executorch::backends::coreml::CoreMLBackendDelegate>();
+    executorch::runtime::Backend backend{ETCoreMLStrings.delegateIdentifier.UTF8String, backendInterfaceLazy_.get()};
+    std::ignore = register_backend(backend);
+  }
+}
+
+} // namespace executorch::core_ml_backend_delegate

backends/arm/CMakeLists.txt

Lines changed: 7 additions & 6 deletions

@@ -14,7 +14,9 @@ endif()

 include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)

-set(_common_include_directories ${EXECUTORCH_ROOT}/.. ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10)
+set(_common_include_directories
+  ${EXECUTORCH_ROOT}/.. ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10
+)
 add_compile_definitions(C10_USING_CUSTOM_GENERATED_MACROS)


@@ -34,13 +36,12 @@ set(_arm_baremetal_sources backends/arm/runtime/EthosUBackend.cpp
 list(TRANSFORM _arm_baremetal_sources PREPEND "${EXECUTORCH_ROOT}/")

 add_library(executorch_delegate_ethos_u STATIC ${_arm_baremetal_sources})
-target_include_directories(
-  executorch_delegate_ethos_u PUBLIC ${_common_include_directories}
-)
-target_include_directories(
-  executorch_delegate_ethos_u PUBLIC ${DRIVER_ETHOSU_INCLUDE_DIR}
+target_link_libraries(
+  executorch_delegate_ethos_u PUBLIC executorch_core ethosu_core_driver
 )

+install(TARGETS executorch_delegate_ethos_u EXPORT ExecuTorchTargets)
+
 # end config for bare metal builds
 endif()
backends/arm/_passes/annotate_decomposed_matmul.py

Lines changed: 3 additions & 3 deletions

@@ -12,7 +12,7 @@
 import torch
 from executorch.backends.arm._passes.arm_pass_utils import create_node

-from executorch.backends.arm.tosa_quant_utils import dq_ops, q_ops
+from executorch.backends.arm.constants import DQ_OPS, Q_OPS
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.dialects.edge._ops import EdgeOpOverload
 from executorch.exir.pass_base import ExportPass, PassResult
@@ -62,7 +62,7 @@ def call(self, graph_module: GraphModule) -> PassResult:
         }
         for partition in matmul_partitions:
             quantized_input = all(
-                input_node.target in dq_ops for input_node in partition.input_nodes
+                input_node.target in DQ_OPS for input_node in partition.input_nodes
             )
             matmul_node = [
                 node for node in partition.nodes if node.target in matmul_targets
@@ -93,7 +93,7 @@ def call(self, graph_module: GraphModule) -> PassResult:
                     graph_module.graph.erase_node(partition_input)

             partition_output = list(partition.output_nodes[0].users)[0]
-            quantized_output = partition_output.target in q_ops
+            quantized_output = partition_output.target in Q_OPS
             if quantized_output:
                 with graph_module.graph.inserting_after(matmul_node):
                     # Create q-node after matmul

backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py

Lines changed: 8 additions & 7 deletions

@@ -15,8 +15,9 @@
     get_param_tensor,
     is_param_node,
 )
+from executorch.backends.arm.constants import DQ_OPS, Q_OPS

-from executorch.backends.arm.tosa_quant_utils import dq_ops, q_ops, QuantArgs
+from executorch.backends.arm.tosa_quant_utils import QuantArgs

 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.dialects.edge._ops import EdgeOpOverload
@@ -109,7 +110,7 @@ def fold_and_annotate_arg(
             return

         arg_quant_params = None
-        if arg.target in dq_ops:
+        if arg.target in DQ_OPS:
             args = arg.args
             scales = args[1]
             if (
@@ -137,9 +138,9 @@ def fold_and_annotate_arg(
         if input_qparams is not None:
             node.meta["input_qparams"][i] = input_qparams
         for n in nodes_to_remove:
-            if n.target not in dq_ops:
+            if n.target not in DQ_OPS:
                 raise RuntimeError(
-                    f"Expected one of {dq_ops} dq_op, got {n.target}"
+                    f"Expected one of {DQ_OPS} dq_op, got {n.target}"
                 )

             node.replace_input_with(n, cast(Node, n.args[0]))
@@ -154,7 +155,7 @@ def call(self, graph_module: GraphModule) -> PassResult:
             if n.op != "call_function":
                 continue
             # Don't fold chains of quant-ops into each other.
-            if n.target in (*q_ops, *dq_ops):
+            if n.target in (*Q_OPS, *DQ_OPS):
                 continue

             # Make sure we haven't already set qparams meta information on the node
@@ -184,7 +185,7 @@ def call(self, graph_module: GraphModule) -> PassResult:
             # Copy the users, since we are modifying it.
             users_copy = copy.copy(n.users)
             for i, user in enumerate(users_copy):
-                if user.target not in q_ops:
+                if user.target not in Q_OPS:
                     continue

                 # quantization node found here, store the quantization parameters in meta value
@@ -221,7 +222,7 @@ def call(self, graph_module: GraphModule) -> PassResult:

             # Make sure we have a quantized operator
             user = list(n.users)[0]
-            if user.target not in q_ops:
+            if user.target not in Q_OPS:
                 continue

             qargs = QuantArgs.from_operator(user.target, user.args)

backends/arm/_passes/fuse_quantized_activation_pass.py

Lines changed: 3 additions & 2 deletions

@@ -6,7 +6,8 @@
 # pyre-unsafe

 import torch
-from executorch.backends.arm.tosa_quant_utils import q_ops, QuantArgs
+from executorch.backends.arm.constants import Q_OPS
+from executorch.backends.arm.tosa_quant_utils import QuantArgs
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass, PassResult
 from torch.fx import Node
@@ -21,7 +22,7 @@ def _is_fuseable_quantized_activation(node: Node):
     min_val = node.args[1]
     is_fuseable = min_val == 0

-    is_quantized = len(node.users) == 1 and next(iter(node.users)).target in q_ops
+    is_quantized = len(node.users) == 1 and next(iter(node.users)).target in Q_OPS
     if is_fuseable and is_quantized:
         quant_node = next(iter(node.users))
         quant_args = QuantArgs.from_operator(quant_node.target, quant_node.args)

backends/arm/_passes/insert_rescales_pass.py

Lines changed: 4 additions & 3 deletions

@@ -9,7 +9,8 @@

 import torch
 from executorch.backends.arm._passes.arm_pass_utils import create_node
-from executorch.backends.arm.tosa_quant_utils import dq_ops, q_ops, QuantArgs
+from executorch.backends.arm.constants import DQ_OPS, Q_OPS
+from executorch.backends.arm.tosa_quant_utils import QuantArgs
 from executorch.exir.pass_base import ExportPass, PassResult
 from torch import Tensor
 from torch.fx import GraphModule, Node
@@ -94,11 +95,11 @@ def call(self, graph_module: GraphModule) -> PassResult:
         for node in graph_module.graph.nodes:
             node = cast(Node, node)

-            if node.target not in dq_ops:
+            if node.target not in DQ_OPS:
                 continue
            # Copy users since we remove them while iterating, modifying the node.users list.
             for user in copy(node.users):
-                if user.target in q_ops:
+                if user.target in Q_OPS:
                     self.fold_dq_q_to_rescale(node, user, graph_module)
                     modified = True
             if len(node.users) == 0:
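All of the Arm pass changes above are the same mechanical migration: the dq_ops and q_ops tuples move out of tosa_quant_utils and become the shared DQ_OPS and Q_OPS constants in executorch.backends.arm.constants, leaving tosa_quant_utils to export only QuantArgs. A minimal sketch of the membership-test pattern these passes share; the constants shown here are placeholders, not the real contents of that module:

    # Illustrative stand-in for executorch.backends.arm.constants; the real
    # tuples hold operator overloads, not the placeholder strings used here.
    DQ_OPS = ("quantized_decomposed.dequantize_per_tensor.default",)
    Q_OPS = ("quantized_decomposed.quantize_per_tensor.default",)

    def output_is_quantized(node) -> bool:
        # The check the passes rely on: a node's output counts as quantized
        # when one of its users is a q-op.
        return any(user.target in Q_OPS for user in node.users)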
