Commit eb3ddba

Update on "[ET-VK] 8/n Split dispatches between multiple command buffers. This diff adds a config to limit the maximum number of command buffers created when splitting execution between multiple command buffers."
This diff introduces a new configuration option, `execute_max_cmds`, which caps the number of command buffers created when execution is split across multiple command buffers. Capping the count keeps command-buffer management efficient, particularly when the graph contains a large number of nodes.

Differential Revision: [D79575908](https://our.internmc.facebook.com/intern/diff/D79575908/)

[ghstack-poisoned]
2 parents 9a638b0 + 5f5523b commit eb3ddba
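
Editor's note: as a rough illustration of what such a cap implies (this is not the actual ET-VK implementation; the function name and the even-split policy are assumptions), limiting N dispatches to at most `execute_max_cmds` command buffers bounds how many buffers are created while keeping batches balanced:

```python
def plan_command_buffers(num_nodes: int, execute_max_cmds: int) -> list[int]:
    """Hypothetical sketch: spread num_nodes dispatches over at most
    execute_max_cmds command buffers, as evenly as possible."""
    num_cmds = min(execute_max_cmds, max(num_nodes, 1))
    base, extra = divmod(num_nodes, num_cmds)
    # The first `extra` buffers receive one extra dispatch each.
    return [base + 1 if i < extra else base for i in range(num_cmds)]

# 1000 graph nodes capped at 4 command buffers -> [250, 250, 250, 250]
print(plan_command_buffers(1000, 4))
```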

File tree

120 files changed, +4679 -1260 lines

.ci/scripts/test_huggingface_optimum_model.py

Lines changed: 9 additions & 3 deletions
```diff
@@ -262,14 +262,20 @@ def test_vit(model_id, model_dir, recipe, *, quantize=False, run_only=False):

     assert torch.allclose(
         eager_output.logits, et_output, atol=1e-02, rtol=1e-02
-    ), "CoreML output does not match eager"
+    ), "Model output does not match eager"


 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--model", type=str, required=True)
     parser.add_argument("--recipe", type=str, required=True)
     parser.add_argument("--quantize", action="store_true", help="Enable quantization")
+    parser.add_argument(
+        "--model_dir",
+        type=str,
+        required=False,
+        help="When provided, write the pte file to this directory. Otherwise, a temporary directory is created for the test.",
+    )
     args = parser.parse_args()

     model_to_model_id_and_test_function = {
@@ -294,11 +300,11 @@ def test_vit(model_id, model_dir, recipe, *, quantize=False, run_only=False):
             f"Unknown model name: {args.model}. Available models: {model_to_model_id_and_test_function.keys()}"
         )

+    model_id, test_fn = model_to_model_id_and_test_function[args.model]
     with tempfile.TemporaryDirectory() as tmp_dir:
-        model_id, test_fn = model_to_model_id_and_test_function[args.model]
         test_fn(
             model_id=model_id,
-            model_dir=tmp_dir,
+            model_dir=tmp_dir if args.model_dir is None else args.model_dir,
             recipe=args.recipe,
             quantize=args.quantize,
         )
```
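
The `--model_dir` fallback added above follows a common pattern: use a caller-supplied directory when given, otherwise fall back to a temporary one that is cleaned up afterwards. A minimal standalone sketch of the same pattern (the `export_model` callback is a stand-in, not a function from this script):

```python
import tempfile
from typing import Callable, Optional

def run_with_model_dir(
    export_model: Callable[[str], None], model_dir: Optional[str] = None
) -> None:
    if model_dir is not None:
        export_model(model_dir)  # caller keeps the pte artifacts
        return
    with tempfile.TemporaryDirectory() as tmp_dir:
        export_model(tmp_dir)  # artifacts are discarded after the test

run_with_model_dir(lambda d: print(f"writing pte to {d}"))
```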

.ci/scripts/test_model.sh

Lines changed: 25 additions & 18 deletions
```diff
@@ -166,34 +166,49 @@ test_model_with_qnn() {
   export PYTHONPATH=$EXECUTORCH_ROOT/..

   EXTRA_FLAGS=""
+  # Ordered by the folder name, then alphabetically by the model name
+  # Following models are inside examples/qualcomm/scripts folder
   if [[ "${MODEL_NAME}" == "dl3" ]]; then
     EXPORT_SCRIPT=deeplab_v3
-  elif [[ "${MODEL_NAME}" == "mv3" ]]; then
-    EXPORT_SCRIPT=mobilenet_v3
-  elif [[ "${MODEL_NAME}" == "mv2" ]]; then
-    EXPORT_SCRIPT=mobilenet_v2
-  elif [[ "${MODEL_NAME}" == "ic4" ]]; then
-    EXPORT_SCRIPT=inception_v4
+  elif [[ "${MODEL_NAME}" == "edsr" ]]; then
+    EXPORT_SCRIPT=edsr
+    # Additional deps for edsr
+    pip install piq
   elif [[ "${MODEL_NAME}" == "ic3" ]]; then
     EXPORT_SCRIPT=inception_v3
-  elif [[ "${MODEL_NAME}" == "vit" ]]; then
-    EXPORT_SCRIPT=torchvision_vit
+  elif [[ "${MODEL_NAME}" == "ic4" ]]; then
+    EXPORT_SCRIPT=inception_v4
   elif [[ "${MODEL_NAME}" == "mb" ]]; then
     EXPORT_SCRIPT=mobilebert_fine_tune
     EXTRA_FLAGS="--num_epochs 1"
     pip install scikit-learn
+  elif [[ "${MODEL_NAME}" == "mv2" ]]; then
+    EXPORT_SCRIPT=mobilenet_v2
+  elif [[ "${MODEL_NAME}" == "mv3" ]]; then
+    EXPORT_SCRIPT=mobilenet_v3
+  elif [[ "${MODEL_NAME}" == "vit" ]]; then
+    EXPORT_SCRIPT=torchvision_vit
   elif [[ "${MODEL_NAME}" == "w2l" ]]; then
     EXPORT_SCRIPT=wav2letter
   elif [[ "${MODEL_NAME}" == "edsr" ]]; then
     EXPORT_SCRIPT=edsr
     # Additional deps for edsr
     pip install piq
+  # Following models are inside examples/qualcomm/oss_scripts folder
+  elif [[ "${MODEL_NAME}" == "albert" ]]; then
+    EXPORT_SCRIPT=albert
+  elif [[ "${MODEL_NAME}" == "bert" ]]; then
+    EXPORT_SCRIPT=bert
   elif [[ "${MODEL_NAME}" == "cvt" ]]; then
     EXPORT_SCRIPT=cvt
+  elif [[ "${MODEL_NAME}" == "distilbert" ]]; then
+    EXPORT_SCRIPT=distilbert
   elif [[ "${MODEL_NAME}" == "dit" ]]; then
     EXPORT_SCRIPT=dit
   elif [[ "${MODEL_NAME}" == "efficientnet" ]]; then
     EXPORT_SCRIPT=efficientnet
+  elif [[ "${MODEL_NAME}" == "eurobert" ]]; then
+    EXPORT_SCRIPT=eurobert
   elif [[ "${MODEL_NAME}" == "focalnet" ]]; then
     EXPORT_SCRIPT=focalnet
   elif [[ "${MODEL_NAME}" == "mobilevit_v1" ]]; then
@@ -202,18 +217,10 @@ test_model_with_qnn() {
     EXPORT_SCRIPT=mobilevit_v2
   elif [[ "${MODEL_NAME}" == "pvt" ]]; then
     EXPORT_SCRIPT=pvt
-  elif [[ "${MODEL_NAME}" == "swin" ]]; then
-    EXPORT_SCRIPT=swin_transformer
-  elif [[ "${MODEL_NAME}" == "albert" ]]; then
-    EXPORT_SCRIPT=albert
-  elif [[ "${MODEL_NAME}" == "bert" ]]; then
-    EXPORT_SCRIPT=bert
-  elif [[ "${MODEL_NAME}" == "distilbert" ]]; then
-    EXPORT_SCRIPT=distilbert
-  elif [[ "${MODEL_NAME}" == "eurobert" ]]; then
-    EXPORT_SCRIPT=eurobert
   elif [[ "${MODEL_NAME}" == "roberta" ]]; then
     EXPORT_SCRIPT=roberta
+  elif [[ "${MODEL_NAME}" == "swin" ]]; then
+    EXPORT_SCRIPT=swin_transformer
   else
     echo "Unsupported model $MODEL_NAME"
     exit 1
```
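
The elif chain above is kept alphabetical by hand. As a sketch only (the CI script itself stays in shell, and only a few of the mappings are reproduced here), a table-driven lookup makes the ordering irrelevant:

```python
# Hypothetical table-driven equivalent of the shell elif chain.
EXPORT_SCRIPTS = {
    # examples/qualcomm/scripts
    "dl3": "deeplab_v3",
    "edsr": "edsr",
    "ic3": "inception_v3",
    "ic4": "inception_v4",
    # examples/qualcomm/oss_scripts
    "albert": "albert",
    "bert": "bert",
}

def export_script_for(model_name: str) -> str:
    try:
        return EXPORT_SCRIPTS[model_name]
    except KeyError:
        raise SystemExit(f"Unsupported model {model_name}")

print(export_script_for("ic4"))  # -> inception_v4
```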

.github/workflows/trunk.yml

Lines changed: 5 additions & 2 deletions
```diff
@@ -285,12 +285,12 @@ jobs:
           setup_script_args=""
           if [[ ${{ matrix.os}} == "bare_metal" ]]; then
             toolchain_prefix=arm-none-eabi-
-            threshold="109000"
+            threshold="110592" # 108 KiB
             toolchain_cmake=examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake
           elif [[ ${{ matrix.os}} == "zephyr-preset" ]]; then
             setup_script_args="--target-toolchain zephyr"
             toolchain_prefix=arm-zephyr-eabi-
-            threshold="135000"
+            threshold="135168" # 132 KiB
             toolchain_cmake=examples/zephyr/x86_64-linux-arm-zephyr-eabi-gcc.cmake
           else
             echo "Fail unsupport OS selection ${{ matrix.os }}"
@@ -815,6 +815,9 @@ jobs:
          smollm|coreml_fp32_gpu|--quantize,
          llama3|coreml_fp32_gpu|--quantize,
          olmo|coreml_fp32_gpu|--quantize,
+          # roberta|coreml_fp32_gpu|--quantize, roberta requires special HF access
+          bert|coreml_fp32_gpu|--quantize,
+          distilbert|coreml_fp32_gpu|--quantize,
        ]
      fail-fast: false
    with:
```
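
The new size thresholds are exact KiB multiples, which the inline comments make auditable. A quick check of the arithmetic:

```python
# 108 KiB and 132 KiB expressed in bytes, matching the workflow comments.
assert 108 * 1024 == 110592
assert 132 * 1024 == 135168
# The old values (109000, 135000) were close to, but not exact, KiB multiples.
print(110592 - 109000, 135168 - 135000)  # headroom gained: 1592 and 168 bytes
```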

backends/apple/coreml/compiler/coreml_preprocess.py

Lines changed: 25 additions & 6 deletions
```diff
@@ -126,15 +126,18 @@ def model_compute_precision_from_compile_specs(

     @staticmethod
     def generate_minimum_deployment_target_compile_spec(
-        min_deployment_target: ct.target,
+        min_deployment_target: Optional[ct.target],
     ) -> CompileSpec:
         """
         Returns the compile spec representing the minimum deployment target on which the model can run,
         for additional details please refer to the documentation for ``coremltools.target``.
         """
+        value = str("").encode("utf-8")
+        if min_deployment_target is not None:
+            value = str(min_deployment_target.value).encode("utf-8")
         return CompileSpec(
             COMPILE_SPEC_KEYS.MIN_DEPLOYMENT_TARGET.value,
-            str(min_deployment_target.value).encode("utf-8"),
+            value,
         )

     @staticmethod
@@ -146,10 +149,13 @@ def min_deployment_target_from_compile_specs(
         """
         for compile_spec in compile_specs:
             if compile_spec.key == COMPILE_SPEC_KEYS.MIN_DEPLOYMENT_TARGET.value:
-                compile_spec_value: int = int(compile_spec.value.decode("utf-8"))
+                value = compile_spec.value.decode("utf-8")
+                if value == "":
+                    return None
+                compile_spec_value: int = int(value)
                 return ct.target(compile_spec_value)

-        return ct.target.iOS15
+        return None

     @staticmethod
     def compute_unit_from_compile_specs(
@@ -211,7 +217,7 @@ def op_linear_quantizer_config_from_compile_specs(
     @staticmethod
     def generate_compile_specs(
         compute_unit: ct.ComputeUnit = ct.ComputeUnit.ALL,
-        minimum_deployment_target: ct.target = ct.target.iOS15,
+        minimum_deployment_target: Optional[ct.target] = None,
         compute_precision: ct.precision = ct.precision.FLOAT16,
         model_type: MODEL_TYPE = MODEL_TYPE.MODEL,
         op_linear_quantizer_config: Optional[Dict] = None,
@@ -248,6 +254,13 @@ def model_metadata_from_spec(
         input_names: List[str] = [input.name for input in model_spec.description.input]
         output_names = [output.name for output in model_spec.description.output]

+        if len(output_names) == 0:
+            raise ValueError("Cannot lower a model with no outputs in CoreML.")
+        if len(input_names) == 0:
+            assert (
+                model_spec.specificationVersion >= 9
+            ), "Deploying a model with no inputs in CoreML requires you set minimum_deployment_target to iOS18 or later in the CoreMLPartitioner."
+
         return ModelMetadata(
             inputNames=input_names, outputNames=output_names, identifier=identifier
         )
@@ -352,6 +365,12 @@ def preprocess_model(
         dir_path: Path = Path("tmp") / identifier
         model_dir_path: Path = dir_path / "lowered_module"
         model_spec: ct.proto.Model_pb2 = mlmodel.get_spec()
+        logger.warning(
+            f"The model with identifier {identifier} was exported with CoreML specification version {model_spec.specificationVersion}, and it will not run on all version of iOS/macOS."
+            " See https://apple.github.io/coremltools/mlmodel/Format/Model.html#model for information on what OS versions are compatible with this specifcation version."
+            " If you want to control the deployment target, please set the minimum_deployment_target compile spec in the CoreMLPartitioner."
+        )
+
         model_metadata: ModelMetadata = CoreMLBackend.model_metadata_from_spec(
             model_spec=model_spec,
             identifier=identifier,
@@ -418,7 +437,7 @@ def preprocess(
         model_compute_precision: ct.precision = (
             CoreMLBackend.model_compute_precision_from_compile_specs(compile_specs)
         )
-        minimum_deployment_target: ct.target = (
+        minimum_deployment_target: Optional[ct.target] = (
             CoreMLBackend.min_deployment_target_from_compile_specs(compile_specs)
         )
         compute_units: ct.ComputeUnit = CoreMLBackend.compute_unit_from_compile_specs(
```
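
The change encodes a missing deployment target as an empty string so that `None` can round-trip through the byte-valued compile spec. A self-contained sketch of just that encode/decode convention (stand-in helpers; the real code goes through `CompileSpec` and `coremltools.target`):

```python
from typing import Optional

def encode_target(target_value: Optional[int]) -> bytes:
    # None -> b"", mirroring generate_minimum_deployment_target_compile_spec
    return b"" if target_value is None else str(target_value).encode("utf-8")

def decode_target(value: bytes) -> Optional[int]:
    # b"" -> None, mirroring min_deployment_target_from_compile_specs
    text = value.decode("utf-8")
    return None if text == "" else int(text)

assert decode_target(encode_target(None)) is None
assert decode_target(encode_target(15)) == 15  # e.g. ct.target.iOS15.value
```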
Lines changed: 5 additions & 0 deletions
```diff
@@ -0,0 +1,5 @@
+#pragma once
+
+namespace executorch::core_ml_backend_delegate {
+void register_backend_coreml();
+} // namespace executorch::core_ml_backend_delegate
```
Lines changed: 29 additions & 0 deletions
```diff
@@ -0,0 +1,29 @@
+#pragma once
+
+#include "executorch_operations.h"
+#import <coreml_backend/delegate.h>
+#import "ETCoreMLStrings.h"
+#import "backend_delegate.h"
+
+#import <executorch/runtime/core/evalue.h>
+#import <executorch/runtime/platform/log.h>
+#import <executorch/runtime/backend/interface.h>
+
+#include <array>
+#import <memory>
+
+namespace executorch::core_ml_backend_delegate {
+using executorch::runtime::get_backend_class;
+
+static std::unique_ptr<executorch::backends::coreml::CoreMLBackendDelegate> backendInterfaceLazy_;
+
+void register_backend_coreml() {
+    auto backendInterface = executorch::runtime::get_backend_class(ETCoreMLStrings.delegateIdentifier.UTF8String);
+    if (backendInterface == nullptr) {
+        backendInterfaceLazy_ = std::make_unique<executorch::backends::coreml::CoreMLBackendDelegate>();
+        executorch::runtime::Backend backend{ETCoreMLStrings.delegateIdentifier.UTF8String, backendInterfaceLazy_.get()};
+        std::ignore = register_backend(backend);
+    }
+}
+
+} // namespace executorch::core_ml_backend_delegate
```
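
The helper above registers the CoreML delegate only when no backend already claims that identifier, keeping a static `unique_ptr` alive so the runtime's registry holds a valid pointer. The same register-once guard, sketched in Python with stand-in names for clarity:

```python
# Hypothetical Python rendering of the register-once guard above.
_registry: dict[str, object] = {}

class CoreMLBackendDelegate:
    pass

def register_backend_coreml(identifier: str = "CoreMLBackend") -> None:
    # Only install the delegate if nothing has claimed the identifier yet.
    if identifier not in _registry:
        _registry[identifier] = CoreMLBackendDelegate()

register_backend_coreml()
register_backend_coreml()  # second call is a no-op
print(list(_registry))
```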

backends/apple/coreml/runtime/delegate/model_metadata.h

Lines changed: 1 addition & 3 deletions
```diff
@@ -29,9 +29,7 @@ struct ModelMetadata {
     inline ModelMetadata() noexcept { }

     /// Returns `true` if the metadata is valid otherwise `false`.
-    inline bool is_valid() const noexcept {
-        return !identifier.empty() && !input_names.empty() && !output_names.empty();
-    }
+    inline bool is_valid() const noexcept { return !identifier.empty() && !output_names.empty(); }

     inline std::string to_json_string() const noexcept { return executorchcoreml::serde::json::to_json_string(*this); }

```
backends/arm/CMakeLists.txt

Lines changed: 7 additions & 6 deletions
```diff
@@ -14,7 +14,9 @@ endif()

 include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)

-set(_common_include_directories ${EXECUTORCH_ROOT}/.. ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10)
+set(_common_include_directories
+    ${EXECUTORCH_ROOT}/.. ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10
+)
 add_compile_definitions(C10_USING_CUSTOM_GENERATED_MACROS)


@@ -34,13 +36,12 @@ set(_arm_baremetal_sources backends/arm/runtime/EthosUBackend.cpp
 list(TRANSFORM _arm_baremetal_sources PREPEND "${EXECUTORCH_ROOT}/")

 add_library(executorch_delegate_ethos_u STATIC ${_arm_baremetal_sources})
-target_include_directories(
-  executorch_delegate_ethos_u PUBLIC ${_common_include_directories}
-)
-target_include_directories(
-  executorch_delegate_ethos_u PUBLIC ${DRIVER_ETHOSU_INCLUDE_DIR}
+target_link_libraries(
+  executorch_delegate_ethos_u PUBLIC executorch_core ethosu_core_driver
 )

+install(TARGETS executorch_delegate_ethos_u EXPORT ExecuTorchTargets)
+
 # end config for bare metal builds
 endif()

```
backends/arm/_passes/annotate_decomposed_matmul.py

Lines changed: 3 additions & 3 deletions
```diff
@@ -12,7 +12,7 @@
 import torch
 from executorch.backends.arm._passes.arm_pass_utils import create_node

-from executorch.backends.arm.tosa_quant_utils import dq_ops, q_ops
+from executorch.backends.arm.constants import DQ_OPS, Q_OPS
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.dialects.edge._ops import EdgeOpOverload
 from executorch.exir.pass_base import ExportPass, PassResult
@@ -62,7 +62,7 @@ def call(self, graph_module: GraphModule) -> PassResult:
         }
         for partition in matmul_partitions:
             quantized_input = all(
-                input_node.target in dq_ops for input_node in partition.input_nodes
+                input_node.target in DQ_OPS for input_node in partition.input_nodes
             )
             matmul_node = [
                 node for node in partition.nodes if node.target in matmul_targets
@@ -93,7 +93,7 @@ def call(self, graph_module: GraphModule) -> PassResult:
                     graph_module.graph.erase_node(partition_input)

             partition_output = list(partition.output_nodes[0].users)[0]
-            quantized_output = partition_output.target in q_ops
+            quantized_output = partition_output.target in Q_OPS
             if quantized_output:
                 with graph_module.graph.inserting_after(matmul_node):
                     # Create q-node after matmul
```
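
The rename from `dq_ops`/`q_ops` to `DQ_OPS`/`Q_OPS` in a shared `constants` module signals that these are fixed collections, and membership tests such as `input_node.target in DQ_OPS` read against them. A toy sketch of that lookup (the op names here are placeholders, not the real edge-dialect targets):

```python
# Placeholder op names; the real constants hold edge-dialect overloads.
DQ_OPS = ("quantized_decomposed.dequantize_per_tensor.default",)
Q_OPS = ("quantized_decomposed.quantize_per_tensor.default",)

def all_inputs_quantized(input_targets: list[str]) -> bool:
    # Mirrors: all(input_node.target in DQ_OPS for input_node in partition.input_nodes)
    return all(target in DQ_OPS for target in input_targets)

print(all_inputs_quantized(["quantized_decomposed.dequantize_per_tensor.default"]))  # True
print(all_inputs_quantized(["aten.add.Tensor"]))  # False
```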

backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py

Lines changed: 8 additions & 7 deletions
```diff
@@ -15,8 +15,9 @@
     get_param_tensor,
     is_param_node,
 )
+from executorch.backends.arm.constants import DQ_OPS, Q_OPS

-from executorch.backends.arm.tosa_quant_utils import dq_ops, q_ops, QuantArgs
+from executorch.backends.arm.tosa_quant_utils import QuantArgs

 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.dialects.edge._ops import EdgeOpOverload
@@ -109,7 +110,7 @@ def fold_and_annotate_arg(
             return

         arg_quant_params = None
-        if arg.target in dq_ops:
+        if arg.target in DQ_OPS:
             args = arg.args
             scales = args[1]
             if (
@@ -137,9 +138,9 @@ def fold_and_annotate_arg(
         if input_qparams is not None:
             node.meta["input_qparams"][i] = input_qparams
         for n in nodes_to_remove:
-            if n.target not in dq_ops:
+            if n.target not in DQ_OPS:
                 raise RuntimeError(
-                    f"Expected one of {dq_ops} dq_op, got {n.target}"
+                    f"Expected one of {DQ_OPS} dq_op, got {n.target}"
                 )

             node.replace_input_with(n, cast(Node, n.args[0]))
@@ -154,7 +155,7 @@ def call(self, graph_module: GraphModule) -> PassResult:
             if n.op != "call_function":
                 continue
             # Don't fold chains of quant-ops into each other.
-            if n.target in (*q_ops, *dq_ops):
+            if n.target in (*Q_OPS, *DQ_OPS):
                 continue

             # Make sure we haven't already set qparams meta information on the node
@@ -184,7 +185,7 @@ def call(self, graph_module: GraphModule) -> PassResult:
             # Copy the users, since we are modifying it.
             users_copy = copy.copy(n.users)
             for i, user in enumerate(users_copy):
-                if user.target not in q_ops:
+                if user.target not in Q_OPS:
                     continue

                 # quantization node found here, store the quantization parameters in meta value
@@ -221,7 +222,7 @@ def call(self, graph_module: GraphModule) -> PassResult:

             # Make sure we have a quantized operator
             user = list(n.users)[0]
-            if user.target not in q_ops:
+            if user.target not in Q_OPS:
                 continue

             qargs = QuantArgs.from_operator(user.target, user.args)
```
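
This pass folds quantization parameters off dedicated q/dq nodes and into the neighboring node's `meta`, so later stages can read them without the q/dq nodes present. A minimal standalone sketch of that bookkeeping (plain dicts and a stand-in dataclass replace `torch.fx` nodes and the pass's `QuantArgs`):

```python
from dataclasses import dataclass

@dataclass(frozen=True)
class QuantArgs:  # stand-in for the pass's QuantArgs
    scale: float
    zero_point: int

def fold_dq_into_meta(node_meta: dict, arg_index: int, dq_args: tuple) -> None:
    # Mirrors node.meta["input_qparams"][i] = input_qparams in the pass.
    node_meta.setdefault("input_qparams", {})[arg_index] = QuantArgs(
        scale=dq_args[1], zero_point=dq_args[2]
    )

meta: dict = {}
fold_dq_into_meta(meta, 0, ("dq_input", 0.02, 128))
print(meta["input_qparams"][0])  # QuantArgs(scale=0.02, zero_point=128)
```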
