Skip to content

Commit 196ca8e

Browse files
committed
Merge branch 'main' into distribute-jobs-to-private-pools
Signed-off-by: Huy Do <[email protected]>
2 parents f05e946 + 8d57c95 commit 196ca8e

File tree

31 files changed

+416
-190
lines changed

31 files changed

+416
-190
lines changed

.github/workflows/trunk.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ jobs:
269269
if [[ ${{ matrix.os}} == "bare_metal" ]]; then
270270
bash test/build_size_test.sh "-DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} -DEXECUTORCH_BUILD_ARM_BAREMETAL=ON"
271271
elif [[ ${{ matrix.os}} == "zephyr-preset" ]]; then
272-
CXXFLAGS=${cxx_flags} cmake --preset zephyr -DCMAKE_BUILD_TYPE=Release -DEXECUTORCH_OPTIMIZE_SIZE=ON -DCMAKE_INSTALL_PREFIX=cmake-out -Bcmake-out .
272+
CXXFLAGS=${cxx_flags} cmake --preset zephyr -DCMAKE_BUILD_TYPE=Release -DEXECUTORCH_OPTIMIZE_SIZE=ON -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON -DCMAKE_INSTALL_PREFIX=cmake-out -Bcmake-out .
273273
cmake --build cmake-out -j9 --target install --config Release
274274
CXXFLAGS=${cxx_flags} cmake -DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=cmake-out -Bcmake-out/test test
275275
cmake --build cmake-out/test -j9 --config Release

backends/arm/arm_backend.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ def ethosu_compile_spec(
128128
self.compiler_flags.append("--output-format=raw")
129129
self.compiler_flags.append("--debug-force-regor")
130130

131-
base_tosa_version = "TOSA-0.80+BI"
131+
base_tosa_version = "TOSA-1.0+INT"
132132
if "u55" in target:
133133
# Add the Ethos-U55 extension marker
134134
base_tosa_version += "+u55"

backends/arm/test/test_arm_baremetal.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,7 @@ test_models_ethos-u85() { # End to End model tests using model_test.py
228228
python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-256 --model=mv2 --extra_flags="-DET_ATOL=2.00 -DET_RTOL=2.00"
229229
python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-512 --model=mv3 --extra_flags="-DET_ATOL=5.00 -DET_RTOL=5.00"
230230
python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-128 --model=lstm --extra_flags="-DET_ATOL=0.03 -DET_RTOL=0.03"
231-
python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-128 --model=w2l --extra_flags="-DET_ATOL=0.01 -DET_RTOL=0.01"
231+
#python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-128 --model=w2l --extra_flags="-DET_ATOL=0.01 -DET_RTOL=0.01" # Takes long time to run
232232
python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-256 --model=ic4 --extra_flags="-DET_ATOL=0.8 -DET_RTOL=0.8" --timeout=2400
233233
python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-128 --model=resnet18 --extra_flags="-DET_ATOL=0.2 -DET_RTOL=0.2"
234234
python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-128 --model=resnet50 --extra_flags="-DET_ATOL=0.2 -DET_RTOL=0.2"

backends/cadence/aot/compiler.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,9 @@ def _lower_ep_to_edge(
228228
"""
229229
Lower an ExportedProgram to an EdgeProgramManager (in edge IR).
230230
"""
231+
# Apply passes which transform the ExportedProgram before it gets lowered to edge.
232+
expo_program = apply_torch_ops_passes(expo_program)
233+
231234
# Call to_edge to convert the graph to edge IR.
232235
# Note: dim_order is skipped (https://github.com/pytorch/executorch/issues/3704)
233236
edge_prog_manager = to_edge(
@@ -263,9 +266,6 @@ def export_to_edge(
263266
# Export the model into an ExportedProgram.
264267
expo_program = trace(model, inputs)
265268

266-
# Apply passes which transform the ExportedProgram before it gets lowered to edge.
267-
expo_program = apply_torch_ops_passes(expo_program)
268-
269269
# Lower the model to edge IR.
270270
edge_prog_manager = _lower_ep_to_edge(
271271
expo_program, dump_graphs, constant_methods, core_aten_exceptions

backends/cadence/aot/ops_registrations.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,14 @@
276276
"requantize.per_tensor_out(Tensor input, float in_scale, int in_zero_point, float out_scale, "
277277
"int out_zero_point, ScalarType out_dtype, *, Tensor(a!) out) -> Tensor(a!)"
278278
)
279+
lib.define(
280+
"roi_align_box_processor.out(Tensor rois, int output_size_h, int output_size_w, "
281+
"int sampling_ratio, bool aligned, *, Tensor(a!) out) -> Tensor(a!)"
282+
)
283+
lib.define(
284+
"roi_align_box_processor(Tensor rois, int output_size_h, int output_size_w, "
285+
"int sampling_ratio, bool aligned) -> (Tensor out)"
286+
)
279287

280288
# Custom ops with aten namespace. Need to specify the lib var as FRAGMENT type as aten library is already defined
281289
aten_lib = Library("aten", "FRAGMENT")
@@ -1038,3 +1046,14 @@ def idma_store_impl(
10381046
channel: int = 0,
10391047
) -> torch.Tensor:
10401048
return copy_idma_copy_impl(src, task_num, channel)
1049+
1050+
1051+
@register_fake("cadence::roi_align_box_processor")
1052+
def roi_align_box_processor_meta(
1053+
rois: torch.Tensor,
1054+
output_size_h: int,
1055+
output_size_w: int,
1056+
sampling_ratio: int,
1057+
aligned: bool,
1058+
) -> torch.Tensor:
1059+
return rois.new_empty((rois.shape[0], 80), dtype=torch.uint8)

backends/cadence/aot/replace_ops.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2328,12 +2328,15 @@ def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
23282328

23292329
# Extract an argument to a separate full op.
23302330
with graph_module.graph.inserting_before(mul_node):
2331-
full_tensor = graph_module.graph.call_function(
2331+
full_node = graph_module.graph.call_function(
23322332
torch.ops.aten.full.default, args=([1], full_arg)
23332333
)
2334+
full_node.meta = mul_node.meta
2335+
full_node.meta["val"] = [1]
23342336
new_mul_node = graph_module.graph.call_function(
2335-
torch.ops.aten.mul.Tensor, args=(x_arg, full_tensor)
2337+
torch.ops.aten.mul.Tensor, args=(x_arg, full_node)
23362338
)
2339+
new_mul_node.meta = mul_node.meta
23372340
# Replace the old mul with a newly created mul.
23382341
mul_node.replace_all_uses_with(new_mul_node)
23392342
graph_module.graph.erase_node(mul_node)

backends/qualcomm/_passes/layout_transform.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,8 @@ class LayoutTransform(ExportPass):
103103
exir_ops.edge.aten.pow.Tensor_Scalar,
104104
exir_ops.edge.aten.prelu.default,
105105
exir_ops.edge.aten.repeat.default,
106-
exir_ops.edge.aten.round.default,
107106
exir_ops.edge.aten.relu.default,
107+
exir_ops.edge.aten.round.default,
108108
exir_ops.edge.aten.sigmoid.default,
109109
exir_ops.edge.aten.split_with_sizes.default,
110110
exir_ops.edge.aten.split_with_sizes_copy.default,

backends/qualcomm/quantizer/annotators.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,9 @@ def annotate_masked_fill(node: Node, quantization_config: QuantizationConfig) ->
278278
)
279279

280280

281-
@register_annotator([torch.ops.aten.mul, torch.ops.aten.mul.Tensor])
281+
@register_annotator(
282+
[torch.ops.aten.mul, torch.ops.aten.mul.Tensor, torch.ops.aten.mul_.Tensor]
283+
)
282284
def annotate_mul(node: Node, quantization_config: QuantizationConfig) -> None:
283285
annotate_binary(node, quantization_config)
284286

@@ -1311,7 +1313,7 @@ def annotate_where(node: Node, quantization_config: QuantizationConfig) -> None:
13111313
)
13121314

13131315

1314-
@register_annotator([torch.ops.aten.zeros.default])
1316+
@register_annotator([torch.ops.aten.zeros.default, torch.ops.aten.zeros_like.default])
13151317
def annotate_zeros(node: Node, quantization_config: QuantizationConfig) -> None:
13161318
if _is_annotated([node]) or not _is_float_tensor(node):
13171319
return

backends/qualcomm/quantizer/custom_annotation.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,9 @@ def annotate_prefill_kv_output(gm: torch.fx.GraphModule, kv_quant_attrs: dict):
153153
)
154154

155155

156-
def annotate_matmul_16a8w(gm: torch.fx.GraphModule) -> None: # noqa: C901
156+
def annotate_matmul_16a8w( # noqa: C901
157+
gm: torch.fx.GraphModule, annotate_conv=True
158+
) -> None:
157159
"""
158160
This function is specific for matmul op 16a8w.
159161
For k, we will tag such as the below, and
@@ -317,9 +319,10 @@ def annotate_matmul_input1(node: Node):
317319
# The arguments of cat op: (the past kv cache, the new kv cache)
318320
node = node.args[0][1]
319321
elif node.target == torch.ops.aten.conv2d.default:
320-
annotate_conv2d(
321-
node, quantization_config=quantization_config_8a4w_per_channel
322-
)
322+
if annotate_conv:
323+
annotate_conv2d(
324+
node, quantization_config=quantization_config_8a4w_per_channel
325+
)
323326
break
324327
elif node.target in [torch.ops.aten.add.Tensor, torch.ops.aten.sub.Tensor]:
325328
break

backends/qualcomm/scripts/build.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ if [ "$BUILD_AARCH64" = true ]; then
8585
-DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
8686
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
8787
-DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
88+
-DEXECUTORCH_ENABLE_LOGGING=ON \
8889
-DQNN_SDK_ROOT=$QNN_SDK_ROOT \
8990
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_ROOT/build/cmake/android.toolchain.cmake \
9091
-DANDROID_ABI='arm64-v8a' \
@@ -104,6 +105,9 @@ if [ "$BUILD_AARCH64" = true ]; then
104105
-DANDROID_ABI='arm64-v8a' \
105106
-DANDROID_PLATFORM=android-30 \
106107
-DCMAKE_PREFIX_PATH=$CMAKE_PREFIX_PATH \
108+
-DSUPPORT_REGEX_LOOKAHEAD=ON \
109+
-DBUILD_TESTING=OFF \
110+
-DEXECUTORCH_ENABLE_LOGGING=ON \
107111
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
108112
-DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \
109113
-DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
@@ -134,6 +138,7 @@ if [ "$BUILD_X86_64" = true ]; then
134138
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
135139
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
136140
-DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
141+
-DEXECUTORCH_ENABLE_LOGGING=ON \
137142
-DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
138143
-S $PRJ_ROOT \
139144
-B $BUILD_ROOT \
@@ -157,6 +162,9 @@ if [ "$BUILD_X86_64" = true ]; then
157162
-DCMAKE_PREFIX_PATH=$CMAKE_PREFIX_PATH \
158163
-DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \
159164
-DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
165+
-DSUPPORT_REGEX_LOOKAHEAD=ON \
166+
-DBUILD_TESTING=OFF \
167+
-DEXECUTORCH_ENABLE_LOGGING=ON \
160168
-B$EXAMPLE_ROOT
161169

162170
cmake --build $EXAMPLE_ROOT -j$BUILD_JOB_NUMBER

0 commit comments

Comments
 (0)