Skip to content

Commit 7adb108

Browse files
author
morelos
committed
Update on "[ET-VK][Ops] dequantize_per_channel reference impl and testing"
# Context In order to properly enable dynamic quantization, we create the dequantize_per_channel operator, as it's seemingly useful to have for the pipeline. # Changes This creates the wrapper for the CPU reference implementation, and also a dummy reference implementation I created just to test against it. Differential Revision: [D77746138](https://our.internmc.facebook.com/intern/diff/D77746138/) [ghstack-poisoned]
2 parents 567e5d2 + 57c6383 commit 7adb108

File tree

164 files changed

+4703
-1544
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

164 files changed

+4703
-1544
lines changed

.ci/scripts/test_yolo12.sh

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
#!/bin/bash
2+
# Copyright (c) Meta Platforms, Inc. and affiliates.
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under the BSD-style license found in the
6+
# LICENSE file in the root directory of this source tree.
7+
8+
set -ex
9+
# shellcheck source=/dev/null
10+
source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
11+
12+
while [[ $# -gt 0 ]]; do
13+
case "$1" in
14+
-model)
15+
MODEL_NAME="$2" # stories110M
16+
shift 2
17+
;;
18+
-mode)
19+
MODE="$2" # openvino or xnnpack
20+
shift 2
21+
;;
22+
-pt2e_quantize)
23+
PT2E_QUANTIZE="$2"
24+
shift 2
25+
;;
26+
-upload)
27+
UPLOAD_DIR="$2"
28+
shift 2
29+
;;
30+
-video_path)
31+
VIDEO_PATH="$2" # input video path, passed to the runner as --input_path
32+
shift 2
33+
;;
34+
*)
35+
echo "Unknown option: $1"
36+
usage
37+
;;
38+
esac
39+
done
40+
41+
# Default mode to openvino if not set
42+
MODE=${MODE:-"openvino"}
43+
44+
# Default UPLOAD_DIR to empty string if not set
45+
UPLOAD_DIR="${UPLOAD_DIR:-}"
46+
47+
# Default PT2E_QUANTIZE to empty string if not set
48+
PT2E_QUANTIZE="${PT2E_QUANTIZE:-}"
49+
50+
# Default CMake Build Type to release mode
51+
CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Release}
52+
53+
if [[ $# -lt 5 ]]; then # Assuming 4 mandatory args
54+
echo "Expecting atleast 5 positional arguments"
55+
echo "Usage: [...]"
56+
fi
57+
if [[ -z "${MODEL_NAME:-}" ]]; then
58+
echo "Missing model name, exiting..."
59+
exit 1
60+
fi
61+
62+
63+
if [[ -z "${MODE:-}" ]]; then
64+
echo "Missing mode, choose openvino or xnnpack, exiting..."
65+
exit 1
66+
fi
67+
68+
if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
69+
PYTHON_EXECUTABLE=python3
70+
fi
71+
72+
TARGET_LIBS=""
73+
74+
if [[ "${MODE}" =~ .*openvino.* ]]; then
75+
OPENVINO=ON
76+
TARGET_LIBS="$TARGET_LIBS openvino_backend "
77+
78+
git clone https://github.com/openvinotoolkit/openvino.git
79+
cd openvino && git b16b776ac119dafda51f69a80f1e6b7376d02c3b
80+
git submodule update --init --recursive
81+
sudo ./install_build_dependencies.sh
82+
mkdir build && cd build
83+
cmake .. -DCMAKE_BUILD_TYPE=Release -DENABLE_PYTHON=ON
84+
make -j$(nproc)
85+
86+
cd ..
87+
cmake --install build --prefix dist
88+
89+
source dist/setupvars.sh
90+
cd ../backends/openvino
91+
pip install -r requirements.txt
92+
cd ../../
93+
else
94+
OPENVINO=OFF
95+
fi
96+
97+
if [[ "${MODE}" =~ .*xnnpack.* ]]; then
98+
XNNPACK=ON
99+
TARGET_LIBS="$TARGET_LIBS xnnpack_backend "
100+
else
101+
XNNPACK=OFF
102+
fi
103+
104+
which "${PYTHON_EXECUTABLE}"
105+
106+
107+
DIR="examples/models/yolo12"
108+
$PYTHON_EXECUTABLE -m pip install -r ${DIR}/requirements.txt
109+
110+
cmake_install_executorch_libraries() {
111+
rm -rf cmake-out
112+
build_dir=cmake-out
113+
mkdir $build_dir
114+
115+
116+
retry cmake -DCMAKE_INSTALL_PREFIX="${build_dir}" \
117+
-DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}" \
118+
-DEXECUTORCH_BUILD_OPENVINO="$OPENVINO" \
119+
-DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
120+
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
121+
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
122+
-DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \
123+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
124+
-B"${build_dir}"
125+
126+
# Build the project
127+
cmake --build ${build_dir} --target install --config ${CMAKE_BUILD_TYPE} -j$(nproc)
128+
129+
export CMAKE_ARGS="
130+
-DEXECUTORCH_BUILD_OPENVINO="$OPENVINO" \
131+
-DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
132+
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
133+
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
134+
-DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \
135+
-DEXECUTORCH_ENABLE_LOGGING=ON \
136+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
137+
-DEXECUTORCH_BUILD_PYBIND=ON"
138+
139+
echo $TARGET_LIBS
140+
export CMAKE_BUILD_ARGS="--target $TARGET_LIBS"
141+
pip install . --no-build-isolation
142+
}
143+
144+
cmake_build_demo() {
145+
echo "Building yolo12 runner"
146+
retry cmake \
147+
-DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
148+
-DUSE_OPENVINO_BACKEND="$OPENVINO" \
149+
-DUSE_XNNPACK_BACKEND="$XNNPACK" \
150+
-Bcmake-out/${DIR} \
151+
${DIR}
152+
cmake --build cmake-out/${DIR} -j9 --config "$CMAKE_BUILD_TYPE"
153+
154+
}
155+
156+
cleanup_files() {
157+
rm $EXPORTED_MODEL_NAME
158+
}
159+
160+
prepare_artifacts_upload() {
161+
if [ -n "${UPLOAD_DIR}" ]; then
162+
echo "Preparing for uploading generated artifacs"
163+
zip -j model.zip "${EXPORTED_MODEL_NAME}"
164+
mkdir -p "${UPLOAD_DIR}"
165+
mv model.zip "${UPLOAD_DIR}"
166+
mv result.txt "${UPLOAD_DIR}"
167+
168+
fi
169+
}
170+
171+
172+
# Export model.
173+
EXPORTED_MODEL_NAME="${MODEL_NAME}_fp32_${MODE}.pte"
174+
echo "Exporting ${EXPORTED_MODEL_NAME}"
175+
EXPORT_ARGS="--model_name=${MODEL_NAME} --backend=${MODE}"
176+
177+
# Build and install the ExecuTorch libraries
178+
cmake_install_executorch_libraries
179+
180+
$PYTHON_EXECUTABLE -m examples.models.yolo12.export_and_validate ${EXPORT_ARGS}
181+
182+
183+
RUNTIME_ARGS="--model_path=${EXPORTED_MODEL_NAME} --input_path=${VIDEO_PATH}"
184+
# Build the yolo12 runner.
185+
cmake_build_demo
186+
# Run yolo12 runner
187+
NOW=$(date +"%H:%M:%S")
188+
echo "Starting to run yolo12 runner at ${NOW}"
189+
# shellcheck source=/dev/null
190+
cmake-out/examples/models/yolo12/Yolo12DetectionDemo ${RUNTIME_ARGS} > result.txt
191+
NOW=$(date +"%H:%M:%S")
192+
echo "Finished at ${NOW}"
193+
194+
RESULT=$(cat result.txt)
195+
196+
prepare_artifacts_upload
197+
cleanup_files

.github/workflows/lint.yml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,13 @@ jobs:
8383
script: |
8484
FILES_NEEDS_FORMAT=$(/opt/google-java-format -n \
8585
extension/android/executorch_android/src/main/java/org/pytorch/executorch/*.java \
86+
extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/*.java \
87+
extension/android/executorch_android/src/main/java/org/pytorch/executorch/annotations/*.java \
88+
extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/*.java \
8689
examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/*.java \
87-
extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/*.java)
90+
examples/demo-apps/android/LlamaDemo/app/src/androidTest/java/com/example/executorchllamademo/*.java \
91+
extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/*.java \
92+
extension/benchmark/android/benchmark/app/src/androidTest/java/org/pytorch/minibench/*.java)
8893
if [ -n "$FILES_NEEDS_FORMAT" ]; then
8994
echo "Warning: The following files need formatting. Please use google-java-format."
9095
echo "Use a binary from https://github.com/google/google-java-format/releases/"
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
from typing import Any, List, Optional, Tuple
8+
9+
import executorch
10+
import executorch.backends.test.harness.stages as BaseStages
11+
12+
import torch
13+
from executorch.backends.apple.coreml.partition import CoreMLPartitioner
14+
from executorch.backends.test.harness import Tester as TesterBase
15+
from executorch.backends.test.harness.stages import StageType
16+
from executorch.exir import EdgeCompileConfig
17+
from executorch.exir.backend.partitioner import Partitioner
18+
19+
20+
class Partition(BaseStages.Partition):
21+
def __init__(self, partitioner: Optional[Partitioner] = None):
22+
super().__init__(
23+
partitioner=partitioner or CoreMLPartitioner,
24+
)
25+
26+
27+
class ToEdgeTransformAndLower(BaseStages.ToEdgeTransformAndLower):
28+
def __init__(
29+
self,
30+
partitioners: Optional[List[Partitioner]] = None,
31+
edge_compile_config: Optional[EdgeCompileConfig] = None,
32+
):
33+
super().__init__(
34+
default_partitioner_cls=CoreMLPartitioner,
35+
partitioners=partitioners,
36+
edge_compile_config=edge_compile_config,
37+
)
38+
39+
40+
class CoreMLTester(TesterBase):
41+
def __init__(
42+
self,
43+
module: torch.nn.Module,
44+
example_inputs: Tuple[torch.Tensor],
45+
dynamic_shapes: Optional[Tuple[Any]] = None,
46+
):
47+
# Specialize for Core ML
48+
stage_classes = (
49+
executorch.backends.test.harness.Tester.default_stage_classes()
50+
| {
51+
StageType.PARTITION: Partition,
52+
StageType.TO_EDGE_TRANSFORM_AND_LOWER: ToEdgeTransformAndLower,
53+
}
54+
)
55+
56+
super().__init__(
57+
module=module,
58+
stage_classes=stage_classes,
59+
example_inputs=example_inputs,
60+
dynamic_shapes=dynamic_shapes,
61+
)

backends/arm/_passes/TARGETS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ python_library(
66
deps = [
77
"//executorch/backends/arm:tosa_quant_utils",
88
"//executorch/backends/arm:tosa_utils",
9+
"//executorch/backends/arm/tosa/dialect:lib",
910
"//executorch/backends/transforms:fuse_view_copy",
1011
"//executorch/backends/transforms:remove_getitem_op",
1112
"//executorch/backends/transforms:replace_scalar_with_tensor",

backends/arm/_passes/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from .convert_to_clamp import ConvertToClampPass # noqa
2525
from .decompose_acosh_pass import DecomposeAcoshPass # noqa
2626
from .decompose_adaptive_avg_pool2d_pass import DecomposeAdaptiveAvgPool2dPass # noqa
27+
from .decompose_asin_pass import DecomposeAsinPass # noqa
2728
from .decompose_atan_pass import DecomposeAtanPass # noqa
2829
from .decompose_avg_pool2d import DecomposeAvgPool2d # noqa
2930
from .decompose_batch_norm_no_stats import DecomposeBatchNormNoStatsPass # noqa
@@ -50,6 +51,7 @@
5051
from .decompose_sqrt_pass import DecomposeSqrtPass # noqa
5152
from .decompose_sum_pass import DecomposeSumPass # noqa
5253
from .decompose_var_pass import DecomposeVarPass # noqa
54+
from .decorate_fp32_to_int32_casting_pass import DecorateFp32toInt32CastingPass # noqa
5355
from .fold_qdq_with_annotated_qparams_pass import ( # noqa
5456
FoldAndAnnotateQParamsPass,
5557
QuantizeOperatorArguments,

backends/arm/_passes/arm_pass_manager.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
ConvertToClampPass,
3030
DecomposeAcoshPass,
3131
DecomposeAdaptiveAvgPool2dPass,
32+
DecomposeAsinPass,
3233
DecomposeAtanPass,
3334
DecomposeAvgPool2d,
3435
DecomposeBatchNormNoStatsPass,
@@ -55,6 +56,7 @@
5556
DecomposeSqrtPass,
5657
DecomposeSumPass,
5758
DecomposeVarPass,
59+
DecorateFp32toInt32CastingPass,
5860
FoldAndAnnotateQParamsPass,
5961
FuseBatchnorm2DPass,
6062
FuseConstantArgsPass,
@@ -158,6 +160,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
158160
def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
159161
self.add_pass(DecomposeRoundPass())
160162
self.add_pass(DecomposeAcoshPass())
163+
self.add_pass(DecomposeAsinPass())
161164
self.add_pass(DecomposeSqrtPass())
162165
self.add_pass(DecomposeAtanPass())
163166
self.add_pass(ConvertIntPowToMuls())
@@ -198,6 +201,9 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
198201
self.add_pass(MatchArgRanksPass(exported_program))
199202
self.add_pass(DecomposeAdaptiveAvgPool2dPass())
200203
self.add_pass(DecomposeAvgPool2d())
204+
self.add_pass(
205+
DecorateFp32toInt32CastingPass()
206+
) # Require that no new fp32->int32 is introduced after this pass
201207
self.add_pass(ComputeConstantOpsAOT(exported_program))
202208

203209
self.add_pass(DecomposeGroupedConv())

0 commit comments

Comments
 (0)