Commit 2df8439 (2 parents: 7e4be82 + 335de46)

Update
[ghstack-poisoned]

69 files changed: +3086, -996 lines

.ci/scripts/test_backend_linux.sh

Lines changed: 31 additions & 4 deletions

```diff
@@ -10,6 +10,8 @@ SUITE=$1
 FLOW=$2
 ARTIFACT_DIR=$3
 
+REPORT_FILE="$ARTIFACT_DIR/test-report-$FLOW-$SUITE.csv"
+
 echo "Running backend test job for suite $SUITE, flow $FLOW."
 echo "Saving job artifacts to $ARTIFACT_DIR."
 
@@ -18,13 +20,38 @@ eval "$(conda shell.bash hook)"
 CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
 conda activate "${CONDA_ENV}"
 
-# Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate
-source .ci/scripts/setup-vulkan-linux-deps.sh
+export PYTHON_EXECUTABLE=python
 
 # CMake options to use, in addition to the defaults.
-EXTRA_BUILD_ARGS="-DEXECUTORCH_BUILD_VULKAN=ON"
+EXTRA_BUILD_ARGS=""
+
+if [[ "$FLOW" == *qnn* ]]; then
+  # Setup QNN sdk and deps - note that this is a bit hacky due to the nature of the
+  # Qualcomm build. TODO (gjcomer) Clean this up once the QNN pybinding integration is
+  # cleaned up.
+  PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
+  PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
+  PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
+  QNN_X86_LIB_DIR=$(realpath build-x86/lib/)
+  QNN_SDK_ROOT="/tmp/qnn/2.28.0.241029"
+  export LD_LIBRARY_PATH="$QNN_X86_LIB_DIR:$QNN_SDK_ROOT/lib/x86_64-linux-clang/:${LD_LIBRARY_PATH:-}"
+
+  # TODO Get SDK root from install scripts
+  EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_QNN=ON -DQNN_SDK_ROOT=$QNN_SDK_ROOT"
+fi
+
+if [[ "$FLOW" == *vulkan* ]]; then
+  # Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate
+  source .ci/scripts/setup-vulkan-linux-deps.sh
+
+  EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_VULKAN=ON"
+fi
 
 # We need the runner to test the built library.
 PYTHON_EXECUTABLE=python CMAKE_ARGS="$EXTRA_BUILD_ARGS" .ci/scripts/setup-linux.sh --build-tool cmake --build-mode Release --editable true
 
-python -m executorch.backends.test.suite.runner $SUITE --flow $FLOW --report "$ARTIFACT_DIR/test_results.csv"
+EXIT_CODE=0
+python -m executorch.backends.test.suite.runner $SUITE --flow $FLOW --report "$REPORT_FILE" || EXIT_CODE=$?
+
+# Generate markdown summary.
+python -m executorch.backends.test.suite.generate_markdown_summary "$REPORT_FILE" > ${GITHUB_STEP_SUMMARY:-"step_summary.md"} --exit-code $EXIT_CODE
```
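A note on the new summary step: the CSV report is always rendered into the GitHub step summary, and the runner's exit status is forwarded so the job can still fail. The generate_markdown_summary internals are not part of this diff; below is a minimal sketch of that kind of step, assuming a CSV report with a header row and a per-test result column (the "Result" column name is hypothetical).

```python
# Minimal sketch of a report-to-markdown step; NOT the actual
# executorch.backends.test.suite.generate_markdown_summary module.
# The "Result" column name is an assumption for illustration.
import argparse
import csv
import sys
from collections import Counter


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("report")  # CSV written by the test runner
    parser.add_argument("--exit-code", type=int, default=0)
    args = parser.parse_args()

    with open(args.report, newline="") as f:
        rows = list(csv.DictReader(f))

    counts = Counter(row.get("Result", "unknown") for row in rows)

    # stdout is redirected to $GITHUB_STEP_SUMMARY by the calling script.
    print(f"### Backend test summary ({len(rows)} tests)")
    for result, n in sorted(counts.items()):
        print(f"- {result}: {n}")

    # Propagate the runner's status so test failures still fail the job.
    sys.exit(args.exit_code)


if __name__ == "__main__":
    main()
```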

.ci/scripts/test_backend_macos.sh

Lines changed: 7 additions & 1 deletion

```diff
@@ -10,6 +10,8 @@ SUITE=$1
 FLOW=$2
 ARTIFACT_DIR=$3
 
+REPORT_FILE="$ARTIFACT_DIR/test-report-$FLOW-$SUITE.csv"
+
 echo "Running backend test job for suite $SUITE, flow $FLOW."
 echo "Saving job artifacts to $ARTIFACT_DIR."
 
@@ -21,4 +23,8 @@ eval "$(conda shell.bash hook)"
 PYTHON_EXECUTABLE=python
 ${CONDA_RUN} --no-capture-output .ci/scripts/setup-macos.sh --build-tool cmake --build-mode Release
 
-${CONDA_RUN} --no-capture-output python -m executorch.backends.test.suite.runner $SUITE --flow $FLOW --report "$ARTIFACT_DIR/test_results.csv"
+EXIT_CODE=0
+${CONDA_RUN} --no-capture-output python -m executorch.backends.test.suite.runner $SUITE --flow $FLOW --report "$REPORT_FILE" || EXIT_CODE=$?
+
+# Generate markdown summary.
+${CONDA_RUN} --no-capture-output python -m executorch.backends.test.suite.generate_markdown_summary "$REPORT_FILE" > ${GITHUB_STEP_SUMMARY:-"step_summary.md"} --exit-code $EXIT_CODE
```
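The macOS script uses the same defer-the-failure idiom as the Linux one: `|| EXIT_CODE=$?` stops an errexit shell from aborting before the summary is written, and the saved status is surfaced at the end. The equivalent control flow, sketched in Python with placeholder commands:

```python
# Sketch of the defer-and-propagate pattern shared by both CI scripts.
# The command lists are placeholders, not the real invocations.
import subprocess
import sys

# Run the tests but do not fail yet; just remember the status.
exit_code = subprocess.run(["python", "-m", "fake_test_runner"]).returncode

# Always produce the report, even when the tests failed...
subprocess.run(["python", "-m", "fake_summary_generator"], check=True)

# ...then surface the original status so CI still marks the failure.
sys.exit(exit_code)
```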

.github/workflows/nightly.yml

Lines changed: 8 additions & 9 deletions

```diff
@@ -42,7 +42,11 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        flow: [vulkan, xnnpack, xnnpack_static_int8_per_channel]
+        flow: [
+          qnn, qnn_16a16w, qnn_16a8w, qnn_16a4w, qnn_16a4w_block, qnn_8a8w,
+          vulkan, vulkan_static_int8_per_channel,
+          xnnpack, xnnpack_dynamic_int8_per_channel, xnnpack_static_int8_per_channel, xnnpack_static_int8_per_tensor
+        ]
         suite: [models, operators]
     with:
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
@@ -53,11 +57,8 @@ jobs:
       upload-artifact: test-report-${{ matrix.flow }}-${{ matrix.suite }}
       script: |
        set -eux
-        # Intentionally suppressing exit code for now.
-        # TODO (gjcomer) Remove this when jobs are stable.
-        EXIT_CODE=0
-        .ci/scripts/test_backend_linux.sh "${{ matrix.suite }}" "${{ matrix.flow }}" "${RUNNER_ARTIFACT_DIR}" || EXIT_CODE=$?
-        echo "Test run complete with exit code $EXIT_CODE."
+
+        source .ci/scripts/test_backend_linux.sh "${{ matrix.suite }}" "${{ matrix.flow }}" "${RUNNER_ARTIFACT_DIR}"
 
   backend-test-macos:
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
@@ -82,6 +83,4 @@ jobs:
        # This is needed to get the prebuilt PyTorch wheel from S3
        ${CONDA_RUN} --no-capture-output pip install awscli==1.37.21
 
-        EXIT_CODE=0
-        .ci/scripts/test_backend_macos.sh "${{ matrix.suite }}" "${{ matrix.flow }}" "${RUNNER_ARTIFACT_DIR}" || EXIT_CODE=$?
-        echo "Test run complete with exit code $EXIT_CODE."
+        source .ci/scripts/test_backend_macos.sh "${{ matrix.suite }}" "${{ matrix.flow }}" "${RUNNER_ARTIFACT_DIR}"
```
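With fail-fast disabled, this matrix fans every flow out against both suites as independent jobs, and the upload-artifact name keys each report by combination. A quick sketch of the expansion (12 flows x 2 suites = 24 jobs):

```python
# Sketch: how the fail-fast: false matrix above fans out into jobs.
from itertools import product

flows = [
    "qnn", "qnn_16a16w", "qnn_16a8w", "qnn_16a4w", "qnn_16a4w_block", "qnn_8a8w",
    "vulkan", "vulkan_static_int8_per_channel",
    "xnnpack", "xnnpack_dynamic_int8_per_channel",
    "xnnpack_static_int8_per_channel", "xnnpack_static_int8_per_tensor",
]
suites = ["models", "operators"]

# One artifact name per combination, mirroring upload-artifact above.
artifacts = [f"test-report-{flow}-{suite}" for flow, suite in product(flows, suites)]
print(len(artifacts))  # 24 independent jobs
```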

backends/cadence/aot/functions.yaml

Lines changed: 35 additions & 0 deletions

```diff
@@ -249,6 +249,21 @@
     - arg_meta: null
       kernel_name: impl::reference::quantized_relu_asym8u_asym8u_per_tensor_out
 
+- func: cadence::quantized_add.per_tensor_out(Tensor X, float X_scale, int X_zero_point, Tensor Y, float Y_scale, int Y_zero_point, float out_scale, int out_zero_point, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::reference::quantized_add_per_tensor_out
+
+- func: cadence::quantized_add_asym8sxasym8s_asym8s.per_tensor_out(Tensor X, float X_scale, int X_zero_point, Tensor Y, float Y_scale, int Y_zero_point, float out_scale, int out_zero_point, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::reference::quantized_add_asym8sxasym8s_asym8s_per_tensor_out
+
+- func: cadence::quantized_add_asym8uxasym8u_asym8u.per_tensor_out(Tensor X, float X_scale, int X_zero_point, Tensor Y, float Y_scale, int Y_zero_point, float out_scale, int out_zero_point, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::reference::quantized_add_asym8uxasym8u_asym8u_per_tensor_out
+
 - func: cadence::quantized_matmul.out(Tensor X, int X_zero_point, Tensor Y, int Y_zero_point, Tensor? bias, int out_multiplier, int out_shift, int out_zero_point, bool transposed, *, Tensor(a!) out) -> Tensor(a!)
   kernels:
     - arg_meta: null
@@ -304,6 +319,26 @@
     - arg_meta: null
       kernel_name: impl::reference::quantized_conv_nhwc_asym8uxsym8u_asym8u_per_tensor_out
 
+- func: cadence::quantized_conv_nchw_dilated_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::reference::quantized_conv_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_out
+
+- func: cadence::quantized_conv_nchw_dilated_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::reference::quantized_conv_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_out
+
+- func: cadence::quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::reference::quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor_out
+
+- func: cadence::quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::reference::quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u_per_tensor_out
+
 - func: cadence::quantized_fully_connected.out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, Tensor weight_zero_point, Tensor out_multiplier, Tensor out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
   kernels:
     - arg_meta: null
```
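All three quantized_add registrations share the per_tensor signature: one scale and zero point for each operand and for the output. The C++ kernels are not shown in this diff, but per-tensor quantized addition is conventionally dequantize, add in float, requantize; a NumPy sketch of that arithmetic, offered as an assumption about the semantics rather than a transcription of the kernel:

```python
# Assumed per-tensor quantized add semantics (dequantize -> add -> requantize).
# Illustrative only; not the registered impl::reference kernel.
import numpy as np


def quantized_add_per_tensor(
    X: np.ndarray, X_scale: float, X_zero_point: int,
    Y: np.ndarray, Y_scale: float, Y_zero_point: int,
    out_scale: float, out_zero_point: int,
    dtype=np.int8,  # asym8s variants; the asym8u variants would use np.uint8
) -> np.ndarray:
    x_fp = (X.astype(np.float32) - X_zero_point) * X_scale
    y_fp = (Y.astype(np.float32) - Y_zero_point) * Y_scale
    q = np.round((x_fp + y_fp) / out_scale) + out_zero_point
    info = np.iinfo(dtype)
    return np.clip(q, info.min, info.max).astype(dtype)
```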

backends/cadence/aot/functions_hifi.yaml

Lines changed: 30 additions & 0 deletions

```diff
@@ -330,6 +330,26 @@
     - arg_meta: null
       kernel_name: cadence::impl::HiFi::quantized_conv_nhwc_asym8uxsym8u_asym8u_per_tensor_out
 
+- func: cadence::quantized_conv_nchw_dilated_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::quantized_conv_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_out
+
+- func: cadence::quantized_conv_nchw_dilated_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::quantized_conv_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_out
+
+- func: cadence::quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor_out
+
+- func: cadence::quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u_per_tensor_out
+
 - func: cadence::quantized_layer_norm.out(Tensor input, Tensor in_scale, Tensor in_zero_point, int[] normalized_shape, Tensor weight, Tensor bias, float eps, float output_scale, int output_zero_point, *, Tensor(a!) out) -> Tensor(a!)
   kernels:
     - arg_meta: null
@@ -384,6 +404,16 @@
     - arg_meta: null
      kernel_name: cadence::impl::HiFi::quantized_relu_asym8u_asym8u_per_tensor_out
 
+- func: cadence::quantized_add_asym8sxasym8s_asym8s.per_tensor_out(Tensor X, float X_scale, int X_zero_point, Tensor Y, float Y_scale, int Y_zero_point, float out_scale, int out_zero_point, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::quantized_add_asym8sxasym8s_asym8s_per_tensor_out
+
+- func: cadence::quantized_add_asym8uxasym8u_asym8u.per_tensor_out(Tensor X, float X_scale, int X_zero_point, Tensor Y, float Y_scale, int Y_zero_point, float out_scale, int out_zero_point, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::quantized_add_asym8uxasym8u_asym8u_per_tensor_out
+
 - func: cadence::quantized_matmul.out(Tensor X, int X_zero_point, Tensor Y, int Y_zero_point, Tensor? bias, int out_multiplier, int out_shift, int out_zero_point, bool transposed, *, Tensor(a!) out) -> Tensor(a!)
   kernels:
     - arg_meta: null
```
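The dilated variants specialize on the dilation already present in the signature: with dilation d, the taps inside the kernel window are spaced d apart, for an effective extent of (k - 1) * d + 1 per dimension. (The asym8sxsym8s_asym8s suffixes appear to encode asymmetric signed 8-bit activations times symmetric signed 8-bit weights producing asymmetric signed 8-bit output.) A minimal single-channel NumPy sketch of the dilation indexing only, stride 1, no padding, no quantization:

```python
# Single-channel 2-D dilated convolution, stride 1, no padding.
# Shows only the dilation indexing; the HiFi kernels additionally handle
# quantization parameters, bias, groups, strides, padding, and layout.
import numpy as np


def conv2d_dilated(inp: np.ndarray, weight: np.ndarray, dilation: int) -> np.ndarray:
    kh, kw = weight.shape
    eff_kh = (kh - 1) * dilation + 1  # effective receptive field height
    eff_kw = (kw - 1) * dilation + 1
    oh = inp.shape[0] - eff_kh + 1
    ow = inp.shape[1] - eff_kw + 1
    out = np.zeros((oh, ow), dtype=np.float32)
    for i in range(oh):
        for j in range(ow):
            # Taps are spaced `dilation` apart inside the window.
            window = inp[i : i + eff_kh : dilation, j : j + eff_kw : dilation]
            out[i, j] = float(np.sum(window.astype(np.float32) * weight))
    return out
```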

backends/cadence/aot/memory_planning.py

Lines changed: 8 additions & 5 deletions

```diff
@@ -116,6 +116,9 @@ def plan_spec(
         Greedily place the spec in the first memory that can fit it.
         """
         for spec.mem_id in range(1, self.get_num_memories()):
+            if placement_constraints.is_mem_id_in_blocklist(spec, spec.mem_id):
+                # Skip placement for blocked memory id.
+                continue
             prev_offset, smallest_gap = 0, float("inf")
             for allocated_spec in state.allocated_buffers[spec.mem_id]:
                 if not Verifier.lifetime_overlap(spec, allocated_spec):
@@ -141,11 +144,11 @@ def plan_spec(
             )
             if spec.mem_offset is None:
                 spec.mem_offset = prev_offset
-                if not self.is_valid_placement(spec, placement_constraints):
-                    spec.mem_offset = None
-                    continue
-            else:
-                spec.mem_offset = prev_offset
+
+            if not self.is_valid_placement(spec, placement_constraints):
+                # Skip placement for invalid memory id.
+                spec.mem_offset = None
+                continue
 
             state.place_spec(spec)
             # A data structure used for maintaining the tensor order
```
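For context, plan_spec is a greedy per-memory search: for each candidate mem_id (now skipping blocklisted ones up front), it scans buffers already allocated there with overlapping lifetimes and keeps the smallest gap that still fits, falling back to the end of the region. A simplified sketch of that placement loop, with placeholder types in place of the real TensorSpec machinery:

```python
# Simplified shape of the greedy gap search in plan_spec; placeholder
# Buffer type instead of the real TensorSpec/MemoryPlanningState objects.
from dataclasses import dataclass
from typing import Optional


@dataclass
class Buffer:
    offset: int
    size: int


def place_best_fit(size: int, allocated: list[Buffer], mem_size: int) -> Optional[int]:
    """Return an offset for a new buffer of `size`, or None if it does not fit."""
    prev_end, best_offset, smallest_gap = 0, None, float("inf")
    for buf in sorted(allocated, key=lambda b: b.offset):
        gap = buf.offset - prev_end
        if size <= gap < smallest_gap:
            best_offset, smallest_gap = prev_end, gap  # smallest workable gap so far
        prev_end = max(prev_end, buf.offset + buf.size)
    if best_offset is None and mem_size - prev_end >= size:
        best_offset = prev_end  # fall back to the tail of the memory
    return best_offset  # None mirrors the diff's `continue` to the next mem_id
```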

backends/cadence/aot/memory_planning_algo.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -204,7 +204,7 @@ def _place_memory_id_pinned_specs(
             for spec, c in spec_with_abs_constraint.items()
             if c is not None and c.pinned_memory_id == mem_id and c.offset is None
         }
-        logging.error(f"Placing specs {mem_id_pinned_specs} for {mem_id=}")
+        logging.debug(f"Placing specs {mem_id_pinned_specs} for {mem_id=}")
 
         with self.block_memories_except(mem_id):
             self.plan(
@@ -220,7 +220,7 @@ def _place_memory_id_pinned_specs(
             if constraint is None:
                 continue
 
-            logging.error(f"Placing spec {spec} with {constraint}")
+            logging.debug(f"Placing spec {spec} with {constraint}")
 
             if not state.is_placed(spec):
                 raise MemoryError(
```