Skip to content

Commit 616831b

Browse files
committed
Update base for Update on "[ExecuTorch] XNNPACK: prefer qc over qb when gs == k for non-int4"
* Prefer chanelwise over groupwise when possible for perf and for int8 which doesn't have groupwise support * Fix bug / improve behavior for affine q/dq with gs == k for per_channel * refactor is_per_channel_group state variable * add QuantParams.__str__() TODO - improve affine quant primitives - T237476295 Differential Revision: [D82060758](https://our.internmc.facebook.com/intern/diff/D82060758/) **NOTE FOR REVIEWERS**: This PR has internal Meta-specific changes or comments, please review them on [Phabricator](https://our.internmc.facebook.com/intern/diff/D82060758/)! [ghstack-poisoned]
2 parents 0b78412 + 6b9c0a6 commit 616831b

File tree

302 files changed

+9747
-6770
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

302 files changed

+9747
-6770
lines changed

.ci/scripts/setup-samsung-linux-deps.sh

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -54,15 +54,6 @@ install_enn_backend() {
5454
rm -rf "${NDK_INSTALLATION_DIR}" && sudo mkdir -p "${NDK_INSTALLATION_DIR}"
5555
ANDROID_NDK_VERSION=r27b
5656

57-
pushd .
58-
cd /tmp
59-
curl -Os --retry 3 "https://ossci-android.s3.amazonaws.com/android-ndk-${ANDROID_NDK_VERSION}-linux.zip"
60-
unzip -qo "android-ndk-${ANDROID_NDK_VERSION}-linux.zip"
61-
62-
# Print the content for manual verification
63-
ls -lah "android-ndk-${ANDROID_NDK_VERSION}"
64-
sudo mv "android-ndk-${ANDROID_NDK_VERSION}"/* "${NDK_INSTALLATION_DIR}"
65-
popd
6657
# build Exynos backend
6758
export ANDROID_NDK_ROOT=${ANDROID_NDK_ROOT:-/opt/ndk}
6859
bash backends/samsung/build.sh --build all

.ci/scripts/test_model.ps1

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ function ExportModel-Xnnpack {
3434
[bool]$quantize
3535
)
3636

37-
if $(quantize) {
37+
if ($quantize) {
3838
python -m examples.xnnpack.aot_compiler --model_name="${MODEL_NAME}" --delegate --quantize | Write-Host
3939
$modelFile = "$($modelName)_xnnpack_q8.pte"
4040
} else {

.ci/scripts/wheel/pre_build_script.sh

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,26 @@ set -euxo pipefail
99

1010
# This script is run before building ExecuTorch binaries
1111

12+
# Clone nested submodules for tokenizers - this is a workaround for recursive
13+
# submodule clone failing due to path length limitations on Windows. Eventually,
14+
# we should update the core job in test-infra to enable long paths before
15+
# checkout to avoid needing to do this.
16+
pushd extension/llm/tokenizers
17+
git submodule update --init
18+
popd
19+
20+
# On Windows, enable symlinks and re-checkout the current revision to create
21+
# the symlinked src/ directory. This is needed to build the wheel.
22+
UNAME_S=$(uname -s)
23+
if [[ $UNAME_S == *"MINGW"* || $UNAME_S == *"MSYS"* ]]; then
24+
echo "Enabling symlinks on Windows"
25+
git config core.symlinks true
26+
git checkout -f HEAD
27+
fi
28+
1229
# Manually install build requirements because `python setup.py bdist_wheel` does
1330
# not install them. TODO(dbort): Switch to using `python -m build --wheel`,
1431
# which does install them. Though we'd need to disable build isolation to be
1532
# able to see the installed torch package.
1633

17-
"${GITHUB_WORKSPACE}/${REPOSITORY}/install_requirements.sh" --example
34+
"${GITHUB_WORKSPACE}/${REPOSITORY}/install_requirements.sh" --example

.ci/scripts/wheel/test_windows.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
#!/usr/bin/env python
2+
# Copyright (c) Meta Platforms, Inc. and affiliates.
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under the BSD-style license found in the
6+
# LICENSE file in the root directory of this source tree.
7+
8+
from typing import List
9+
10+
import torch
11+
from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
12+
from executorch.examples.models import Backend, Model, MODEL_NAME_TO_MODEL
13+
from executorch.examples.models.model_factory import EagerModelFactory
14+
from executorch.examples.xnnpack import MODEL_NAME_TO_OPTIONS
15+
from executorch.examples.xnnpack.quantization.utils import quantize as quantize_xnn
16+
from executorch.exir import EdgeCompileConfig, to_edge_transform_and_lower
17+
from executorch.extension.pybindings.portable_lib import (
18+
_load_for_executorch_from_buffer,
19+
)
20+
from test_base import ModelTest
21+
22+
23+
def test_model_xnnpack(model: Model, quantize: bool) -> None:
24+
model_instance, example_inputs, _, _ = EagerModelFactory.create_model(
25+
*MODEL_NAME_TO_MODEL[str(model)]
26+
)
27+
28+
model_instance.eval()
29+
ref_outputs = model_instance(*example_inputs)
30+
31+
if quantize:
32+
quant_type = MODEL_NAME_TO_OPTIONS[str(model)].quantization
33+
model_instance = torch.export.export_for_training(
34+
model_instance, example_inputs
35+
)
36+
model_instance = quantize_xnn(
37+
model_instance.module(), example_inputs, quant_type
38+
)
39+
40+
lowered = to_edge_transform_and_lower(
41+
torch.export.export(model_instance, example_inputs),
42+
partitioner=[XnnpackPartitioner()],
43+
compile_config=EdgeCompileConfig(
44+
_check_ir_validity=False,
45+
),
46+
).to_executorch()
47+
48+
loaded_model = _load_for_executorch_from_buffer(lowered.buffer)
49+
et_outputs = loaded_model([*example_inputs])
50+
51+
if isinstance(ref_outputs, torch.Tensor):
52+
ref_outputs = (ref_outputs,)
53+
54+
assert len(ref_outputs) == len(et_outputs)
55+
for i in range(len(ref_outputs)):
56+
torch.testing.assert_close(ref_outputs[i], et_outputs[i], atol=1e-4, rtol=1e-5)
57+
58+
59+
def run_tests(model_tests: List[ModelTest]) -> None:
60+
for model_test in model_tests:
61+
if model_test.backend == Backend.Xnnpack:
62+
test_model_xnnpack(model_test.model, quantize=False)
63+
else:
64+
raise RuntimeError(f"Unsupported backend {model_test.backend}.")
65+
66+
67+
if __name__ == "__main__":
68+
run_tests(
69+
model_tests=[
70+
ModelTest(
71+
model=Model.Mv3,
72+
backend=Backend.Xnnpack,
73+
),
74+
]
75+
)
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
REM This is lightly modified from the torchvision Windows build logic.
2+
REM See https://github.com/pytorch/vision/blob/main/packaging/windows/internal/vc_env_helper.bat
3+
4+
@echo on
5+
6+
set VC_VERSION_LOWER=17
7+
set VC_VERSION_UPPER=18
8+
9+
for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [%VC_VERSION_LOWER%^,%VC_VERSION_UPPER%^) -property installationPath`) do (
10+
if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" (
11+
set "VS15INSTALLDIR=%%i"
12+
set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat"
13+
goto vswhere
14+
)
15+
)
16+
17+
:vswhere
18+
if "%VSDEVCMD_ARGS%" == "" (
19+
call "%VS15VCVARSALL%" x64 || exit /b 1
20+
) else (
21+
call "%VS15VCVARSALL%" x64 %VSDEVCMD_ARGS% || exit /b 1
22+
)
23+
24+
@echo on
25+
26+
if "%CU_VERSION%" == "xpu" call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
27+
28+
set DISTUTILS_USE_SDK=1
29+
30+
set args=%1
31+
shift
32+
:start
33+
if [%1] == [] goto done
34+
set args=%args% %1
35+
shift
36+
goto start
37+
38+
:done
39+
if "%args%" == "" (
40+
echo Usage: vc_env_helper.bat [command] [args]
41+
echo e.g. vc_env_helper.bat cl /c test.cpp
42+
)
43+
44+
set work_dir=%CD%
45+
if exist setup.py (
46+
echo "Creating symlink..."
47+
REM Setup a symlink to shorten the path length.
48+
REM Note that the ET directory has to be named "executorch".
49+
cd %GITHUB_WORKSPACE%
50+
if not exist et\ (
51+
mkdir et
52+
)
53+
cd et
54+
echo Work dir: %work_dir%
55+
if not exist executorch\ (
56+
mklink /d executorch %work_dir%
57+
)
58+
cd executorch
59+
)
60+
61+
%args% || exit /b 1
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
name: Build Windows Wheels
2+
3+
on:
4+
pull_request:
5+
push:
6+
branches:
7+
- nightly
8+
- main
9+
- release/*
10+
tags:
11+
# NOTE: Binary build pipelines should only get triggered on release candidate builds
12+
# Release candidate tags look like: v1.11.0-rc1
13+
- v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
14+
workflow_dispatch:
15+
16+
permissions:
17+
id-token: write
18+
contents: read
19+
20+
jobs:
21+
generate-matrix:
22+
uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
23+
with:
24+
package-type: wheel
25+
os: windows
26+
test-infra-repository: pytorch/test-infra
27+
test-infra-ref: main
28+
with-cuda: disabled
29+
with-rocm: disabled
30+
python-versions: '["3.10", "3.11", "3.12"]'
31+
32+
build:
33+
needs: generate-matrix
34+
strategy:
35+
fail-fast: false
36+
matrix:
37+
include:
38+
- repository: pytorch/executorch
39+
pre-script: .ci\\scripts\\wheel\\pre_build_script.sh
40+
env-script: .ci\\scripts\\wheel\\vc_env_helper.bat
41+
post-script: .ci\\scripts\\wheel\\post_build_script.sh
42+
smoke-test-script: .ci/scripts/wheel/test_windows.py
43+
package-name: executorch
44+
name: ${{ matrix.repository }}
45+
uses: pytorch/test-infra/.github/workflows/build_wheels_windows.yml@main
46+
with:
47+
repository: ${{ matrix.repository }}
48+
ref: ""
49+
test-infra-repository: pytorch/test-infra
50+
test-infra-ref: main
51+
build-matrix: ${{ needs.generate-matrix.outputs.matrix }}
52+
pre-script: ${{ matrix.pre-script }}
53+
env-script: ${{ matrix.env-script }}
54+
post-script: ${{ matrix.post-script }}
55+
package-name: ${{ matrix.package-name }}
56+
smoke-test-script: ${{ matrix.smoke-test-script }}
57+
trigger-event: ${{ github.event_name }}
58+
wheel-build-params: "--verbose"
59+
submodules: true

.github/workflows/pull.yml

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -874,7 +874,7 @@ jobs:
874874
contents: read
875875
with:
876876
runner: linux.2xlarge
877-
docker-image: ci-image:executorch-ubuntu-22.04-gcc9
877+
docker-image: ci-image:executorch-ubuntu-22.04-clang12-android
878878
submodules: 'recursive'
879879
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
880880
timeout: 90
@@ -892,7 +892,7 @@ jobs:
892892
source .ci/scripts/setup-samsung-linux-deps.sh
893893
894894
# Test models serially
895-
models="mv2 ic3 resnet18 resnet50"
895+
models="mv2 ic3 resnet18 resnet50 mv3 ic4 dl3 edsr vit w2l"
896896
for model in $models; do
897897
python -m executorch.examples.samsung.aot_compiler --model_name=$model -c E9955
898898
done
@@ -971,6 +971,13 @@ jobs:
971971
./cmake-out/backends/vulkan/test/custom_ops/q4gsw_linear
972972
./cmake-out/backends/vulkan/test/custom_ops/choose_qparams_per_row
973973
974+
# "Classic" Operator tests
975+
PYTHON_EXECUTABLE=python bash backends/vulkan/test/scripts/test_op.sh --build
976+
# TODO(ssjia): figure out how to run custom op tests in CI. Currently, they are
977+
# failing due to to the libstdc++.so.6 installed with conda not supporting
978+
# GLIBCXX_3.4.30. These tests are still run in Meta internal CI.
979+
# ./cmake-out/backends/vulkan/test/op_tests/vulkan_sdpa_test
980+
974981
# Run e2e testing for selected operators. More operators will be tested via this
975982
# route in the future.
976983
python -m unittest backends/vulkan/test/test_vulkan_delegate.py -k "*pt2e*"

.github/workflows/trunk.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ on:
88
tags:
99
- ciflow/trunk/*
1010
pull_request:
11+
paths:
12+
- .ci/docker/ci_commit_pins/pytorch.txt
13+
- .ci/scripts/**
1114
workflow_dispatch:
1215

1316
concurrency:

backends/apple/coreml/recipes/coreml_recipe_provider.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ def create_recipe(
6969
recipe_type, activation_dtype=torch.float32, **kwargs
7070
)
7171
elif recipe_type == CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_CHANNEL:
72+
self._validate_and_set_deployment_target(kwargs, ct.target.iOS18, "torchao")
7273
return self._build_torchao_quantized_recipe(
7374
recipe_type,
7475
weight_dtype=torch.int4,
@@ -77,6 +78,7 @@ def create_recipe(
7778
)
7879
elif recipe_type == CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_GROUP:
7980
group_size = kwargs.pop("group_size", 32)
81+
self._validate_and_set_deployment_target(kwargs, ct.target.iOS18, "torchao")
8082
return self._build_torchao_quantized_recipe(
8183
recipe_type,
8284
weight_dtype=torch.int4,
@@ -85,11 +87,14 @@ def create_recipe(
8587
**kwargs,
8688
)
8789
elif recipe_type == CoreMLRecipeType.TORCHAO_INT8_WEIGHT_ONLY_PER_CHANNEL:
90+
self._validate_and_set_deployment_target(kwargs, ct.target.iOS16, "torchao")
8891
return self._build_torchao_quantized_recipe(
8992
recipe_type, weight_dtype=torch.int8, is_per_channel=True, **kwargs
9093
)
9194
elif recipe_type == CoreMLRecipeType.TORCHAO_INT8_WEIGHT_ONLY_PER_GROUP:
9295
group_size = kwargs.pop("group_size", 32)
96+
# override minimum_deployment_target to ios18 for torchao (GH issue #13122)
97+
self._validate_and_set_deployment_target(kwargs, ct.target.iOS18, "torchao")
9398
return self._build_torchao_quantized_recipe(
9499
recipe_type,
95100
weight_dtype=torch.int8,
@@ -312,8 +317,6 @@ def _build_torchao_quantized_recipe(
312317
ao_quantization_configs=[config],
313318
)
314319

315-
# override minimum_deployment_target to ios18 for torchao (GH issue #13122)
316-
self._validate_and_set_deployment_target(kwargs, ct.target.iOS18, "torchao")
317320
lowering_recipe = self._get_coreml_lowering_recipe(**kwargs)
318321

319322
return ExportRecipe(

backends/apple/coreml/test/test_coreml_quantizer.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
)
1616

1717
from executorch.backends.apple.coreml.quantizer import CoreMLQuantizer
18-
from torch.export import export_for_training
18+
from torch.export import export
1919
from torchao.quantization.pt2e.quantize_pt2e import (
2020
convert_pt2e,
2121
prepare_pt2e,
@@ -32,9 +32,7 @@ def quantize_and_compare(
3232
) -> None:
3333
assert quantization_type in {"PTQ", "QAT"}
3434

35-
pre_autograd_aten_dialect = export_for_training(
36-
model, example_inputs, strict=True
37-
).module()
35+
pre_autograd_aten_dialect = export(model, example_inputs, strict=True).module()
3836

3937
quantization_config = LinearQuantizerConfig.from_dict(
4038
{

0 commit comments

Comments
 (0)