Skip to content

Commit f8ab347

Browse files
authored
Merge branch 'main' into add-dim-order-clone-aot
2 parents 5bfd58b + 9fa7edf commit f8ab347

File tree

122 files changed

+2298
-1402
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

122 files changed

+2298
-1402
lines changed

.ci/scripts/setup-windows.ps1

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Prepares the Windows CI environment: creates and activates the `et` conda
# environment, loads the MSVC developer shell, installs the CI test
# dependencies, and installs ExecuTorch.
param (
    # 'true' installs ExecuTorch in editable mode; any other value performs a
    # regular install. Declared as a string, so the default is a string too.
    [string]$editable = 'false'
)

conda create --yes --quiet -n et python=3.12
conda activate et

# Activate the VS environment - this is required for Dynamo to work, as it uses MSVC.
# There are a bunch of environment variables that it requires.
# See https://learn.microsoft.com/en-us/cpp/build/building-on-the-command-line.
& "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\Common7\Tools\Launch-VsDevShell.ps1" -Arch amd64

# Install test dependencies
pip install -r .ci/docker/requirements-ci.txt
# Checked explicitly: a non-zero exit code from a native command does not stop
# the script by itself unless the host honors
# $PSNativeCommandUseErrorActionPreference.
if ($LASTEXITCODE -ne 0) {
    Write-Host "Test dependency installation was unsuccessful. Exit code: $LASTEXITCODE."
    exit $LASTEXITCODE
}

if ($editable -eq 'true') {
    install_executorch.bat --editable
} else {
    install_executorch.bat
}
if ($LASTEXITCODE -ne 0) {
    Write-Host "Installation was unsuccessful. Exit code: $LASTEXITCODE."
    exit $LASTEXITCODE
}

.ci/scripts/test_model.ps1

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# Builds executor_runner, exports one model for the requested backend, and
# runs the exported program through the runner.
param (
    # Name of the model to export and run.
    [string]$modelName,
    # Backend to export for: portable, xnnpack-f32 or xnnpack-q8.
    [string]$backend,
    # Directory used for the CMake build; it is deleted and recreated.
    [string]$buildDir = "cmake-out",
    # When set, the portable export is invoked with --strict.
    [bool]$strict = $false
)

# Trace every executed line into the CI log.
Set-PSDebug -Trace 1
# Treat PowerShell errors (and, where the host supports the preference,
# failing native commands) as terminating.
$ErrorActionPreference = 'Stop'
$PSNativeCommandUseErrorActionPreference = $true
11+
12+
function ExportModel-Portable {
    <#
    .SYNOPSIS
    Exports a model with the portable export script and returns the .pte file name.

    .PARAMETER model_name
    Model name passed to the exporter as --model_name; also the stem of the
    returned file name.

    .PARAMETER strict
    When true, --strict is appended to the exporter arguments.

    .OUTPUTS
    String "<model_name>.pte". On exporter failure the script exits with the
    exporter's exit code instead of returning.
    #>
    param (
        [string]$model_name,
        [bool]$strict
    )

    # Use the function's own parameter rather than relying on a script-scope
    # variable that happens to hold the same value.
    $exportParams = "--model_name", "$model_name"
    if ($strict) {
        $exportParams += "--strict"
    }
    # Piped to Write-Host so the exporter's output is shown in the log instead
    # of becoming part of this function's return value.
    python -m examples.portable.scripts.export @exportParams | Write-Host
    if ($LASTEXITCODE -ne 0) {
        Write-Host "Model export failed. Exit code: $LASTEXITCODE."
        exit $LASTEXITCODE
    }

    "$model_name.pte"
}
30+
31+
function ExportModel-Xnnpack {
    <#
    .SYNOPSIS
    Exports a model with the XNNPACK AOT compiler and returns the .pte file name.

    .PARAMETER model_name
    Model name passed to the compiler as --model_name=<name>; also the stem of
    the returned file name.

    .PARAMETER quantize
    When true, --quantize is passed and the "_xnnpack_q8.pte" file name is
    returned; otherwise the "_xnnpack_fp32.pte" file name is returned.

    .OUTPUTS
    String with the expected output file name. On compiler failure the script
    exits with the compiler's exit code instead of returning.
    #>
    param (
        [string]$model_name,
        [bool]$quantize
    )

    # Arguments shared by both variants; --quantize is added on demand so the
    # python invocation exists only once.
    $exportParams = "--model_name=$model_name", "--delegate"
    if ($quantize) {
        $exportParams += "--quantize"
        $modelFile = "$($model_name)_xnnpack_q8.pte"
    } else {
        $modelFile = "$($model_name)_xnnpack_fp32.pte"
    }

    # Piped to Write-Host so the compiler's output is shown in the log instead
    # of becoming part of this function's return value.
    python -m examples.xnnpack.aot_compiler @exportParams | Write-Host
    if ($LASTEXITCODE -ne 0) {
        Write-Host "Model export failed. Exit code: $LASTEXITCODE."
        exit $LASTEXITCODE
    }

    $modelFile
}
51+
52+
# Build the runner
# Always start from an empty build directory.
if (Test-Path -Path $buildDir) {
    Remove-Item -Path $buildDir -Recurse -Force
}
New-Item -Path $buildDir -ItemType Directory
Push-Location $buildDir
cmake .. --preset windows
# Report a configure failure as such instead of letting it surface later as a
# build failure.
if ($LASTEXITCODE -ne 0) {
    Write-Host "Runner configuration failed. Exit code: $LASTEXITCODE."
    exit $LASTEXITCODE
}
cmake --build . -t executor_runner -j16 --config Release
if ($LASTEXITCODE -ne 0) {
    Write-Host "Runner build failed. Exit code: $LASTEXITCODE."
    exit $LASTEXITCODE
}
# Built from $buildDir as given, so with a relative $buildDir this path is
# relative to the directory restored by Pop-Location below.
$executorBinaryPath = Join-Path -Path $buildDir -ChildPath "Release\executor_runner.exe"
Pop-Location

# Export the model
switch ($backend) {
    "portable" {
        $model_path = ExportModel-Portable -model_name $modelName -strict $strict
    }
    "xnnpack-f32" {
        $model_path = ExportModel-Xnnpack -model_name $modelName -quantize $false
    }
    "xnnpack-q8" {
        $model_path = ExportModel-Xnnpack -model_name $modelName -quantize $true
    }
    default {
        Write-Host "Unknown backend $backend."
        exit 1
    }
}

# Run the runner
& "$executorBinaryPath" --model_path="$model_path"
if ($LASTEXITCODE -ne 0) {
    Write-Host "Model execution failed. Exit code: $LASTEXITCODE."
    exit $LASTEXITCODE
}

.ci/scripts/unittest-windows.ps1

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Runs the Windows unit tests through pytest using pytest-windows.ini.
param (
    # Accepted so callers can pass the same -editable flag they use elsewhere;
    # this script does not read it. Declared as a string, so the default is a
    # string too.
    [string]$editable = 'false'
)

# Trace every executed line into the CI log.
Set-PSDebug -Trace 1
# Treat PowerShell errors (and, where the host supports the preference,
# failing native commands) as terminating.
$ErrorActionPreference = 'Stop'
$PSNativeCommandUseErrorActionPreference = $true

# Run pytest with coverage
# pytest -n auto --cov=./ --cov-report=xml
pytest -v --full-trace -c pytest-windows.ini
if ($LASTEXITCODE -ne 0) {
    Write-Host "Pytest invocation was unsuccessful. Exit code: $LASTEXITCODE."
    exit $LASTEXITCODE
}

.github/workflows/_unittest.yml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ on:
1919
required: false
2020
type: string
2121
description: Install ExecuTorch in editable mode or not.
22+
default: 'false'
2223
python-version:
2324
required: false
2425
type: string
@@ -52,3 +53,23 @@ jobs:
5253
# This is needed to get the prebuilt PyTorch wheel from S3
5354
${CONDA_RUN} --no-capture-output pip install awscli==1.37.21
5455
.ci/scripts/unittest-macos.sh --build-tool "${{ inputs.build-tool }}" --build-mode "${{ inputs.build-mode }}" --editable "${{ inputs.editable }}"
56+
57+
# Windows unit tests; only scheduled when the cmake build tool is selected.
# The `editable` input is forwarded to setup-windows.ps1, which is the script
# that chooses between an editable and a regular ExecuTorch install.
windows:
  if: ${{ inputs.build-tool == 'cmake' }}
  uses: pytorch/test-infra/.github/workflows/windows_job.yml@main
  with:
    submodules: 'recursive'
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    timeout: 120
    script: |
      conda init powershell

      powershell -Command "& {
        Set-PSDebug -Trace 1
        \$ErrorActionPreference = 'Stop'
        \$PSNativeCommandUseErrorActionPreference = \$true

        .ci/scripts/setup-windows.ps1 -editable "${{ inputs.editable }}"

        powershell .ci/scripts/unittest-windows.ps1 -editable "${{ inputs.editable }}"
      }"

.github/workflows/android-perf.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ jobs:
7272
# Separate default values from the workflow dispatch. To ensure defaults are accessible
7373
# during scheduled runs and to provide flexibility for different defaults between
7474
# on-demand and periodic benchmarking.
75-
CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,allenai/OLMo-1B-hf,google/gemma-3-1b-it' || 'Qwen/Qwen3-0.6B' }}
75+
CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'mv3,mv2,ic4,ic3,resnet50,mobilebert,w2l,meta-llama/Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,allenai/OLMo-1B-hf,google/gemma-3-1b-it' || 'Qwen/Qwen3-0.6B' }}
7676
CRON_DEFAULT_DEVICES: samsung_galaxy_s22+public
7777
run: |
7878
set -eux

.github/workflows/trunk.yml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -979,3 +979,27 @@ jobs:
979979
# Run MCU models
980980
chmod +x examples/arm/run_mcu_models_fvp.sh
981981
examples/arm/run_mcu_models_fvp.sh --target=cortex-m55
982+
983+
# Exports and runs every model/backend combination through the shared
# windows_job workflow; one failing combination does not cancel the others.
test-models-windows:
  uses: pytorch/test-infra/.github/workflows/windows_job.yml@main
  strategy:
    fail-fast: false
    matrix:
      model:
        - linear
        - add
        - add_mul
        - ic3
        - ic4
        - mv2
        - mv3
        - resnet18
        - resnet50
        - vit
        - w2l
        - mobilebert
        - emformer_join
        - emformer_transcribe
      backend:
        - portable
        - xnnpack-f32
        - xnnpack-q8
  with:
    submodules: 'recursive'
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    timeout: 60
    script: |
      conda init powershell

      powershell -Command "& {
        Set-PSDebug -Trace 1
        \$ErrorActionPreference = 'Stop'
        \$PSNativeCommandUseErrorActionPreference = \$true

        .ci/scripts/setup-windows.ps1

        powershell .ci/scripts/test_model.ps1 -modelName ${{ matrix.model }} -backend ${{ matrix.backend }}
      }"

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,3 +65,7 @@ xcuserdata/
6565

6666
# Android
6767
*.aar
68+
69+
# Windows
70+
*.dll
71+
*.pyd

backends/apple/coreml/compiler/torch_ops.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -198,11 +198,22 @@ def dequantize_codebook(context, node):
198198

199199
# Assert codebook is as expected. codebook.dim() = codes.dim() + 2
200200
assert len(codebook.shape) == 4, "Only rank 4 inputs are supported for codebook"
201-
assert codebook.shape[0] == 1, "Only grouped_channel granularity is supported"
202-
n_luts = codebook.shape[1]
203-
assert (
204-
codes.shape[1] % n_luts == 0
205-
), "codes.shape[1] must be divisible by codebook.shape[1]"
201+
assert (codebook.shape[0] == 1) or (
202+
codebook.shape[1] == 1
203+
), "Only grouped_channel granularity is supported"
204+
if codebook.shape[0] == 1:
205+
# LUT is per column group
206+
n_luts = codebook.shape[1]
207+
assert (
208+
codes.shape[1] % n_luts == 0
209+
), "codes.shape[1] must be divisible by codebook.shape[1]"
210+
else:
211+
# LUT is per row group
212+
n_luts = codebook.shape[0]
213+
assert (
214+
codes.shape[0] % n_luts == 0
215+
), "codes.shape[0] must be divisible by codebook.shape[0]"
216+
206217
assert codebook.shape[2] == 2**nbits
207218
assert codebook.shape[3] == 1, "Only scalar look up values are supported"
208219

backends/apple/coreml/test/test_torch_ops.py

Lines changed: 62 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def _coreml_partitioner(self):
3535

3636
def _get_test_model(self):
3737
model = torch.nn.Sequential(
38-
torch.nn.Embedding(64, 128), torch.nn.Linear(128, 128), torch.nn.ReLU()
38+
torch.nn.Embedding(64, 128), torch.nn.Linear(128, 256), torch.nn.ReLU()
3939
)
4040
example_inputs = (torch.LongTensor([0]),)
4141
return model, example_inputs
@@ -158,7 +158,7 @@ def test_dequantize_affine_c8w_embedding_b4w_linear(self):
158158
et_prog = delegated_program.to_executorch()
159159
self._compare_outputs(et_prog, model, example_inputs)
160160

161-
def test_dequantize_codebook_linear(self):
161+
def test_dequantize_codebook_linear_per_grouped_col(self):
162162
model, example_inputs = self._get_test_model()
163163
quantize_(
164164
model,
@@ -185,7 +185,34 @@ def test_dequantize_codebook_linear(self):
185185
et_prog = delegated_program.to_executorch()
186186
self._compare_outputs(et_prog, model, example_inputs)
187187

188-
def test_dequantize_codebook_embedding(self):
188+
def test_dequantize_codebook_linear_per_grouped_row(self):
    """Lower a codebook-quantized model (uint2, block_size=[16, -1]) to Core ML.

    Checks that the exported graph contains ``dequantize_codebook``, that
    every remaining ``call_function`` node after lowering is a delegate call
    or ``getitem``, that the delegated graph still mentions the op, and that
    the ExecuTorch program's outputs match the eager model.
    """
    model, example_inputs = self._get_test_model()
    codebook_config = CodebookWeightOnlyConfig(
        dtype=torch.uint2, block_size=[16, -1]
    )
    quantize_(model, codebook_config)

    ep = torch.export.export(model, example_inputs)
    assert "torch.ops.quant.dequantize_codebook.default" in ep.graph_module.code

    delegated_program = executorch.exir.to_edge_transform_and_lower(
        ep,
        partitioner=[self._coreml_partitioner()],
    )

    # Everything must have been swallowed by the delegate.
    allowed_targets = ["executorch_call_delegate", "getitem"]
    for node in delegated_program.exported_program().graph.nodes:
        if node.op != "call_function":
            continue
        assert (
            node.target.__name__ in allowed_targets
        ), f"Got unexpected node target after delegation: {node.target.__name__}"

    delegated_graph_text = format_delegated_graph(
        delegated_program.exported_program().graph_module
    )
    assert (
        "executorch.exir.dialects.edge._ops.quant.dequantize_codebook.default"
        in delegated_graph_text
    )

    et_prog = delegated_program.to_executorch()
    self._compare_outputs(et_prog, model, example_inputs)
214+
215+
def test_dequantize_codebook_embedding_per_grouped_col(self):
189216
model, example_inputs = self._get_test_model()
190217
quantize_(
191218
model,
@@ -212,7 +239,35 @@ def test_dequantize_codebook_embedding(self):
212239

213240
et_prog = delegated_program.to_executorch()
214241
self._compare_outputs(et_prog, model, example_inputs)
242+
243+
def test_dequantize_codebook_embedding_per_grouped_row(self):
    """Lower a model whose Embedding is codebook-quantized (uint3, block_size=[16, -1]).

    Only ``torch.nn.Embedding`` modules are quantized. Checks that the
    exported graph contains ``dequantize_codebook``, that every remaining
    ``call_function`` node after lowering is a delegate call or ``getitem``,
    that the delegated graph still mentions the op, and that the ExecuTorch
    program's outputs match the eager model.
    """
    model, example_inputs = self._get_test_model()
    codebook_config = CodebookWeightOnlyConfig(
        dtype=torch.uint3, block_size=[16, -1]
    )
    quantize_(
        model,
        codebook_config,
        lambda m, fqn: isinstance(m, torch.nn.Embedding),
    )

    ep = torch.export.export(model, example_inputs)
    assert "torch.ops.quant.dequantize_codebook.default" in ep.graph_module.code

    delegated_program = executorch.exir.to_edge_transform_and_lower(
        ep,
        partitioner=[self._coreml_partitioner()],
    )

    # Everything must have been swallowed by the delegate.
    allowed_targets = ["executorch_call_delegate", "getitem"]
    for node in delegated_program.exported_program().graph.nodes:
        if node.op != "call_function":
            continue
        assert (
            node.target.__name__ in allowed_targets
        ), f"Got unexpected node target after delegation: {node.target.__name__}"

    delegated_graph_text = format_delegated_graph(
        delegated_program.exported_program().graph_module
    )
    assert (
        "executorch.exir.dialects.edge._ops.quant.dequantize_codebook.default"
        in delegated_graph_text
    )

    et_prog = delegated_program.to_executorch()
    self._compare_outputs(et_prog, model, example_inputs)
270+
216271
def test__clone_dim_order_contiguous(self):
217272
class Model(torch.nn.Module):
218273
def forward(self, x):
@@ -243,6 +298,8 @@ def forward(self, x):
243298
test_runner.test_dequantize_affine_c4w_embedding()
244299
test_runner.test_dequantize_affine_c4w_linear()
245300
test_runner.test_dequantize_affine_c8w_embedding_b4w_linear()
246-
test_runner.test_dequantize_codebook_linear()
247-
test_runner.test_dequantize_codebook_embedding()
301+
test_runner.test_dequantize_codebook_linear_per_grouped_col()
302+
test_runner.test_dequantize_codebook_linear_per_grouped_row()
303+
test_runner.test_dequantize_codebook_embedding_per_grouped_col()
304+
test_runner.test_dequantize_codebook_embedding_per_grouped_row()
248305
test_runner.test__clone_dim_order_contiguous()

backends/arm/arm_backend.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
# backends. Converts via TOSA as an intermediate form supported by AoT and
1111
# JIT compiler flows.
1212
#
13+
from enum import Enum
1314
from typing import List, Optional
1415

1516
from executorch.backends.arm.tosa_specification import ( # type: ignore[import-not-found]
@@ -22,12 +23,16 @@
2223

2324

2425
class ArmCompileSpecBuilder:
26+
class DebugMode(Enum):
    """Debug modes that can be passed to ``dump_debug_info``."""

    JSON = 1
28+
2529
def __init__(self):
2630
self.compile_spec: List[CompileSpec] = []
2731
self.compiler_flags = []
2832
self.output_format = None
2933
self.path_for_intermediates = None
3034
self.tosa_spec = None
35+
self.tosa_debug_mode = None
3136

3237
def vgf_compile_spec(
3338
self,
@@ -163,6 +168,13 @@ def dump_intermediate_artifacts_to(
163168
self.path_for_intermediates = output_path
164169
return self
165170

171+
def dump_debug_info(self, debug_mode: DebugMode) -> "ArmCompileSpecBuilder":
172+
"""
173+
Dump debugging information into the intermediates path
174+
"""
175+
self.tosa_debug_mode = debug_mode.name
176+
return self
177+
166178
def build(self) -> List[CompileSpec]:
167179
"""
168180
Generate a list of compile spec objects from the builder
@@ -188,6 +200,16 @@ def build(self) -> List[CompileSpec]:
188200
CompileSpec("debug_artifact_path", self.path_for_intermediates.encode())
189201
)
190202

203+
if self.tosa_debug_mode is not None:
204+
if not self.path_for_intermediates:
205+
raise ValueError(
206+
"dump_debug_info() must be used in conjunction with dump_intermediate_artifacts_to()"
207+
)
208+
209+
self.compile_spec.append(
210+
CompileSpec("dump_debug_info", self.tosa_debug_mode.encode())
211+
)
212+
191213
return self.compile_spec
192214

193215

0 commit comments

Comments
 (0)