
Commit c50a55f

Merge branch 'main' of https://github.com/pytorch/executorch into change-1078286

2 parents f3bd07c + 4efd79c

File tree

411 files changed: 9,421 additions and 4,261 deletions


.ci/scripts/setup-windows-msvc.ps1

Lines changed: 52 additions & 0 deletions
@@ -0,0 +1,52 @@
+conda create --yes --quiet -n et python=3.12
+conda activate et
+
+# Install cmake
+conda install -y cmake
+
+# Activate the VS environment - this is required for MSVC to work
+# There are a bunch of environment variables that it requires.
+# See https://learn.microsoft.com/en-us/cpp/build/building-on-the-command-line.
+& "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\Common7\Tools\Launch-VsDevShell.ps1" -Arch amd64
+
+# Install CI requirements
+pip install -r .ci/docker/requirements-ci.txt
+
+# Create build directory
+$buildDir = "cmake-out-msvc"
+if (Test-Path -Path $buildDir) {
+    Remove-Item -Path $buildDir -Recurse -Force
+}
+New-Item -Path $buildDir -ItemType Directory
+
+# Configure CMake with MSVC (not ClangCL) and disable custom/quantized ops
+cmake -S . -B $buildDir `
+    -DCMAKE_BUILD_TYPE=Release `
+    -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=ON `
+    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON `
+    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON `
+    -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON `
+    -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON `
+    -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON `
+    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON `
+    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=OFF `
+    -DEXECUTORCH_BUILD_KERNELS_CUSTOM_AOT=OFF `
+    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=OFF `
+    -DEXECUTORCH_BUILD_XNNPACK=ON `
+    -DEXECUTORCH_BUILD_EXTENSION_LLM=ON `
+    -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON
+
+if ($LASTEXITCODE -ne 0) {
+    Write-Host "CMake configuration failed. Exit code: $LASTEXITCODE."
+    exit $LASTEXITCODE
+}
+
+# Build with MSVC
+cmake --build $buildDir --config Release -j16
+
+if ($LASTEXITCODE -ne 0) {
+    Write-Host "Build failed. Exit code: $LASTEXITCODE."
+    exit $LASTEXITCODE
+}
+
+Write-Host "MSVC build completed successfully!"
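
Note: the script assumes it runs in a conda-enabled PowerShell session, which the Windows workflow below provides via conda init powershell. As a rough sketch of a local run (the -ExecutionPolicy Bypass flag is an assumption for machines with a restricted script policy, not something the CI job passes):

    # Hypothetical local invocation; assumes conda is installed and on PATH.
    powershell -ExecutionPolicy Bypass -File .ci/scripts/setup-windows-msvc.ps1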

.github/workflows/cuda.yml

Lines changed: 211 additions & 4 deletions
@@ -89,6 +89,8 @@ jobs:
 
   export-voxtral-cuda-artifact:
     name: export-voxtral-cuda-${{ matrix.quant.name }}
+    # Skip this job if the pull request is from a fork (HuggingFace secrets are not available)
+    if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request'
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     permissions:
       id-token: write
@@ -164,6 +166,77 @@
         ls -al "${RUNNER_ARTIFACT_DIR}"
         echo "::endgroup::"
 
+  export-gemma3-cuda-artifact:
+    name: export-gemma3-cuda-${{ matrix.quant.name }}
+    # Skip this job if the pull request is from a fork (HuggingFace secrets are not available)
+    if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request'
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    secrets: inherit
+    strategy:
+      fail-fast: false
+      matrix:
+        quant:
+          - name: "non-quantized"
+            artifact: "gemma3-cuda-export"
+            extra_args: ""
+          - name: "quantized-int4-tile-packed"
+            artifact: "gemma3-cuda-quantized-int4-tile-packed"
+            extra_args: "--qlinear 4w --qlinear_encoder 4w --qlinear_packing_format tile_packed_to_4d --qlinear_encoder_packing_format tile_packed_to_4d"
+          # TODO: enable int4-weight-only on gemma3.
+          # - name: "quantized-int4-weight-only"
+          #   artifact: "voxtral-cuda-quantized-int4-weight-only"
+          #   # TODO: adding "--qlinear 4w" produces invalid results. Need further investigation.
+          #   extra_args: "--qlinear_encoder 4w"
+    with:
+      timeout: 90
+      secrets-env: EXECUTORCH_HF_TOKEN
+      runner: linux.g5.4xlarge.nvidia.gpu
+      gpu-arch-type: cuda
+      gpu-arch-version: 12.6
+      use-custom-docker-registry: false
+      submodules: recursive
+      upload-artifact: ${{ matrix.quant.artifact }}
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      script: |
+        set -eux
+
+        echo "::group::Setup ExecuTorch"
+        ./install_executorch.sh
+        echo "::endgroup::"
+
+        echo "::group::Setup Huggingface"
+        pip install -U "huggingface_hub[cli]" accelerate
+        huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
+        OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
+        pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
+        pip list
+        echo "::endgroup::"
+
+        echo "::group::Export Gemma3 (${{ matrix.quant.name }})"
+        EXTRA_ARGS="${{ matrix.quant.extra_args }}"
+        optimum-cli export executorch \
+          --model "google/gemma-3-4b-it" \
+          --task "multimodal-text-to-text" \
+          --recipe "cuda" \
+          --dtype bfloat16 \
+          --device cuda \
+          --max_seq_len 64 \
+          --output_dir ./
+
+        test -f model.pte
+        test -f aoti_cuda_blob.ptd
+        echo "::endgroup::"
+
+        echo "::group::Store Gemma3 Artifacts (${{ matrix.quant.name }})"
+        mkdir -p "${RUNNER_ARTIFACT_DIR}/"
+        cp model.pte "${RUNNER_ARTIFACT_DIR}/"
+        cp aoti_cuda_blob.ptd "${RUNNER_ARTIFACT_DIR}/"
+        ls -al "${RUNNER_ARTIFACT_DIR}/"
+        echo "::endgroup::"
+
   benchmark-voxtral-cuda:
     name: benchmark-voxtral-cuda
     needs: export-voxtral-cuda-artifact
@@ -186,7 +259,7 @@
         set -eux
 
         echo "::group::Setup ExecuTorch Requirements"
-        CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_requirements.sh
+        ./install_requirements.sh
         pip list
         echo "::endgroup::"
 
@@ -204,13 +277,63 @@
           -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \
           -DEXECUTORCH_BUILD_TESTS=ON \
           -Bcmake-out .
-        cmake --build cmake-out -j$(( $(nproc) - 1 )) --target voxtral_runner
+        cmake --build cmake-out -j$(( $(nproc) - 1 )) --target multimodal_benchmark
         echo "::endgroup::"
 
         echo "::group::Run Voxtral Benchmark"
 
         export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
-        cmake-out/backends/cuda/voxtral_runner model.pte aoti_cuda_blob.ptd
+        cmake-out/backends/cuda/multimodal_benchmark voxtral model.pte aoti_cuda_blob.ptd
+
+        echo "::endgroup::"
+
+  benchmark-gemma3-cuda:
+    name: benchmark-gemma3-cuda
+    needs: export-gemma3-cuda-artifact
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    strategy:
+      fail-fast: false
+    with:
+      timeout: 90
+      runner: linux.g5.4xlarge.nvidia.gpu
+      gpu-arch-type: cuda
+      gpu-arch-version: 12.6
+      use-custom-docker-registry: false
+      submodules: recursive
+      download-artifact: gemma3-cuda-export
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      script: |
+        set -eux
+
+        echo "::group::Setup ExecuTorch Requirements"
+        ./install_requirements.sh
+        pip list
+        echo "::endgroup::"
+
+        echo "::group::Prepare Gemma3 Artifacts"
+        cp "${RUNNER_ARTIFACT_DIR}/model.pte" .
+        cp "${RUNNER_ARTIFACT_DIR}/aoti_cuda_blob.ptd" .
+        ls -al model.pte aoti_cuda_blob.ptd
+        echo "::endgroup::"
+
+        echo "::group::Build Gemma3 Benchmark"
+        cmake -DCMAKE_BUILD_TYPE=Release \
+          -DEXECUTORCH_BUILD_CUDA=ON \
+          -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
+          -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+          -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \
+          -DEXECUTORCH_BUILD_TESTS=ON \
+          -Bcmake-out .
+        cmake --build cmake-out -j$(( $(nproc) - 1 )) --target multimodal_benchmark
+        echo "::endgroup::"
+
+        echo "::group::Run Gemma3 Benchmark"
+
+        export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
+        cmake-out/backends/cuda/multimodal_benchmark gemma3 model.pte aoti_cuda_blob.ptd
 
         echo "::endgroup::"
 
@@ -244,7 +367,7 @@
         set -eux
 
         echo "::group::Setup ExecuTorch Requirements"
-        CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_requirements.sh
+        ./install_requirements.sh
         pip list
        echo "::endgroup::"
 
@@ -302,3 +425,87 @@
           exit $EXIT_CODE
         fi
         echo "::endgroup::"
+
+  test-gemma3-cuda-e2e:
+    name: test-gemma3-cuda-e2e-${{ matrix.format.name }}
+    needs: export-gemma3-cuda-artifact
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    strategy:
+      fail-fast: false
+      matrix:
+        format:
+          - name: "non-quantized"
+            artifact: "gemma3-cuda-export"
+          - name: "quantized-int4-tile-packed"
+            artifact: "gemma3-cuda-quantized-int4-tile-packed"
+          # TODO: enable int4-weight-only on gemma3.
+          # - name: "quantized-int4-weight-only"
+          #   artifact: "gemma3-cuda-quantized-int4-weight-only"
+    with:
+      timeout: 90
+      runner: linux.g5.4xlarge.nvidia.gpu
+      gpu-arch-type: cuda
+      gpu-arch-version: 12.6
+      use-custom-docker-registry: false
+      submodules: recursive
+      download-artifact: ${{ matrix.format.artifact }}
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      script: |
+        set -eux
+
+        echo "::group::Setup ExecuTorch Requirements"
+        ./install_requirements.sh
+        pip list
+        echo "::endgroup::"
+
+        echo "::group::Prepare Gemma3 Artifacts (${{ matrix.format.name }})"
+        cp "${RUNNER_ARTIFACT_DIR}/model.pte" .
+        cp "${RUNNER_ARTIFACT_DIR}/aoti_cuda_blob.ptd" .
+        TOKENIZER_URL="https://huggingface.co/unsloth/gemma-3-1b-it/resolve/main/tokenizer.json"
+        curl -L $TOKENIZER_URL -o tokenizer.json
+        ls -al model.pte aoti_cuda_blob.ptd tokenizer.json
+        IMAGE_PATH="docs/source/_static/img/et-logo.png"
+        echo "::endgroup::"
+
+        echo "::group::Build Gemma3 Runner"
+        cmake --preset llm \
+          -DEXECUTORCH_BUILD_CUDA=ON \
+          -DCMAKE_INSTALL_PREFIX=cmake-out \
+          -DCMAKE_BUILD_TYPE=Release \
+          -Bcmake-out -S.
+        cmake --build cmake-out -j$(( $(nproc) - 1 )) --target install --config Release
+
+        cmake -DEXECUTORCH_BUILD_CUDA=ON \
+          -DCMAKE_BUILD_TYPE=Release \
+          -Sexamples/models/gemma3 \
+          -Bcmake-out/examples/models/gemma3/
+        cmake --build cmake-out/examples/models/gemma3 --target gemma3_e2e_runner --config Release
+        echo "::endgroup::"
+
+        echo "::group::Run Gemma3 Runner (${{ matrix.format.name }})"
+        set +e
+        export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
+        OUTPUT=$(cmake-out/examples/models/gemma3/gemma3_e2e_runner \
+          --model_path model.pte \
+          --data_path aoti_cuda_blob.ptd \
+          --tokenizer_path tokenizer.json \
+          --image_path $IMAGE_PATH \
+          --temperature 0 2>&1)
+        EXIT_CODE=$?
+        set -e
+
+        echo "$OUTPUT"
+
+        if ! echo "$OUTPUT" | grep -iq "chip"; then
+          echo "Expected output 'chip' not found in output"
+          exit 1
+        fi
+
+        if [ $EXIT_CODE -ne 0 ]; then
+          echo "Unexpected exit code: $EXIT_CODE"
+          exit $EXIT_CODE
+        fi
+        echo "::endgroup::"

.github/workflows/windows-msvc.yml

Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
+name: Windows MSVC Build
+
+on:
+  push:
+    branches:
+      - main
+      - release/*
+    tags:
+      - ciflow/trunk/*
+  pull_request:
+    paths:
+      - .ci/docker/ci_commit_pins/pytorch.txt
+      - .ci/scripts/**
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
+  cancel-in-progress: true
+
+jobs:
+  build-windows-msvc:
+    name: build-windows-msvc
+    uses: pytorch/test-infra/.github/workflows/windows_job.yml@main
+    with:
+      submodules: 'recursive'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 60
+      script: |
+        conda init powershell
+        powershell -Command "& {
+          Set-PSDebug -Trace 1
+          \$ErrorActionPreference = 'Stop'
+          \$PSNativeCommandUseErrorActionPreference = \$true
+          .ci/scripts/setup-windows-msvc.ps1
+        }"

.gitignore

Lines changed: 0 additions & 1 deletion
@@ -62,7 +62,6 @@ xcuserdata/
 /include/
 /share/
 /version.py
-*.csv
 *_etdump
 
 # Android
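
Note: with *.csv dropped from .gitignore, CSV files become trackable again. One way to confirm locally (results.csv is a hypothetical file name):

    # Prints nothing and exits non-zero once the *.csv rule is gone.
    git check-ignore -v results.csv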

.mypy.ini

Lines changed: 6 additions & 0 deletions
@@ -83,6 +83,12 @@ ignore_missing_imports = True
 [mypy-tosa_tools.*]
 ignore_missing_imports = True
 
+[mypy-tosa_serializer]
+ignore_missing_imports = True
+
+[mypy-tosa_serializer.*]
+ignore_missing_imports = True
+
 [mypy-setuptools.*]
 ignore_missing_imports = True
 
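
Note: these sections make mypy tolerate the untyped tosa_serializer package, mirroring the existing tosa_tools entries. A quick way to exercise the config, assuming mypy is installed and some_module.py is a hypothetical file that imports tosa_serializer:

    mypy --config-file .mypy.ini some_module.py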

CONTRIBUTING.md

Lines changed: 2 additions & 2 deletions
@@ -33,8 +33,8 @@ executorch
 │ ├── <a href="backends/openvino">openvino</a> - OpenVINO backend for Intel hardware.
 │ ├── <a href="backends/qualcomm">qualcomm</a> - Qualcomm-specific backends. See <a href="docs/source/backends-qualcomm.md">doc</a>.
 │ ├── <a href="backends/transforms">transforms</a> - Transformations for backend optimization.
-│ ├── <a href="backends/vulkan">vulkan</a> - Vulkan backend for cross-platform GPU support. See <a href="docs/source/backends-vulkan.md">doc</a>.
-│ └── <a href="backends/xnnpack">xnnpack</a> - XNNPACK backend for optimized neural network operations. See <a href="docs/source/backends-xnnpack.md">doc</a>.
+│ ├── <a href="backends/vulkan">vulkan</a> - Vulkan backend for cross-platform GPU support. See <a href="docs/source/backends/vulkan/vulkan-overview.md">doc</a>.
+│ └── <a href="backends/xnnpack">xnnpack</a> - XNNPACK backend for optimized neural network operations. See <a href="docs/source/backends/xnnpack/xnnpack-overview.md">doc</a>.
 ├── <a href="codegen">codegen</a> - Tooling to autogenerate bindings between kernels and the runtime.
 ├── <a href="configurations">configurations</a> - Configuration files.
 ├── <a href="devtools">devtools</a> - Model profiling, debugging, and inspection. Please refer to the <a href="docs/source/devtools-overview.md">tools documentation</a> for more information.

README-wheel.md

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@ The `executorch` pip package is in beta.
 The prebuilt `executorch.runtime` module included in this package provides a way
 to run ExecuTorch `.pte` files, with some restrictions:
 * Only [core ATen operators](docs/source/ir-ops-set-definition.md) are linked into the prebuilt module
-* Only the [XNNPACK backend delegate](docs/source/backends-xnnpack.md) is linked into the prebuilt module.
+* Only the [XNNPACK backend delegate](docs/source/backends/xnnpack/xnnpack-overview.md) is linked into the prebuilt module.
 * \[macOS only] [Core ML](docs/source/backends/coreml/coreml-overview.md) and [MPS](docs/source/backends/mps/mps-overview.md) backend
   are also linked into the prebuilt module.

README.md

Lines changed: 1 addition & 1 deletion
@@ -202,7 +202,7 @@ ExecuTorch powers on-device AI at scale across Meta's family of apps, VR/AR devi
 
 **LLMs:** [Llama 3.2/3.1/3](examples/models/llama/README.md), [Qwen 3](examples/models/qwen3/README.md), [Phi-4-mini](examples/models/phi_4_mini/README.md), [LiquidAI LFM2](examples/models/lfm2/README.md)
 
-**Multimodal:** [Llava](examples/models/llava/README.md) (vision-language), [Voxtral](examples/models/voxtral/README.md) (audio-language)
+**Multimodal:** [Llava](examples/models/llava/README.md) (vision-language), [Voxtral](examples/models/voxtral/README.md) (audio-language), [Gemma](examples/models/gemma3) (vision-language)
 
 **Vision/Speech:** [MobileNetV2](https://github.com/meta-pytorch/executorch-examples/tree/main/mv2), [DeepLabV3](https://github.com/meta-pytorch/executorch-examples/tree/main/dl3), [Whisper](https://github.com/meta-pytorch/executorch-examples/tree/main/whisper/android/WhisperApp)
