
Commit 6d38e3b

zingoagrima1304 authored and committed

Merge branch 'main' into op-floor-div

Signed-off-by: Agrima Khare <[email protected]>
Change-Id: I63ea1039cfd944a959bdae8db02f48cc4af2cb96

2 parents 5f5bf9b + ed91b6a

384 files changed, +4981 -3217 lines changed


.ci/scripts/build-qnn-sdk.sh

Lines changed: 1 addition & 1 deletion
@@ -18,7 +18,7 @@ build_qnn_backend() {
   export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)"

   parallelism=$(( $(nproc) - 1 ))
-  bash backends/qualcomm/scripts/build.sh --skip_aarch64 --job_number ${parallelism} --release
+  bash backends/qualcomm/scripts/build.sh --skip_linux_android --skip_linux_embedded --job_number ${parallelism} --release
 }

 set_up_aot() {
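Two details worth noting in this hunk: the skip flags changed from the single --skip_aarch64 to the more specific --skip_linux_android and --skip_linux_embedded, and the job count is derived from the runner's core count. A minimal sketch of that arithmetic (the echo line is illustrative only, not part of the script):

  # $(nproc) reports the available cores; subtracting one leaves a core
  # free for the rest of the CI job, so an 8-core runner builds with 7 jobs.
  parallelism=$(( $(nproc) - 1 ))
  echo "passing --job_number ${parallelism}"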

.ci/scripts/setup-windows-msvc.ps1

Lines changed: 52 additions & 0 deletions
@@ -0,0 +1,52 @@
conda create --yes --quiet -n et python=3.12
conda activate et

# Install cmake
conda install -y cmake

# Activate the VS environment - this is required for MSVC to work
# There are a bunch of environment variables that it requires.
# See https://learn.microsoft.com/en-us/cpp/build/building-on-the-command-line.
& "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\Common7\Tools\Launch-VsDevShell.ps1" -Arch amd64

# Install CI requirements
pip install -r .ci/docker/requirements-ci.txt

# Create build directory
$buildDir = "cmake-out-msvc"
if (Test-Path -Path $buildDir) {
    Remove-Item -Path $buildDir -Recurse -Force
}
New-Item -Path $buildDir -ItemType Directory

# Configure CMake with MSVC (not ClangCL) and disable custom/quantized ops
cmake -S . -B $buildDir `
    -DCMAKE_BUILD_TYPE=Release `
    -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=ON `
    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON `
    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON `
    -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON `
    -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON `
    -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON `
    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON `
    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=OFF `
    -DEXECUTORCH_BUILD_KERNELS_CUSTOM_AOT=OFF `
    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=OFF `
    -DEXECUTORCH_BUILD_XNNPACK=ON `
    -DEXECUTORCH_BUILD_EXTENSION_LLM=ON `
    -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON

if ($LASTEXITCODE -ne 0) {
    Write-Host "CMake configuration failed. Exit code: $LASTEXITCODE."
    exit $LASTEXITCODE
}

# Build with MSVC
cmake --build $buildDir --config Release -j16

if ($LASTEXITCODE -ne 0) {
    Write-Host "Build failed. Exit code: $LASTEXITCODE."
    exit $LASTEXITCODE
}

Write-Host "MSVC build completed successfully!"

.ci/scripts/test_qnn_static_llama.sh

Lines changed: 0 additions & 69 deletions
This file was deleted; it is superseded by the renamed .ci/scripts/test_qnn_static_llm.sh added below.

.ci/scripts/test_qnn_static_llm.sh

Lines changed: 94 additions & 0 deletions
@@ -0,0 +1,94 @@
#!/bin/bash
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -euxo pipefail

source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"

TASK_NAME=$1
if [[ -z "${TASK_NAME:-}" ]]; then
  echo "Missing task name, exiting..."
  exit 1
fi


# Download QNN_SDK. If already downloaded, export environment path
source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/install_qnn_sdk.sh"
install_qnn

export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
export LD_LIBRARY_PATH="${QNN_SDK_ROOT}/lib/x86_64-linux-clang"
export PYTHONPATH=".."
cp schema/program.fbs exir/_serialize/program.fbs
cp schema/scalar_type.fbs exir/_serialize/scalar_type.fbs
cp -f build-x86/backends/qualcomm/PyQnnManagerAdaptor.cpython-310-x86_64-linux-gnu.so backends/qualcomm/python
cp -f build-x86/backends/qualcomm/PyQnnWrapperAdaptor.cpython-310-x86_64-linux-gnu.so backends/qualcomm/python

if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
  PYTHON_EXECUTABLE=python3
fi

which "${PYTHON_EXECUTABLE}"

# Although static llama CI does not require graphviz, it is required by test_qnn_delegate.py
pip install graphviz

set +e

echo "Executing task: $TASK_NAME"
if [[ "${TASK_NAME}" == "stories_110m" ]]; then
  # Download stories llama110m artifacts
  download_stories_model_artifacts
  echo "Creating tokenizer.bin"
  $PYTHON_EXECUTABLE -m pytorch_tokenizers.tools.llama2c.convert -t tokenizer.model -o tokenizer.bin

  # Compile only as weight sharing is not applicable on x86.
  $PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_llama_stories_110m --model SM8650 --build_folder build-android/ --executorch_root . --artifact_dir ./stories_110m_pte_size --llama_artifacts . --compile_only
  exit_code1=$?

  # Checks accuracy with weight sharing disabled since x86 does not support weight sharing.
  $PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_llama_stories_110m --model SM8650 --build_folder build-x86/ --executorch_root . --artifact_dir ./stories_110m_accuracy --llama_artifacts . --enable_x86_64
  exit_code2=$?

  # Check the exit codes and print messages
  if [ $exit_code1 -ne 0 ]; then
    echo "Static Llama compile only with weight sharing test failed. $exit_code1."
  fi

  if [ $exit_code2 -ne 0 ]; then
    echo "Static Llama accuracy test failed. $exit_code2."
  fi

  if [ $exit_code1 -ne 0 ] || [ $exit_code2 -ne 0 ]; then
    exit 1
  else
    exit 0
  fi

elif [[ "${TASK_NAME}" == "stories_260k_bc" ]]; then

  # Check BC
  bash backends/qualcomm/bc/test_qnn_static_llama_bc.sh
  exit_code1=$?
  if [ $exit_code1 -ne 0 ]; then
    exit 1
  else
    exit 0
  fi

elif [[ "${TASK_NAME}" == "smollm2_135m" ]]; then
  $PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_static_smollm2 --model SM8650 --build_folder build-x86/ --executorch_root . --artifact_dir ./static_smollm2 --enable_x86_64
  exit_code1=$?
  if [ $exit_code1 -ne 0 ]; then
    exit 1
  else
    exit 0
  fi
else
  echo "Unsupported task: $TASK_NAME"
  exit 1
fi
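For reference, the script dispatches on its single positional argument; these invocations (a sketch, the nightly workflow below passes the task through its matrix) cover the three recognized tasks:

  bash .ci/scripts/test_qnn_static_llm.sh stories_110m     # compile-only check plus x86 accuracy run
  bash .ci/scripts/test_qnn_static_llm.sh stories_260k_bc  # backward-compatibility check
  bash .ci/scripts/test_qnn_static_llm.sh smollm2_135m     # static SmolLM2 accuracy run on x86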

.github/workflows/_unittest.yml

Lines changed: 1 addition & 1 deletion
@@ -32,7 +32,7 @@ jobs:
       id-token: write
       contents: read
     with:
-      runner: linux.2xlarge
+      runner: linux.2xlarge.memory
       docker-image: ${{ inputs.docker-image }}
       submodules: 'recursive'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}

.github/workflows/cuda.yml

Lines changed: 18 additions & 14 deletions
@@ -89,6 +89,8 @@ jobs:

   export-voxtral-cuda-artifact:
     name: export-voxtral-cuda-${{ matrix.quant.name }}
+    # Skip this job if the pull request is from a fork (HuggingFace secrets are not available)
+    if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request'
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     permissions:
       id-token: write

@@ -126,7 +128,7 @@
         echo "::endgroup::"

         echo "::group::Setup Huggingface"
-        pip install -U "huggingface_hub[cli]" accelerate
+        pip install -U "huggingface_hub[cli]<1.0" accelerate
         huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
         OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
         pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}

@@ -166,6 +168,8 @@

   export-gemma3-cuda-artifact:
     name: export-gemma3-cuda-${{ matrix.quant.name }}
+    # Skip this job if the pull request is from a fork (HuggingFace secrets are not available)
+    if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request'
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     permissions:
       id-token: write

@@ -176,12 +180,12 @@
       matrix:
         quant:
           - name: "non-quantized"
-            artifact: "voxtral-cuda-export"
+            artifact: "gemma3-cuda-export"
             extra_args: ""
-          # TODO: enable gemma3 quantization
-          # - name: "quantized-int4-tile-packed"
-          #   artifact: "voxtral-cuda-quantized-int4-tile-packed"
-          #   extra_args: "--qlinear 4w --qlinear_encoder 4w --qlinear_packing_format tile_packed_to_4d --qlinear_encoder_packing_format tile_packed_to_4d"
+          - name: "quantized-int4-tile-packed"
+            artifact: "gemma3-cuda-quantized-int4-tile-packed"
+            extra_args: "--qlinear 4w --qlinear_encoder 4w --qlinear_packing_format tile_packed_to_4d --qlinear_encoder_packing_format tile_packed_to_4d"
+          # TODO: enable int4-weight-only on gemma3.
           # - name: "quantized-int4-weight-only"
           #   artifact: "voxtral-cuda-quantized-int4-weight-only"
           #   # TODO: adding "--qlinear 4w" produces invalid results. Need further investigation.

@@ -194,7 +198,7 @@
       gpu-arch-version: 12.6
       use-custom-docker-registry: false
       submodules: recursive
-      upload-artifact: gemma3-cuda-export
+      upload-artifact: ${{ matrix.quant.artifact }}
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       script: |
         set -eux

@@ -204,7 +208,7 @@
         echo "::endgroup::"

         echo "::group::Setup Huggingface"
-        pip install -U "huggingface_hub[cli]" accelerate
+        pip install -U "huggingface_hub[cli]<1.0" accelerate
         huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
         OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
         pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}

@@ -255,7 +259,7 @@
         set -eux

         echo "::group::Setup ExecuTorch Requirements"
-        CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_requirements.sh
+        ./install_requirements.sh
         pip list
         echo "::endgroup::"

@@ -305,7 +309,7 @@
         set -eux

         echo "::group::Setup ExecuTorch Requirements"
-        CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_requirements.sh
+        ./install_requirements.sh
         pip list
         echo "::endgroup::"

@@ -363,7 +367,7 @@
         set -eux

         echo "::group::Setup ExecuTorch Requirements"
-        CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_requirements.sh
+        ./install_requirements.sh
         pip list
         echo "::endgroup::"

@@ -435,9 +439,9 @@
       format:
         - name: "non-quantized"
           artifact: "gemma3-cuda-export"
-        # TODO: enable quantized gemma3.
-        # - name: "quantized-int4-tile-packed"
-        #   artifact: "gemma3-cuda-quantized-int4-tile-packed"
+        - name: "quantized-int4-tile-packed"
+          artifact: "gemma3-cuda-quantized-int4-tile-packed"
+        # TODO: enable int4-weight-only on gemma3.
         # - name: "quantized-int4-weight-only"
         #   artifact: "gemma3-cuda-quantized-int4-weight-only"
     with:
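A note on the new if: guard added to the export jobs above: it allows the job for push events and for pull requests whose head branch lives in this repository, and skips it for fork PRs, where the SECRET_EXECUTORCH_HF_TOKEN secret is not exposed. A shell illustration of the same boolean (GitHub evaluates the expression natively; HEAD_REPO, BASE_REPO, and EVENT_NAME are hypothetical stand-ins for the github context fields):

  if [[ "${HEAD_REPO}" == "${BASE_REPO}" || "${EVENT_NAME}" != "pull_request" ]]; then
    echo "same-repo PR or non-PR event: secrets available, run the export"
  else
    echo "fork PR: skip (HuggingFace secrets are not available to forks)"
  fi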

.github/workflows/metal.yml

Lines changed: 3 additions & 1 deletion
@@ -30,6 +30,8 @@ jobs:

   export-voxtral-metal-artifact:
     name: export-voxtral-metal-artifact
+    # Skip this job if the pull request is from a fork (HuggingFace secrets are not available)
+    if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request'
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     secrets: inherit
     with:

@@ -44,7 +46,7 @@
       set -eux

       echo "::group::Setup Huggingface"
-      ${CONDA_RUN} pip install -U "huggingface_hub[cli]" accelerate
+      ${CONDA_RUN} pip install -U "huggingface_hub[cli]<1.0" accelerate
       ${CONDA_RUN} huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
       echo "::endgroup::"

.github/workflows/nightly.yml

Lines changed: 34 additions & 0 deletions
@@ -36,3 +36,37 @@ jobs:
     uses: ./.github/workflows/_link_check.yml
     with:
       ref: ${{ github.sha }}
+
+  test-static-hf-llm-qnn-linux:
+    name: test-static-hf-llm-qnn-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    strategy:
+      matrix:
+        task: [smollm2_135m]
+      fail-fast: false
+    with:
+      runner: linux.24xlarge
+      docker-image: ci-image:executorch-ubuntu-22.04-qnn-sdk
+      submodules: 'recursive'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 900
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        BUILD_TOOL="cmake"
+
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
+        PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
+
+        # Setup executorch
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
+
+        # Setup install_requirements for llama
+        PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
+
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test_qnn_static_llm.sh ${{ matrix.task }}
