Commit 152b4aa

Merge branch 'main' into experimental/vkml_rank_0_io

2 parents fa6247e + 7d2b8c6

136 files changed: +2753 −298

Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-bd06b54e627fbfd354a2cffa4c80fb21883209a9
+44d8d54e38c0258357d4e92e1fefe21e845947a3

.ci/scripts/build-qnn-sdk.sh

Lines changed: 1 addition & 0 deletions

@@ -38,6 +38,7 @@ set_up_aot() {
     -DEXECUTORCH_BUILD_EXTENSION_EXTENSION_LLM=ON \
     -DEXECUTORCH_BUILD_EXTENSION_EXTENSION_LLM_RUNNER=ON \
     -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \
     -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
     -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
     -DPYTHON_EXECUTABLE=python3
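
Note: this commit adds -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON next to the existing -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON in several CI scripts. A minimal local configure that mirrors the pattern (the build directory name is illustrative, not taken from this commit):

    # Sketch only: enable the flat-tensor and named-data-map extensions
    # together; cmake-out is an assumed build directory.
    cmake -DCMAKE_BUILD_TYPE=Release \
        -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
        -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \
        -Bcmake-out .
    cmake --build cmake-out -j4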

.ci/scripts/test_backend.sh

Lines changed: 1 addition & 0 deletions

@@ -59,6 +59,7 @@ fi
 if [[ "$FLOW" == *arm* ]]; then
     # Setup ARM deps.
     .ci/scripts/setup-arm-baremetal-tools.sh
+    source examples/arm/ethos-u-scratch/setup_path.sh
 
     if [[ "$FLOW" == *ethos_u* ]]; then
         # Prepare a test runner binary that can run on the Corstone-3x0 FVPs

.ci/scripts/test_llama_torchao_lowbit.sh

Lines changed: 1 addition & 0 deletions

@@ -31,6 +31,7 @@ cmake -DPYTHON_EXECUTABLE=python \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
     -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \
     -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
     -DEXECUTORCH_BUILD_XNNPACK=OFF \
     -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \

.ci/scripts/test_model.sh

Lines changed: 28 additions & 4 deletions

@@ -48,22 +48,33 @@ prepare_artifacts_upload() {
   fi
 }
 
+
 build_cmake_executor_runner() {
   local backend_string_select="${1:-}"
   echo "Building executor_runner"
   rm -rf ${CMAKE_OUTPUT_DIR}
   mkdir ${CMAKE_OUTPUT_DIR}
+  # Common options:
+  COMMON="-DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE"
   if [[ "$backend_string_select" == "XNNPACK" ]]; then
     echo "Backend $backend_string_select selected"
-    (cd ${CMAKE_OUTPUT_DIR} \
-      && cmake -DCMAKE_BUILD_TYPE=Release \
+    cmake -DCMAKE_BUILD_TYPE=Release \
       -DEXECUTORCH_BUILD_XNNPACK=ON \
-      -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" ..)
+      ${COMMON} \
+      -B${CMAKE_OUTPUT_DIR} .
+    cmake --build ${CMAKE_OUTPUT_DIR} -j4
+  elif [[ "$backend_string_select" == "CUDA" ]]; then
+    echo "Backend $backend_string_select selected"
+    cmake -DCMAKE_BUILD_TYPE=Release \
+      -DEXECUTORCH_BUILD_CUDA=ON \
+      -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
+      ${COMMON} \
+      -B${CMAKE_OUTPUT_DIR} .
     cmake --build ${CMAKE_OUTPUT_DIR} -j4
   else
     cmake -DCMAKE_BUILD_TYPE=Debug \
       -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-      -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
+      ${COMMON} \
       -B${CMAKE_OUTPUT_DIR} .
     cmake --build ${CMAKE_OUTPUT_DIR} -j4 --config Debug
   fi

@@ -320,6 +331,13 @@ test_model_with_mediatek() {
   EXPORTED_MODEL=$(find "./${EXPORT_SCRIPT}" -type f -name "*.pte" -print -quit)
 }
 
+test_model_with_cuda() {
+  # Export a basic .pte and .ptd, then run the model.
+  "${PYTHON_EXECUTABLE}" -m examples.cuda.scripts.export --model_name="${MODEL_NAME}" --output_dir "./"
+  build_cmake_executor_runner "CUDA"
+  ./${CMAKE_OUTPUT_DIR}/executor_runner --model_path "./${MODEL_NAME}.pte" --data_path "./aoti_cuda_blob.ptd"
+}
+
 
 if [[ "${BACKEND}" == "portable" ]]; then
   echo "Testing ${MODEL_NAME} with portable kernels..."

@@ -372,6 +390,12 @@ elif [[ "${BACKEND}" == "mediatek" ]]; then
   if [[ $? -eq 0 ]]; then
     prepare_artifacts_upload
   fi
+elif [[ "${BACKEND}" == "cuda" ]]; then
+  echo "Testing ${MODEL_NAME} with cuda..."
+  test_model_with_cuda
+  if [[ $? -eq 0 ]]; then
+    prepare_artifacts_upload
+  fi
 else
   set +e
   if [[ "${BACKEND}" == *"quantization"* ]]; then

.ci/scripts/test_torchao_huggingface_checkpoints.sh

Lines changed: 1 addition & 0 deletions

@@ -129,6 +129,7 @@ if [[ "$TEST_WITH_RUNNER" -eq 1 ]]; then
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
     -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \
     -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
     -DEXECUTORCH_BUILD_XNNPACK=ON \
     -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \

.ci/scripts/test_yolo12.sh

Lines changed: 4 additions & 0 deletions

@@ -119,6 +119,8 @@ cmake_install_executorch_libraries() {
     -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \
     -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \
     -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
     -B"${build_dir}"

@@ -131,6 +133,8 @@ cmake_install_executorch_libraries() {
     -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \
     -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \
     -DEXECUTORCH_ENABLE_LOGGING=ON \
     -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \

.ci/scripts/utils.sh

Lines changed: 4 additions & 3 deletions

@@ -125,14 +125,15 @@ build_executorch_runner_cmake() {
   clean_executorch_install_folders
   mkdir "${CMAKE_OUTPUT_DIR}"
 
-  pushd "${CMAKE_OUTPUT_DIR}" || return
   if [[ $1 == "Debug" ]]; then
     CXXFLAGS="-fsanitize=address,undefined"
   else
     CXXFLAGS=""
   fi
-  CXXFLAGS="$CXXFLAGS" retry cmake -DPYTHON_EXECUTABLE="${PYTHON_EXECUTABLE}" -DCMAKE_BUILD_TYPE="${1:-Release}" ..
-  popd || return
+  CXXFLAGS="$CXXFLAGS" retry cmake \
+    -DPYTHON_EXECUTABLE="${PYTHON_EXECUTABLE}" \
+    -DCMAKE_BUILD_TYPE="${1:-Release}" \
+    -B${CMAKE_OUTPUT_DIR} .
 
   if [ "$(uname)" == "Darwin" ]; then
     CMAKE_JOBS=$(( $(sysctl -n hw.ncpu) - 1 ))
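
The pushd/popd pair is replaced by CMake's own out-of-source flags: -B<dir> with a trailing . configures into <dir> from the current source tree without changing the shell's working directory. The same pattern in isolation (the directory name is illustrative):

    # Sketch: configure and build out-of-source without pushd/popd.
    cmake -DCMAKE_BUILD_TYPE=Release -Bbuild .
    cmake --build build -j4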

.github/workflows/cuda.yml

Lines changed: 171 additions & 0 deletions

@@ -0,0 +1,171 @@
+# Test ExecuTorch CUDA Build Compatibility
+# This workflow tests whether ExecuTorch can be successfully built with CUDA support
+# across different CUDA versions (12.6, 12.8, 13.0) using the command:
+# CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_executorch.sh
+#
+# Note: ExecuTorch automatically detects the system CUDA version using nvcc and
+# installs the appropriate PyTorch wheel. No manual CUDA/PyTorch installation needed.
+
+name: Test CUDA Builds
+
+on:
+  pull_request:
+  push:
+    branches:
+      - main
+      - release/*
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
+  cancel-in-progress: false
+
+jobs:
+  test-cuda-builds:
+    strategy:
+      fail-fast: false
+      matrix:
+        cuda-version: ["12.6", "12.8", "13.0"]
+
+    name: test-executorch-cuda-build-${{ matrix.cuda-version }}
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    with:
+      timeout: 90
+      runner: linux.g5.4xlarge.nvidia.gpu
+      gpu-arch-type: cuda
+      gpu-arch-version: ${{ matrix.cuda-version }}
+      use-custom-docker-registry: false
+      submodules: recursive
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      script: |
+        set -eux
+
+        # Test ExecuTorch CUDA build - ExecuTorch will automatically detect CUDA version
+        # and install the appropriate PyTorch wheel when CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON"
+        source .ci/scripts/test-cuda-build.sh "${{ matrix.cuda-version }}"
+
+  # This job will fail if any of the CUDA versions fail
+  check-all-cuda-builds:
+    needs: test-cuda-builds
+    runs-on: ubuntu-latest
+    if: always()
+    steps:
+      - name: Check if all CUDA builds succeeded
+        run: |
+          if [[ "${{ needs.test-cuda-builds.result }}" != "success" ]]; then
+            echo "ERROR: One or more ExecuTorch CUDA builds failed!"
+            echo "CUDA build results: ${{ needs.test-cuda-builds.result }}"
+            exit 1
+          else
+            echo "SUCCESS: All ExecuTorch CUDA builds (12.6, 12.8, 13.0) completed successfully!"
+          fi
+
+  test-models-cuda:
+    name: test-models-cuda
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    strategy:
+      fail-fast: false
+      matrix:
+        model: [linear, add, add_mul, resnet18]
+    with:
+      timeout: 90
+      runner: linux.g5.4xlarge.nvidia.gpu
+      gpu-arch-type: cuda
+      gpu-arch-version: 12.6
+      use-custom-docker-registry: false
+      submodules: recursive
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      script: |
+        set -eux
+
+        PYTHON_EXECUTABLE=python CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_executorch.sh
+        export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
+        PYTHON_EXECUTABLE=python source .ci/scripts/test_model.sh "${{ matrix.model }}" cmake cuda
+
+  test-voxtral-cuda-e2e:
+    name: test-voxtral-cuda-e2e
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    secrets: inherit
+    strategy:
+      fail-fast: false
+    with:
+      timeout: 90
+      secrets-env: EXECUTORCH_HF_TOKEN
+      runner: linux.g5.4xlarge.nvidia.gpu
+      gpu-arch-type: cuda
+      gpu-arch-version: 12.6
+      use-custom-docker-registry: false
+      submodules: recursive
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      script: |
+        set -eux
+
+        echo "::group::Setup ExecuTorch"
+        CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_executorch.sh
+        echo "::endgroup::"
+
+        echo "::group::Setup Huggingface"
+        pip install -U "huggingface_hub[cli]" accelerate
+        huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
+        OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
+        pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
+        pip install mistral-common librosa
+        echo "::endgroup::"
+
+        echo "::group::Export Voxtral"
+        optimum-cli export executorch \
+          --model "mistralai/Voxtral-Mini-3B-2507" \
+          --task "multimodal-text-to-text" \
+          --recipe "cuda" \
+          --dtype bfloat16 \
+          --device cuda \
+          --max_seq_len 1024 \
+          --output_dir ./
+        echo "::endgroup::"
+
+        echo "::group::Build Voxtral Runner"
+        cmake -DCMAKE_BUILD_TYPE=Release \
+          -DEXECUTORCH_BUILD_CUDA=ON \
+          -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
+          -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+          -DEXECUTORCH_BUILD_TESTS=ON \
+          -Bcmake-out .
+        cmake --build cmake-out -j$(( $(nproc) - 1 )) --target voxtral_runner
+        echo "::endgroup::"
+
+        echo "::group::Run Voxtral Runner"
+        # Capture output and allow exit code 139 if we have the expected printout
+        set +e
+        export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
+        OUTPUT=$(cmake-out/backends/cuda/voxtral_runner model.pte aoti_cuda_blob.ptd 2>&1)
+        EXIT_CODE=$?
+        set -e
+
+        echo "$OUTPUT"
+
+        # Check if the output contains "Run latency (ms):"
+        if echo "$OUTPUT" | grep -q "Run latency (ms):"; then
+          echo "Found expected output: 'Run latency (ms):'"
+          if [ $EXIT_CODE -eq 139 ]; then
+            echo "Exit code 139 (segfault) detected, but passing since we have the expected output"
+            exit 0
+          elif [ $EXIT_CODE -ne 0 ]; then
+            echo "Unexpected exit code: $EXIT_CODE"
+            exit $EXIT_CODE
+          else
+            echo "Command succeeded with exit code 0"
+            exit 0
+          fi
+        else
+          echo "Expected output 'Run latency (ms):' not found in output"
+          exit 1
+        fi
+        echo "::endgroup::"

.github/workflows/pull.yml

Lines changed: 1 addition & 1 deletion

@@ -909,12 +909,12 @@ jobs:
       contents: read
     secrets: inherit
     with:
+      secrets-env: SAMSUNG_AI_LITECORE_KEY
      runner: linux.2xlarge
       docker-image: ci-image:executorch-ubuntu-22.04-clang12-android
       submodules: 'recursive'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       timeout: 90
-      secrets-env: SAMSUNG_AI_LITECORE_KEY
       script: |
         set -ex
