Skip to content

Commit 88cfb1d

Browse files
Windows cuda export ci and set up script (#16255)
Add a script that tries to auto-install MinGW and the Windows CUDA runtime. It also sets up WINDOWS_CUDA_HOME for now, though we should probably move away from using ad-hoc environment variables for things like this. To test, add some export steps to CI and verify that export and lowering succeed against the usual suspects. A follow-up will spin up some Windows + CUDA machines and try running Whisper and Voxtral.
1 parent 847d70d commit 88cfb1d

File tree

7 files changed

+339
-4
lines changed

7 files changed

+339
-4
lines changed

.ci/docker/build.sh

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,13 @@ case "${IMAGE_NAME}" in
6767
# From https://developer.android.com/ndk/downloads
6868
ANDROID_NDK_VERSION=r28c
6969
;;
70+
executorch-ubuntu-22.04-cuda-windows)
71+
LINTRUNNER=""
72+
GCC_VERSION=11
73+
CUDA_WINDOWS_CROSS_COMPILE=yes
74+
CUDA_VERSION=12.8
75+
SKIP_PYTORCH=yes
76+
;;
7077
*)
7178
echo "Invalid image name ${IMAGE_NAME}"
7279
exit 1
@@ -101,6 +108,8 @@ docker build \
101108
--build-arg "MEDIATEK_SDK=${MEDIATEK_SDK:-}" \
102109
--build-arg "ANDROID_NDK_VERSION=${ANDROID_NDK_VERSION:-}" \
103110
--build-arg "SKIP_PYTORCH=${SKIP_PYTORCH:-}" \
111+
--build-arg "CUDA_WINDOWS_CROSS_COMPILE=${CUDA_WINDOWS_CROSS_COMPILE:-}" \
112+
--build-arg "CUDA_VERSION=${CUDA_VERSION:-}" \
104113
-f "${OS}"/Dockerfile \
105114
"$@" \
106115
.

.ci/docker/common/install_cuda.sh

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Install the Linux CUDA toolkit: nvcc and the CUDA development libraries
# needed for compiling CUDA code. Intended to run as root during the Docker
# image build.
#
# Required env var: CUDA_VERSION (e.g., 12.8)

# -u errors on unset variables, pipefail surfaces failures inside pipelines
# (e.g. the version-conversion pipeline below); -x traces for CI logs.
set -euxo pipefail

# CUDA version must be specified (e.g., 12.8)
CUDA_VERSION="${CUDA_VERSION:?CUDA_VERSION must be set}"

# Convert version format for Debian package names (e.g., 12.8 -> 12-8)
CUDA_VERSION_DASH=$(echo "${CUDA_VERSION}" | tr '.' '-')

# Prerequisites for adding the NVIDIA package repository
apt-get update
apt-get install -y --no-install-recommends \
    gnupg2 \
    ca-certificates \
    wget

# Download and install the CUDA keyring so apt trusts the NVIDIA repository
wget -q "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb" -O /tmp/cuda-keyring.deb
dpkg -i /tmp/cuda-keyring.deb
rm /tmp/cuda-keyring.deb

apt-get update

# Install CUDA toolkit (nvcc and development libraries).
# We install a minimal set of packages needed for compilation:
# - cuda-nvcc:       the CUDA compiler
# - cuda-cudart-dev: CUDA runtime development files
# - cuda-nvrtc-dev:  CUDA runtime compilation library
# - libcublas-dev:   cuBLAS development files
# - libcusparse-dev: cuSPARSE development files
# - libcufft-dev:    cuFFT development files
apt-get install -y --no-install-recommends \
    "cuda-nvcc-${CUDA_VERSION_DASH}" \
    "cuda-cudart-dev-${CUDA_VERSION_DASH}" \
    "cuda-nvrtc-dev-${CUDA_VERSION_DASH}" \
    "libcublas-dev-${CUDA_VERSION_DASH}" \
    "libcusparse-dev-${CUDA_VERSION_DASH}" \
    "libcufft-dev-${CUDA_VERSION_DASH}"

# Clean up apt caches to keep the Docker image small
apt-get clean
rm -rf /var/lib/apt/lists/*

# Verify installation (quoted so an unexpected value in CUDA_VERSION cannot
# word-split the path)
"/usr/local/cuda-${CUDA_VERSION}/bin/nvcc" --version

echo "CUDA ${CUDA_VERSION} toolkit installation complete"
echo "CUDA_HOME=/usr/local/cuda-${CUDA_VERSION}"
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Install the mingw-w64 cross-compiler and the Windows CUDA toolkit so that
# Windows CUDA binaries can be cross-compiled from Linux.
#
# Usage: $0 [--mingw] [--cuda] [--all]
# With no options, both components are installed.
#
# Env vars: WINDOWS_CUDA_INSTALL_DIR (optional, default /opt/cuda-windows),
#           PYTHON_VERSION (used to pick the conda env queried for torch).

# pipefail surfaces failures inside the version-extraction pipelines below.
set -exo pipefail

INSTALL_DIR="${WINDOWS_CUDA_INSTALL_DIR:-/opt/cuda-windows}"

# Mapping of CUDA major.minor versions to "full_version:driver_version", used
# to construct the Windows installer file name.
# Source: https://developer.nvidia.com/cuda-toolkit-archive
declare -A CUDA_DRIVER_MAP=(
    ["12.6"]="12.6.3:561.17"
    ["12.8"]="12.8.1:572.61"
    ["12.9"]="12.9.1:576.57"
)

# Install the mingw-w64 cross-compiler (and the tools needed later to extract
# the CUDA installer).
install_mingw() {
    echo "Installing mingw-w64 cross-compiler..."

    apt-get update
    # Install the POSIX threads version of mingw-w64, which supports C++11
    # threading primitives (std::mutex, std::condition_variable,
    # std::shared_mutex). The default win32 threads version does not.
    apt-get install -y --no-install-recommends \
        g++-mingw-w64-x86-64-posix \
        mingw-w64-tools \
        p7zip-full \
        wget

    # Verify installation shows POSIX threads
    x86_64-w64-mingw32-g++ --version

    # Clean up apt caches to keep the Docker image small
    apt-get clean
    rm -rf /var/lib/apt/lists/*

    echo "mingw-w64 installation complete (POSIX threads version)"
}

# Print the CUDA version PyTorch was built with (empty string on failure).
get_torch_cuda_version() {
    # Query PyTorch inside the conda environment; swallow errors so the caller
    # can produce a friendly diagnostic instead of a raw traceback.
    conda run -n "py_${PYTHON_VERSION}" python3 -c "import torch; print(torch.version.cuda)" 2>/dev/null || echo ""
}

# Download and extract the Windows CUDA toolkit matching torch's CUDA version.
install_windows_cuda() {
    # All function state is local so nothing leaks into the global scope.
    local torch_cuda_version cuda_major_minor cuda_info
    local cuda_version cuda_driver_version cuda_installer cuda_url

    torch_cuda_version=$(get_torch_cuda_version)

    if [ -z "${torch_cuda_version}" ] || [ "${torch_cuda_version}" = "None" ]; then
        # Diagnostics go to stderr, not stdout.
        echo "ERROR: Could not detect CUDA version from PyTorch." >&2
        echo "Make sure PyTorch with CUDA support is installed before running this script." >&2
        exit 1
    fi

    echo "Detected PyTorch CUDA version: ${torch_cuda_version}"

    # Extract major.minor version (e.g., "12.8" from "12.8.1" or "12.8")
    cuda_major_minor=$(echo "${torch_cuda_version}" | cut -d. -f1,2)

    # Look up the full version and driver version
    if [ -z "${CUDA_DRIVER_MAP[${cuda_major_minor}]}" ]; then
        echo "ERROR: CUDA version ${cuda_major_minor} is not in the known version map." >&2
        echo "Known versions: ${!CUDA_DRIVER_MAP[*]}" >&2
        exit 1
    fi

    cuda_info="${CUDA_DRIVER_MAP[${cuda_major_minor}]}"
    cuda_version=$(echo "${cuda_info}" | cut -d: -f1)
    cuda_driver_version=$(echo "${cuda_info}" | cut -d: -f2)

    echo "Using CUDA ${cuda_version} with driver ${cuda_driver_version}"
    echo "Installing Windows CUDA toolkit ${cuda_version}..."

    mkdir -p "${INSTALL_DIR}"
    cd "${INSTALL_DIR}"

    cuda_installer="cuda_${cuda_version}_${cuda_driver_version}_windows.exe"
    cuda_url="https://developer.download.nvidia.com/compute/cuda/${cuda_version}/local_installers/${cuda_installer}"

    # Skip the (large) download if a previous run already extracted the toolkit
    if [ -d "${INSTALL_DIR}/extracted/cuda_cudart" ]; then
        echo "Windows CUDA toolkit already installed, skipping download..."
        return 0
    fi

    echo "Downloading CUDA installer from ${cuda_url}..."
    wget -q "${cuda_url}" -O "${cuda_installer}"

    # The Windows installer is a self-extracting archive; 7z unpacks it
    # without needing to run the .exe.
    echo "Extracting CUDA toolkit..."
    7z x "${cuda_installer}" -o"extracted" -y

    # Fix permissions so ci-user can access the files
    chmod -R a+rX "${INSTALL_DIR}"

    # Clean up installer to save space
    rm -f "${cuda_installer}"

    echo "Windows CUDA toolkit installation complete"
    echo "WINDOWS_CUDA_HOME=${INSTALL_DIR}/extracted/cuda_cudart/cudart"
}

# Parse command line arguments
INSTALL_MINGW=false
INSTALL_CUDA=false

while [[ $# -gt 0 ]]; do
    case $1 in
        --mingw)
            INSTALL_MINGW=true
            shift
            ;;
        --cuda)
            INSTALL_CUDA=true
            shift
            ;;
        --all)
            INSTALL_MINGW=true
            INSTALL_CUDA=true
            shift
            ;;
        *)
            echo "Unknown option: $1" >&2
            echo "Usage: $0 [--mingw] [--cuda] [--all]" >&2
            exit 1
            ;;
    esac
done

# Default to installing everything if no options specified
if [ "${INSTALL_MINGW}" = false ] && [ "${INSTALL_CUDA}" = false ]; then
    INSTALL_MINGW=true
    INSTALL_CUDA=true
fi

if [ "${INSTALL_MINGW}" = true ]; then
    install_mingw
fi

if [ "${INSTALL_CUDA}" = true ]; then
    install_windows_cuda
fi

echo "Installation complete"

.ci/docker/ubuntu/Dockerfile

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,5 +98,23 @@ ARG QNN_SDK
9898

9999
ARG MEDIATEK_SDK
100100

101+
ARG CUDA_WINDOWS_CROSS_COMPILE
102+
ARG CUDA_VERSION
103+
COPY ./common/install_cuda.sh install_cuda.sh
104+
COPY ./common/install_cuda_windows_cross_compile.sh install_cuda_windows_cross_compile.sh
105+
COPY ./common/utils.sh utils.sh
106+
RUN if [ -n "${CUDA_WINDOWS_CROSS_COMPILE}" ]; then \
107+
CUDA_VERSION=${CUDA_VERSION} bash ./install_cuda.sh && \
108+
bash ./install_cuda_windows_cross_compile.sh; \
109+
fi
110+
RUN rm -f install_cuda.sh install_cuda_windows_cross_compile.sh utils.sh
111+
# Set up CUDA environment for Linux compilation (nvcc, etc.)
112+
ENV CUDA_HOME=/usr/local/cuda
113+
ENV PATH=${CUDA_HOME}/bin:${PATH}
114+
# Ensure system libstdc++ is found before conda's (GLIBCXX_3.4.30 compatibility)
115+
ENV LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
116+
# Windows CUDA for cross-compilation
117+
ENV WINDOWS_CUDA_HOME=/opt/cuda-windows/extracted/cuda_cudart/cudart
118+
101119
USER ci-user
102120
CMD ["bash"]

.ci/scripts/export_model_artifact.sh

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,11 +58,13 @@ OUTPUT_DIR="${4:-.}"
5858
case "$DEVICE" in
5959
cuda)
6060
;;
61+
cuda-windows)
62+
;;
6163
metal)
6264
;;
6365
*)
6466
echo "Error: Unsupported device '$DEVICE'"
65-
echo "Supported devices: cuda, metal"
67+
echo "Supported devices: cuda, cuda-windows, metal"
6668
exit 1
6769
;;
6870
esac
@@ -147,7 +149,7 @@ if [ -n "$MAX_SEQ_LEN" ]; then
147149
fi
148150

149151
DEVICE_ARG=""
150-
if [ "$DEVICE" = "cuda" ]; then
152+
if [ "$DEVICE" = "cuda" ] || [ "$DEVICE" = "cuda-windows" ]; then
151153
DEVICE_ARG="--device cuda"
152154
fi
153155

@@ -169,8 +171,15 @@ if [ -n "$PREPROCESSOR_OUTPUT" ]; then
169171
--output_file $PREPROCESSOR_OUTPUT
170172
fi
171173

174+
# Determine blob file name - cuda and cuda-windows both use aoti_cuda_blob.ptd
175+
if [ "$DEVICE" = "cuda" ] || [ "$DEVICE" = "cuda-windows" ]; then
176+
BLOB_FILE="aoti_cuda_blob.ptd"
177+
else
178+
BLOB_FILE="aoti_${DEVICE}_blob.ptd"
179+
fi
180+
172181
test -f model.pte
173-
test -f aoti_${DEVICE}_blob.ptd
182+
test -f $BLOB_FILE
174183
if [ -n "$PREPROCESSOR_OUTPUT" ]; then
175184
test -f $PREPROCESSOR_OUTPUT
176185
fi
@@ -179,7 +188,7 @@ echo "::endgroup::"
179188
echo "::group::Store $MODEL_NAME Artifacts"
180189
mkdir -p "${OUTPUT_DIR}"
181190
mv model.pte "${OUTPUT_DIR}/"
182-
mv aoti_${DEVICE}_blob.ptd "${OUTPUT_DIR}/"
191+
mv $BLOB_FILE "${OUTPUT_DIR}/"
183192
if [ -n "$PREPROCESSOR_OUTPUT" ]; then
184193
mv $PREPROCESSOR_OUTPUT "${OUTPUT_DIR}/"
185194
fi

.github/workflows/cuda-windows.yml

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
# Test ExecuTorch CUDA Windows Cross-Compilation Export
# This workflow tests model export targeting CUDA Windows using optimum-executorch.
# It runs on a Linux machine with CUDA and uses the executorch-ubuntu-22.04-cuda-windows
# Docker image which has mingw and the Windows CUDA SDK pre-installed for cross-compilation.

name: Test CUDA Windows Export

on:
  pull_request:
  push:
    branches:
      - main
      - release/*

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: false

jobs:
  export-model-cuda-windows-artifact:
    name: export-model-cuda-windows-artifact
    # Skip this job if the pull request is from a fork (HuggingFace secrets are not available)
    if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request'
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    permissions:
      id-token: write
      contents: read
    secrets: inherit
    strategy:
      fail-fast: false
      matrix:
        model:
          - repo: "mistralai"
            name: "Voxtral-Mini-3B-2507"
          - repo: "openai"
            name: "whisper-small"
          - repo: "openai"
            name: "whisper-large-v3-turbo"
          - repo: "google"
            name: "gemma-3-4b-it"
        quant:
          - "non-quantized"
          - "quantized-int4-weight-only"
        exclude:
          # TODO: enable int4-weight-only on gemma3.
          - model:
              repo: "google"
              name: "gemma-3-4b-it"
            quant: "quantized-int4-weight-only"
    with:
      timeout: 90
      secrets-env: EXECUTORCH_HF_TOKEN
      runner: linux.g5.4xlarge.nvidia.gpu
      gpu-arch-type: cuda
      # Quoted so YAML does not coerce the version to a float (e.g. 12.8 -> 12.8
      # as a number would drop trailing zeros for versions like 12.80).
      gpu-arch-version: "12.8"
      docker-image: ci-image:executorch-ubuntu-22.04-cuda-windows
      submodules: recursive
      upload-artifact: ${{ matrix.model.repo }}-${{ matrix.model.name }}-cuda-windows-${{ matrix.quant }}
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        set -eux

        echo "::group::Fix libstdc++ GLIBCXX version"
        # The executorch pybindings require GLIBCXX_3.4.30 which conda's libstdc++ doesn't have.
        # Replace conda's libstdc++ with the system version to fix ImportError.
        # Verify system version has GLIBCXX_3.4.30
        strings /usr/lib/x86_64-linux-gnu/libstdc++.so.6 | grep GLIBCXX_3.4.30
        # Backup and replace conda's version
        mv /opt/conda/lib/libstdc++.so.6 /opt/conda/lib/libstdc++.so.6.bak || true
        ln -sf /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /opt/conda/lib/libstdc++.so.6
        echo "::endgroup::"

        echo "::group::Verify pre-installed dependencies"
        x86_64-w64-mingw32-g++ --version
        nvcc --version
        echo "WINDOWS_CUDA_HOME=${WINDOWS_CUDA_HOME}"
        ls -la "${WINDOWS_CUDA_HOME}"
        echo "::endgroup::"

        echo "::group::Setup ExecuTorch"
        PYTHON_EXECUTABLE=python ./install_executorch.sh
        echo "::endgroup::"

        echo "::group::Setup Huggingface"
        pip install -U "huggingface_hub[cli]<1.0" accelerate
        huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
        OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
        pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
        echo "::endgroup::"

        source .ci/scripts/export_model_artifact.sh cuda-windows "${{ matrix.model.repo }}/${{ matrix.model.name }}" "${{ matrix.quant }}" "${RUNNER_ARTIFACT_DIR}"

0 commit comments

Comments
 (0)