Skip to content

Commit a954a75

Browse files
authored
Use cuda version PT when build with CUDA delegate (#14355)
This PR enables installation of the CUDA build of PyTorch when building from source with the CUDA delegate enabled. More specifically: 1. ET keeps depending on the CPU build of PyTorch as long as the CUDA delegate is not enabled; 2. We choose the CUDA PyTorch build that exactly matches the user's CUDA version: if the user does not have CUDA, or has a CUDA version that does not exactly match one of the versions PyTorch supports, the installation script raises an error.
1 parent ce8916f commit a954a75

File tree

5 files changed

+375
-66
lines changed

5 files changed

+375
-66
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
4d4abec80f03cd8fdefe1d9cb3a60d3690cd777e
1+
53a2908a10f414a2f85caa06703a26a40e873869

.ci/scripts/test-cuda-build.sh

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Fail fast: -e exit on first error, -x trace commands, -u error on unset vars,
# and pipefail so a failure in any pipeline stage fails the whole pipeline
# (without it, e.g. `free | grep | awk` would hide a failing `free`).
set -exuo pipefail

# CUDA toolkit version under test; defaults to 12.6 when no argument is given.
CUDA_VERSION=${1:-"12.6"}

echo "=== Testing ExecutorTorch CUDA ${CUDA_VERSION} Build ==="
13+
14+
#######################################
# Build ExecutorTorch with CUDA support and verify the result.
# Globals:   CMAKE_ARGS (written) - enables the CUDA delegate for install_executorch.sh
# Arguments: $1 - CUDA version being tested (informational; detection is automatic)
# Outputs:   progress and diagnostics to stdout
# Returns:   exits non-zero on build failure, timeout, or failed CUDA verification
#######################################
test_executorch_cuda_build() {
  local cuda_version=$1

  echo "Building ExecutorTorch with CUDA ${cuda_version} support..."
  echo "ExecutorTorch will automatically detect CUDA and install appropriate PyTorch wheel"

  # Check available resources before starting
  echo "=== System Information ==="
  echo "Available memory: $(free -h | awk '/^Mem/{print $2}')"
  echo "Available disk space: $(df -h . | awk 'END{print $4}')"
  echo "CPU cores: $(nproc)"
  echo "CUDA version check:"
  # Informational only: the install script does its own CUDA detection.
  nvcc --version || echo "nvcc not found"
  nvidia-smi || echo "nvidia-smi not found"

  # Set CMAKE_ARGS to enable CUDA build - ExecutorTorch will handle PyTorch installation automatically
  export CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON"

  echo "=== Starting ExecutorTorch Installation ==="
  # Install ExecutorTorch with CUDA support; abort after 90 minutes (5400 s).
  timeout 5400 ./install_executorch.sh || {
    local exit_code=$?
    echo "ERROR: install_executorch.sh failed with exit code: ${exit_code}"
    # timeout(1) exits with 124 when it had to kill the command.
    if [[ "${exit_code}" -eq 124 ]]; then
      echo "ERROR: Installation timed out after 90 minutes"
    fi
    exit "${exit_code}"
  }

  echo "SUCCESS: ExecutorTorch CUDA build completed"

  # Verify the installation
  echo "=== Verifying ExecutorTorch CUDA Installation ==="

  # Test that ExecutorTorch was built successfully
  python -c "
import executorch
print('SUCCESS: ExecutorTorch imported successfully')
"

  # Test CUDA availability, show details, and run a small tensor op end to end.
  python -c "
import sys

try:
    import torch
    print('INFO: PyTorch version:', torch.__version__)
    print('INFO: CUDA available:', torch.cuda.is_available())

    if torch.cuda.is_available():
        print('SUCCESS: CUDA is available for ExecutorTorch')
        print('INFO: CUDA version:', torch.version.cuda)
        print('INFO: GPU device count:', torch.cuda.device_count())
        print('INFO: Current GPU device:', torch.cuda.current_device())
        print('INFO: GPU device name:', torch.cuda.get_device_name())

        # Basic CUDA tensor operation as a smoke test.
        device = torch.device('cuda')
        x = torch.randn(10, 10).to(device)
        y = torch.randn(10, 10).to(device)
        z = torch.mm(x, y)
        print('SUCCESS: CUDA tensor operation completed on device:', z.device)
        print('INFO: Result tensor shape:', z.shape)

        print('SUCCESS: ExecutorTorch CUDA integration verified')
    else:
        # Hard failure: the whole point of this job is to verify the CUDA path,
        # so a CPU-only runtime must fail loudly, not as a 'WARNING'.
        print('ERROR: CUDA not available at runtime even though the build succeeded')
        sys.exit(1)
except Exception as e:
    print('ERROR: ExecutorTorch CUDA test failed:', e)
    sys.exit(1)
"

  echo "SUCCESS: ExecutorTorch CUDA ${cuda_version} build and verification completed successfully"
}
88+
89+
# Entry point: report where we are running, then kick off the build test
# for the requested CUDA toolkit version.
printf 'Current working directory: %s\n' "$PWD"
printf 'Directory contents:\n'
ls -la

test_executorch_cuda_build "${CUDA_VERSION}"
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# Test ExecuTorch CUDA Build Compatibility
# This workflow tests whether ExecuTorch can be successfully built with CUDA support
# across different CUDA versions (12.6, 12.8, 12.9) using the command:
# CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_executorch.sh
#
# Note: ExecuTorch automatically detects the system CUDA version using nvcc and
# installs the appropriate PyTorch wheel. No manual CUDA/PyTorch installation needed.

name: Test CUDA Builds

on:
  pull_request:
  push:
    branches:
      - main
      - release/*

# One concurrency group per PR (or commit SHA on push); the trailing event-name
# terms keep manually-dispatched and scheduled runs in distinct groups.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: false

jobs:
  # One build job per CUDA toolkit version, run on a GPU runner via the
  # shared pytorch/test-infra reusable workflow.
  test-cuda-builds:
    strategy:
      fail-fast: false
      matrix:
        cuda-version: ["12.6", "12.8", "12.9"]

    name: test-executorch-cuda-build-${{ matrix.cuda-version }}
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    permissions:
      id-token: write
      contents: read
    with:
      timeout: 90
      runner: linux.g5.4xlarge.nvidia.gpu
      gpu-arch-type: cuda
      gpu-arch-version: ${{ matrix.cuda-version }}
      use-custom-docker-registry: false
      submodules: recursive
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        set -eux

        # Test ExecuTorch CUDA build - ExecuTorch will automatically detect CUDA version
        # and install the appropriate PyTorch wheel when CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON"
        source .ci/scripts/test-cuda-build.sh "${{ matrix.cuda-version }}"

  # This job will fail if any of the CUDA versions fail
  check-all-cuda-builds:
    needs: test-cuda-builds
    runs-on: ubuntu-latest
    if: always()
    steps:
      - name: Check if all CUDA builds succeeded
        run: |
          if [[ "${{ needs.test-cuda-builds.result }}" != "success" ]]; then
            echo "ERROR: One or more ExecuTorch CUDA builds failed!"
            echo "CUDA build results: ${{ needs.test-cuda-builds.result }}"
            exit 1
          else
            echo "SUCCESS: All ExecuTorch CUDA builds (12.6, 12.8, 12.9) completed successfully!"
          fi

install_requirements.py

Lines changed: 25 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -7,60 +7,22 @@
77

88
import argparse
99
import os
10-
import platform
11-
import re
1210
import subprocess
1311
import sys
1412

15-
16-
def python_is_compatible():
17-
# Scrape the version range from pyproject.toml, which should be in the current directory.
18-
version_specifier = None
19-
with open("pyproject.toml", "r") as file:
20-
for line in file:
21-
if line.startswith("requires-python"):
22-
match = re.search(r'"([^"]*)"', line)
23-
if match:
24-
version_specifier = match.group(1)
25-
break
26-
27-
if not version_specifier:
28-
print(
29-
"WARNING: Skipping python version check: version range not found",
30-
file=sys.stderr,
31-
)
32-
return False
33-
34-
# Install the packaging module if necessary.
35-
try:
36-
import packaging
37-
except ImportError:
38-
subprocess.run(
39-
[sys.executable, "-m", "pip", "install", "packaging"], check=True
40-
)
41-
# Compare the current python version to the range in version_specifier. Exits
42-
# with status 1 if the version is not compatible, or with status 0 if the
43-
# version is compatible or the logic itself fails.
44-
try:
45-
import packaging.specifiers
46-
import packaging.version
47-
48-
python_version = packaging.version.parse(platform.python_version())
49-
version_range = packaging.specifiers.SpecifierSet(version_specifier)
50-
if python_version not in version_range:
51-
print(
52-
f'ERROR: ExecuTorch does not support python version {python_version}: must satisfy "{version_specifier}"',
53-
file=sys.stderr,
54-
)
55-
return False
56-
except Exception as e:
57-
print(f"WARNING: Skipping python version check: {e}", file=sys.stderr)
58-
return True
59-
13+
from install_utils import determine_torch_url, is_intel_mac_os, python_is_compatible
6014

6115
# The pip repository that hosts nightly torch packages.
62-
TORCH_NIGHTLY_URL = "https://download.pytorch.org/whl/nightly/cpu"
16+
# This will be dynamically set based on CUDA availability and CUDA backend enabled/disabled.
17+
TORCH_NIGHTLY_URL_BASE = "https://download.pytorch.org/whl/nightly"
6318

19+
# Supported CUDA versions - modify this to add/remove supported versions
20+
# Format: tuple of (major, minor) version numbers
21+
SUPPORTED_CUDA_VERSIONS = (
22+
(12, 6),
23+
(12, 8),
24+
(12, 9),
25+
)
6426

6527
# Since ExecuTorch often uses main-branch features of pytorch, only the nightly
6628
# pip versions will have the required features.
@@ -71,7 +33,10 @@ def python_is_compatible():
7133
#
7234
# NOTE: If you're changing, make the corresponding change in .ci/docker/ci_commit_pins/pytorch.txt
7335
# by picking the hash from the same date in https://hud.pytorch.org/hud/pytorch/pytorch/nightly/
74-
NIGHTLY_VERSION = "dev20250906"
36+
#
37+
# NOTE: If you're changing, make the corresponding supported CUDA versions in
38+
# SUPPORTED_CUDA_VERSIONS above if needed.
39+
NIGHTLY_VERSION = "dev20250915"
7540

7641

7742
def install_requirements(use_pytorch_nightly):
@@ -84,12 +49,15 @@ def install_requirements(use_pytorch_nightly):
8449
)
8550
sys.exit(1)
8651

52+
# Determine the appropriate PyTorch URL based on CUDA delegate status
53+
torch_url = determine_torch_url(TORCH_NIGHTLY_URL_BASE, SUPPORTED_CUDA_VERSIONS)
54+
8755
# pip packages needed by exir.
8856
TORCH_PACKAGE = [
8957
# Setting use_pytorch_nightly to false to test the pinned PyTorch commit. Note
9058
# that we don't need to set any version number there because they have already
9159
# been installed on CI before this step, so pip won't reinstall them
92-
f"torch==2.9.0.{NIGHTLY_VERSION}" if use_pytorch_nightly else "torch",
60+
f"torch==2.10.0.{NIGHTLY_VERSION}" if use_pytorch_nightly else "torch",
9361
]
9462

9563
# Install the requirements for core ExecuTorch package.
@@ -105,7 +73,7 @@ def install_requirements(use_pytorch_nightly):
10573
"requirements-dev.txt",
10674
*TORCH_PACKAGE,
10775
"--extra-index-url",
108-
TORCH_NIGHTLY_URL,
76+
torch_url,
10977
],
11078
check=True,
11179
)
@@ -147,10 +115,13 @@ def install_requirements(use_pytorch_nightly):
147115

148116

149117
def install_optional_example_requirements(use_pytorch_nightly):
118+
# Determine the appropriate PyTorch URL based on CUDA delegate status
119+
torch_url = determine_torch_url(TORCH_NIGHTLY_URL_BASE, SUPPORTED_CUDA_VERSIONS)
120+
150121
print("Installing torch domain libraries")
151122
DOMAIN_LIBRARIES = [
152123
(
153-
f"torchvision==0.24.0.{NIGHTLY_VERSION}"
124+
f"torchvision==0.25.0.{NIGHTLY_VERSION}"
154125
if use_pytorch_nightly
155126
else "torchvision"
156127
),
@@ -165,7 +136,7 @@ def install_optional_example_requirements(use_pytorch_nightly):
165136
"install",
166137
*DOMAIN_LIBRARIES,
167138
"--extra-index-url",
168-
TORCH_NIGHTLY_URL,
139+
torch_url,
169140
],
170141
check=True,
171142
)
@@ -180,25 +151,14 @@ def install_optional_example_requirements(use_pytorch_nightly):
180151
"-r",
181152
"requirements-examples.txt",
182153
"--extra-index-url",
183-
TORCH_NIGHTLY_URL,
154+
torch_url,
184155
"--upgrade-strategy",
185156
"only-if-needed",
186157
],
187158
check=True,
188159
)
189160

190161

191-
# Prebuilt binaries for Intel-based macOS are no longer available on PyPI; users must compile from source.
192-
# PyTorch stopped building macOS x86_64 binaries since version 2.3.0 (January 2024).
193-
def is_intel_mac_os():
194-
# Returns True if running on Intel macOS.
195-
return platform.system().lower() == "darwin" and platform.machine().lower() in (
196-
"x86",
197-
"x86_64",
198-
"i386",
199-
)
200-
201-
202162
def main(args):
203163
parser = argparse.ArgumentParser()
204164
parser.add_argument(

0 commit comments

Comments
 (0)