@@ -8,65 +8,109 @@ timeout: "55" # minutes
8
8
parametrize :
9
9
matrix : {}
10
10
include :
11
- # note that this is setting also all oldest requirements which is linked to Torch == 2.1
12
- - image : " pytorchlightning/pytorch_lightning:base-cuda12 .1.1-py3.10-torch2.1 "
11
+ # note that this also sets all the oldest requirements, which is linked to python == 3.10
12
+ - image : " nvidia/cuda:12 .1.1-runtime-ubuntu22.04 "
13
13
PACKAGE_NAME : " fabric"
14
+ python_version : " 3.10"
14
15
machine : " A100_X_2"
15
- - image : " pytorchlightning/pytorch_lightning:base-cuda12 .6.3-py3.12-torch2.8 "
16
+ - image : " nvidia/cuda:12 .6.3-runtime-ubuntu22.04 "
16
17
PACKAGE_NAME : " fabric"
18
+ python_version : " 3.12"
17
19
machine : " L4_X_2"
18
- # - image: "pytorchlightning/pytorch_lightning:base-cuda12.6.3-py3.12-torch2.7 "
20
+ # - image: "nvidia/cuda:12.6-runtime-ubuntu22.04 "
19
21
# PACKAGE_NAME: "fabric"
20
- - image : " pytorchlightning/pytorch_lightning:base-cuda12 .6.3-py3.12-torch2.8 "
22
+ - image : " nvidia/cuda:12 .6.3-runtime-ubuntu22.04 "
21
23
PACKAGE_NAME : " lightning"
24
+ python_version : " 3.12"
22
25
machine : " L4_X_2"
23
26
exclude : []
24
27
25
28
env :
29
+ TZ : " Etc/UTC"
30
+ DEBIAN_FRONTEND : " noninteractive"
31
+ CUDA_TOOLKIT_ROOT_DIR : " /usr/local/cuda"
32
+ MKL_THREADING_LAYER : " GNU"
33
+ CUDA_LAUNCH_BLOCKING : " 1"
34
+ NCCL_DEBUG : " INFO"
35
+ TORCHDYNAMO_VERBOSE : " 1"
26
36
FREEZE_REQUIREMENTS : " 1"
27
37
RUN_ONLY_CUDA_TESTS : " 1"
28
38
29
39
run : |
40
+ # Install Python and UV
41
+ apt-get update -qq --fix-missing
42
+ apt-get install -q -y software-properties-common curl
43
+ # Always add the deadsnakes PPA, which provides Python versions the base image's repositories may lack
44
+ add-apt-repository ppa:deadsnakes/ppa -y
45
+ apt-get update -qq --fix-missing
46
+ apt-get install -q -y --no-install-recommends --allow-downgrades --allow-change-held-packages \
47
+ build-essential \
48
+ pkg-config \
49
+ cmake \
50
+ ca-certificates \
51
+ libopenmpi-dev \
52
+ openmpi-bin \
53
+ ninja-build \
54
+ libnccl2 \
55
+ libnccl-dev
56
+
57
+ apt-get install -y python${python_version} python${python_version}-venv python${python_version}-dev
58
+ ln -sf /usr/bin/python${python_version} /usr/bin/python
59
+ curl -LsSf https://astral.sh/uv/install.sh | sh
60
+
61
+ # Source the environment and ensure UV is in PATH
62
+ [ -f "$HOME/.local/bin/env" ] && . "$HOME/.local/bin/env"
63
+ export PATH="$HOME/.local/bin:$PATH"
64
+ source $HOME/.cargo/env 2>/dev/null || true
65
+ export PATH="$HOME/.cargo/bin:$PATH"
66
+
67
+ # Verify UV installation
68
+ command -v uv || (echo "UV not found in PATH" && exit 1)
69
+ # Create and activate a local uv virtual environment
70
+ uv venv .venv -p "/usr/bin/python${python_version}" || uv venv .venv -p "python${python_version}" || uv venv .venv
71
+ . .venv/bin/activate
72
+ hash -r
73
+
30
74
whereis nvidia
31
75
nvidia-smi
32
76
python --version
33
- pip --version
34
- pip install -q fire wget packaging
35
- pip list
77
+ uv --version
78
+ uv pip list
36
79
set -ex
37
80
38
- CUDA_VERSION="${image##*cuda}" # Remove everything up to and including "cuda"
81
+ # Parse CUDA version from image tag, e.g., "nvidia/cuda:12.6.3-devel-ubuntu22.04"
82
+ IMAGE_TAG="${image##*:}" # "12.6.3-devel-ubuntu22.04"
83
+ CUDA_VERSION="${IMAGE_TAG%%-*}" # "12.6.3"
39
84
echo "Using CUDA version: ${CUDA_VERSION}"
40
- CUDA_VERSION_M_M="${cuda_version%.*}" # Get major.minor by removing the last dot and everything after
41
- CUDA_VERSION_MM="${CUDA_VERSION_M_M//'.'/''}"
42
- TORCH_URL="https://download.pytorch.org/whl/cu${CUDA_VERSION_MM}/torch_stable.html"
43
- echo "Torch URL: ${TORCH_URL}"
85
+ CUDA_VERSION_M_M="${CUDA_VERSION%.*}" # "12.6"
86
+ CUDA_VERSION_MM="${CUDA_VERSION_M_M//./}" # "126"
87
+ export UV_TORCH_BACKEND=cu${CUDA_VERSION_MM}
44
88
# Map the package name to its import name for coverage (fabric -> lightning_fabric).
# Double quotes let the shell expand ${PACKAGE_NAME}; inside single quotes "$(PACKAGE_NAME)" reached Python as literal text.
COVERAGE_SOURCE=$(python -c "n = '${PACKAGE_NAME}' ; print(dict(fabric='lightning_fabric').get(n, n))")
45
89
echo "collecting coverage for: ${COVERAGE_SOURCE}"
46
- TORCH_VER=$(python -c "import torch; print(torch.__version__.rsplit('.', 1)[0])")
47
90
48
- if [ "${TORCH_VER}" == "2.1" ]; then
91
+ uv pip install fire wget packaging "lightning-utilities[cli]"
92
+ if [ "${python_version}" == "3.10" ]; then
49
93
echo "Set oldest versions"
50
- pip uninstall -y deepspeed
51
- pip install -U "lightning-utilities[cli]"
52
94
cd requirements/fabric
53
95
python -m lightning_utilities.cli requirements set-oldest --req_files "['base.txt', 'strategies.txt']"
54
96
python -m lightning_utilities.cli requirements prune-pkgs --packages deepspeed --req_files strategies.txt
55
97
cd ../..
56
- pip install "cython<3.0" wheel # for compatibility
98
+ uv pip install "cython<3.0" wheel # for compatibility
57
99
fi
58
100
101
+ # install the base so we can adjust other packages
102
+ uv pip install .
59
103
echo "Adjust torch versions in requirements files"
60
104
PYTORCH_VERSION=$(python -c "import torch; print(torch.__version__.split('+')[0])")
61
- pip install -q wget packaging
105
+ uv pip install wget packaging
62
106
python -m wget https://raw.githubusercontent.com/Lightning-AI/utilities/main/scripts/adjust-torch-versions.py
63
107
for fpath in `ls requirements/**/*.txt`; do \
64
108
python ./adjust-torch-versions.py $fpath ${PYTORCH_VERSION}; \
65
109
done
66
110
67
111
if [ "${PACKAGE_NAME}" == "fabric" ]; then
68
112
echo "Replaced PL imports"
69
- pip install -U -q -r .actions/requirements.txt
113
+ uv pip install --upgrade -r .actions/requirements.txt
70
114
python .actions/assistant.py copy_replace_imports --source_dir="./tests/tests_fabric" \
71
115
--source_import="lightning.fabric" \
72
116
--target_import="lightning_fabric"
@@ -76,11 +120,10 @@ run: |
76
120
fi
77
121
78
122
# Select the extras prefix: "fabric-" for the lightning package, empty otherwise.
# Use ${PACKAGE_NAME} (variable expansion); $(PACKAGE_NAME) would try to run a command named PACKAGE_NAME.
extra=$(python -c "print({'lightning': 'fabric-'}.get('${PACKAGE_NAME}', ''))")
79
- pip install -e ".[${extra}dev]" -U -- upgrade-strategy=eager --extra-index-url="${TORCH_URL}"
123
+ # Install the package with its dev extras; "--upgrade" must be one token, since a bare "--" ends option parsing
+ uv pip install ".[${extra}dev]" --upgrade
80
124
81
125
python requirements/collect_env_details.py
82
126
python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu >= 2, f'GPU: {mgpu}'"
83
- python requirements/pytorch/check-avail-extras.py
84
127
python -c "import bitsandbytes"
85
128
86
129
echo "Testing: Fabric doctests"
@@ -96,7 +139,7 @@ run: |
96
139
97
140
echo "Testing: fabric standalone"
98
141
export PL_RUN_STANDALONE_TESTS=1
99
- wget https://raw.githubusercontent.com/Lightning-AI/utilities/main/scripts/run_standalone_tests.sh
142
+ python -m wget https://raw.githubusercontent.com/Lightning-AI/utilities/main/scripts/run_standalone_tests.sh
100
143
bash ./run_standalone_tests.sh "tests_fabric"
101
144
export PL_RUN_STANDALONE_TESTS=0
102
145
0 commit comments