@@ -8,67 +8,109 @@ timeout: "55" # minutes
# Job matrix. Each `include` entry describes one job; the `run` script reads
# these fields as shell variables:
#   image          - container image; `run` derives the CUDA version from the
#                    part of the tag after ":" and before the first "-"
#   PACKAGE_NAME   - which package is tested ("pytorch" or "lightning")
#   python_version - interpreter that `run` installs via apt and uses for the venv;
#                    "3.10" additionally switches the requirements to their oldest versions
#   machine        - presumably the hardware type the job is scheduled on — TODO confirm
88parametrize :
99 matrix : {}
1010 include :
11- # note that this is setting also all oldest requirements which is linked to Torch == 2.1
12- - image : " pytorchlightning/pytorch_lightning:base-cuda12 .1.1-py3.10-torch2.1 "
11+ # note that this also sets oldest requirements which are linked to Python == 3.10
12+ - image : " nvidia/cuda:12 .1.1-runtime-ubuntu22.04 "
1313 PACKAGE_NAME : " pytorch"
14+ python_version : " 3.10"
1415 machine : " T4_X_4"
15- - image : " pytorchlightning/pytorch_lightning:base-cuda12 .6.3-py3.12-torch2.8 "
16+ - image : " nvidia/cuda:12 .6.3-runtime-ubuntu22.04 "
1617 PACKAGE_NAME : " pytorch"
18+ python_version : " 3.12"
1719 machine : " L4_X_2"
18- # - image: "pytorchlightning/pytorch_lightning:base-cuda12 .6.3-py3.12-torch2.7 "
20+ # - image: "nvidia/cuda:12 .6.3-runtime-ubuntu22.04 "
1921 # PACKAGE_NAME: "pytorch"
20- - image : " pytorchlightning/pytorch_lightning:base-cuda12 .6.3-py3.12-torch2.8 "
22+ - image : " nvidia/cuda:12 .6.3-runtime-ubuntu22.04 "
2123 PACKAGE_NAME : " lightning"
24+ python_version : " 3.12"
2225 machine : " L4_X_2"
2326 exclude : []
2427
# Environment variables for the job. In this diff view the two "-" entries are
# the old positions of FREEZE_REQUIREMENTS / RUN_ONLY_CUDA_TESTS; both are
# re-added unchanged at the end of the block.
2528env :
26- FREEZE_REQUIREMENTS : " 1"
27- RUN_ONLY_CUDA_TESTS : " 1"
# TZ / DEBIAN_FRONTEND: presumably keep the apt-get steps in `run` from
# prompting on the bare CUDA image — TODO confirm.
29+ TZ : " Etc/UTC"
30+ DEBIAN_FRONTEND : " noninteractive"
31+ CUDA_TOOLKIT_ROOT_DIR : " /usr/local/cuda"
32+ MKL_THREADING_LAYER : " GNU"
2833 CUDA_LAUNCH_BLOCKING : " 1"
2934 NCCL_DEBUG : " INFO"
35+ TORCHDYNAMO_VERBOSE : " 1"
36+ FREEZE_REQUIREMENTS : " 1"
37+ RUN_ONLY_CUDA_TESTS : " 1"
3038
3139run : |
40+ # Install Python and UV
41+ apt-get update -qq --fix-missing
42+ apt-get install -q -y software-properties-common curl
43+ # Add deadsnakes PPA for newer Python versions if needed
44+ add-apt-repository ppa:deadsnakes/ppa -y
45+ apt-get update -qq --fix-missing
46+ apt-get install -q -y --no-install-recommends --allow-downgrades --allow-change-held-packages \
47+ build-essential \
48+ pkg-config \
49+ cmake \
50+ ca-certificates \
51+ libopenmpi-dev \
52+ openmpi-bin \
53+ ninja-build \
54+ libnccl2 \
55+ libnccl-dev
56+
57+ apt-get install -y python${python_version} python${python_version}-venv python${python_version}-dev
58+ ln -sf /usr/bin/python${python_version} /usr/bin/python
59+ curl -LsSf https://astral.sh/uv/install.sh | sh
60+
61+ # Source the environment and ensure UV is in PATH
62+ [ -f "$HOME/.local/bin/env" ] && . "$HOME/.local/bin/env"
63+ export PATH="$HOME/.local/bin:$PATH"
64+ source $HOME/.cargo/env 2>/dev/null || true
65+ export PATH="$HOME/.cargo/bin:$PATH"
66+
67+ # Verify UV installation
# `set -ex` is only enabled further down, and `exit` inside a ( ... ) subshell
# would leave just the subshell, so a brace group is used to make a missing
# `uv` actually abort the script. The message goes to stderr.
68+ command -v uv || { echo "UV not found in PATH" >&2; exit 1; }
69+ # Create and activate a local uv virtual environment
70+ uv venv .venv -p "/usr/bin/python${python_version}" || uv venv .venv -p "python${python_version}" || uv venv .venv
71+ . .venv/bin/activate
72+ hash -r
73+
3274 whereis nvidia
3375 nvidia-smi
3476 python --version
35- pip --version
36- pip install -q fire wget packaging
37- pip list
77+ uv --version
78+ uv pip list
3879 set -ex
3980
40- CUDA_VERSION="${image##*cuda}" # Remove everything up to and including "cuda"
81+ # Parse CUDA version from image tag, e.g., "nvidia/cuda:12.6.3-runtime-ubuntu22.04"
82+ IMAGE_TAG="${image##*:}" # "12.6.3-runtime-ubuntu22.04"
83+ CUDA_VERSION="${IMAGE_TAG%%-*}" # "12.6.3"
4184 echo "Using CUDA version: ${CUDA_VERSION}"
42- CUDA_VERSION_M_M="${cuda_version%.*}" # Get major.minor by removing the last dot and everything after
43- CUDA_VERSION_MM="${CUDA_VERSION_M_M//'.'/''}"
44- TORCH_URL="https://download.pytorch.org/whl/cu${CUDA_VERSION_MM}/torch_stable.html"
45- echo "Torch URL: ${TORCH_URL}"
85+ CUDA_VERSION_M_M="${CUDA_VERSION%.*}" # "12.6"
86+ CUDA_VERSION_MM="${CUDA_VERSION_M_M//./}" # "126"
87+ export UV_TORCH_BACKEND=cu${CUDA_VERSION_MM}
# Map the package under test to the module name used for coverage collection.
# The shell variable ${PACKAGE_NAME} is used, as elsewhere in this script:
# "$(PACKAGE_NAME)" inside single quotes is handed to Python literally and is
# never substituted by the shell.
# NOTE(review): if the CI platform itself rewrites `$(VAR)` macros before the
# script runs, the previous spelling also worked — confirm against the runner.
4688 COVERAGE_SOURCE=$(python -c "n = '${PACKAGE_NAME}' ; print(dict(fabric='pytorch_lightning').get(n, n))")
4789 echo "collecting coverage for: ${COVERAGE_SOURCE}"
48- TORCH_VER=$(python -c "import torch; print(torch.__version__.rsplit('.', 1)[0])")
4990
50- if [ "${TORCH_VER}" == "2.1" ]; then
91+ uv pip install -q fire wget packaging "lightning-utilities[cli]"
92+ if [ "${python_version}" == "3.10" ]; then
5193 echo "Set oldest versions"
52- pip uninstall -y deepspeed
53- pip install -U "lightning-utilities[cli]"
5494 cd requirements/pytorch
5595 python -m lightning_utilities.cli requirements set-oldest --req_files "['base.txt', 'extra.txt', 'strategies.txt', 'examples.txt']"
5696 python -m lightning_utilities.cli requirements prune-pkgs --packages deepspeed --req_files strategies.txt
5797 cd ../..
58- pip install "cython<3.0" wheel # for compatibility
98+ uv pip install "cython<3.0" wheel # for compatibility
5999 fi
60100
101+ # install the base so we can adjust other packages
102+ uv pip install .
61103 echo "Adjust torch versions in requirements files"
62104 PYTORCH_VERSION=$(python -c "import torch; print(torch.__version__.split('+')[0])")
63- pip install -q wget packaging
105+ uv pip install -q wget packaging
64106 python -m wget https://raw.githubusercontent.com/Lightning-AI/utilities/main/scripts/adjust-torch-versions.py
65107 for fpath in `ls requirements/**/*.txt`; do \
66108 python ./adjust-torch-versions.py $fpath ${PYTORCH_VERSION}; \
67109 done
68110
69111 if [ "${PACKAGE_NAME}" == "pytorch" ]; then
70112 echo "Adjust PL imports"
71- pip install -U -q -r .actions/requirements.txt
113+ uv pip install --upgrade -r .actions/requirements.txt
72114 python .actions/assistant.py copy_replace_imports --source_dir="./tests/tests_pytorch" \
73115 --source_import="lightning.fabric,lightning.pytorch" \
74116 --target_import="lightning_fabric,pytorch_lightning"
@@ -78,14 +120,14 @@ run: |
78120 fi
79121
# Pick the extras prefix: "pytorch-" when testing the unified `lightning`
# package, empty otherwise, then install the matching "<prefix>dev" extra.
# ${PACKAGE_NAME} is the shell variable; '$(PACKAGE_NAME)' inside double quotes
# is a command substitution of a non-existent command and always yields "".
80122 extra=$(python -c "print({'lightning': 'pytorch-'}.get('${PACKAGE_NAME}', ''))")
81- pip install -e ".[${extra}dev]" -U -- upgrade-strategy=eager --extra-index-url="${TORCH_URL}"
# `--upgrade` must be a single token: a bare `--` ends option parsing and would
# turn `upgrade` into a package name.
123+ uv pip install -e ".[${extra}dev]" --upgrade
82124
# Keep only the package under test installed so the tests cannot import the
# sibling distribution by accident.
# `uv pip uninstall` never prompts, so no `-y` is passed; with `|| true` in
# place, an unrecognised flag would be swallowed and the uninstall silently
# skipped. `|| true` is kept so the step stays best-effort.
83125 if [ "${PACKAGE_NAME}" == "pytorch" ]; then
84126 echo "uninstall lightning to have just single package"
85- pip uninstall -y lightning
127+ uv pip uninstall lightning || true
86128 elif [ "${PACKAGE_NAME}" == "lightning" ]; then
87129 echo "uninstall PL to have just single package"
88- pip uninstall -y pytorch-lightning
130+ uv pip uninstall pytorch-lightning || true
89131 fi
90132
91133 python requirements/collect_env_details.py
@@ -114,7 +156,7 @@ run: |
114156 echo "Testing: fabric standalone"
115157 export PL_USE_MOCKED_MNIST=1
116158 export PL_RUN_STANDALONE_TESTS=1
117- wget https://raw.githubusercontent.com/Lightning-AI/utilities/main/scripts/run_standalone_tests.sh
159+ python -m wget https://raw.githubusercontent.com/Lightning-AI/utilities/main/scripts/run_standalone_tests.sh
118160 bash ./run_standalone_tests.sh "tests_pytorch"
119161 export PL_RUN_STANDALONE_TESTS=0
120162
0 commit comments