@@ -8,65 +8,109 @@ timeout: "55" # minutes
88parametrize :
99 matrix : {}
1010 include :
11- # note that this is setting also all oldest requirements which is linked to Torch == 2.1
12- - image : " pytorchlightning/pytorch_lightning:base-cuda12 .1.1-py3.10-torch2.1 "
11+ # note that this also sets oldest requirements which are linked to Python == 3.10
12+ - image : " nvidia/cuda:12 .1.1-runtime-ubuntu22.04 "
1313 PACKAGE_NAME : " pytorch"
14+ python_version : " 3.10"
1415 machine : " A100_X_2"
15- - image : " pytorchlightning/pytorch_lightning:base-cuda12 .6.3-py3.12-torch2.8 "
16+ - image : " nvidia/cuda:12 .6.3-runtime-ubuntu22.04 "
1617 PACKAGE_NAME : " pytorch"
18+ python_version : " 3.12"
1719 machine : " L4_X_2"
18- # - image: "pytorchlightning/pytorch_lightning:base-cuda12 .6.3-py3.12-torch2.7 "
20+ # - image: "nvidia/cuda:12 .6.3-runtime-ubuntu22.04 "
1921 # PACKAGE_NAME: "pytorch"
20- - image : " pytorchlightning/pytorch_lightning:base-cuda12 .6.3-py3.12-torch2.8 "
22+ - image : " nvidia/cuda:12 .6.3-runtime-ubuntu22.04 "
2123 PACKAGE_NAME : " lightning"
24+ python_version : " 3.12"
2225 machine : " L4_X_2"
2326 exclude : []
2427
2528env :
29+ TZ : " Etc/UTC"
30+ DEBIAN_FRONTEND : " noninteractive"
31+ CUDA_TOOLKIT_ROOT_DIR : " /usr/local/cuda"
32+ MKL_THREADING_LAYER : " GNU"
33+ CUDA_LAUNCH_BLOCKING : " 1"
34+ NCCL_DEBUG : " INFO"
35+ TORCHDYNAMO_VERBOSE : " 1"
2636 FREEZE_REQUIREMENTS : " 1"
2737 RUN_ONLY_CUDA_TESTS : " 1"
2838
2939run : |
40+ # Install Python and UV
41+ apt-get update -qq --fix-missing
42+ apt-get install -q -y software-properties-common curl
43+ # Add deadsnakes PPA for newer Python versions if needed
44+ add-apt-repository ppa:deadsnakes/ppa -y
45+ apt-get update -qq --fix-missing
46+ apt-get install -q -y --no-install-recommends --allow-downgrades --allow-change-held-packages \
47+ build-essential \
48+ pkg-config \
49+ cmake \
50+ ca-certificates \
51+ libopenmpi-dev \
52+ openmpi-bin \
53+ ninja-build \
54+ libnccl2 \
55+ libnccl-dev
56+
57+ apt-get install -y python${python_version} python${python_version}-venv python${python_version}-dev
58+ ln -sf /usr/bin/python${python_version} /usr/bin/python
59+ curl -LsSf https://astral.sh/uv/install.sh | sh
60+
61+ # Source the environment and ensure UV is in PATH
62+ [ -f "$HOME/.local/bin/env" ] && . "$HOME/.local/bin/env"
63+ export PATH="$HOME/.local/bin:$PATH"
64+ source $HOME/.cargo/env 2>/dev/null || true
65+ export PATH="$HOME/.cargo/bin:$PATH"
66+
67+ # Verify UV installation
68+ command -v uv || (echo "UV not found in PATH" && exit 1)
69+ # Create and activate a local uv virtual environment
70+ uv venv .venv -p "/usr/bin/python${python_version}" || uv venv .venv -p "python${python_version}" || uv venv .venv
71+ . .venv/bin/activate
72+ hash -r
73+
3074 whereis nvidia
3175 nvidia-smi
3276 python --version
33- pip --version
34- pip install -q fire wget packaging
35- pip list
77+ uv --version
78+ uv pip list
3679 set -ex
3780
38- CUDA_VERSION="${image##*cuda}" # Remove everything up to and including "cuda"
81+ # Parse CUDA version from image tag, e.g., "nvidia/cuda:12.6.3-runtime-ubuntu22.04"
82+ IMAGE_TAG="${image##*:}" # "12.6.3-runtime-ubuntu22.04"
83+ CUDA_VERSION="${IMAGE_TAG%%-*}" # "12.6.3"
3984 echo "Using CUDA version: ${CUDA_VERSION}"
40- CUDA_VERSION_M_M="${cuda_version%.*}" # Get major.minor by removing the last dot and everything after
41- CUDA_VERSION_MM="${CUDA_VERSION_M_M//'.'/''}"
42- TORCH_URL="https://download.pytorch.org/whl/cu${CUDA_VERSION_MM}/torch_stable.html"
43- echo "Torch URL: ${TORCH_URL}"
85+ CUDA_VERSION_M_M="${CUDA_VERSION%.*}" # "12.6"
86+ CUDA_VERSION_MM="${CUDA_VERSION_M_M//./}" # "126"
87+ export UV_TORCH_BACKEND=cu${CUDA_VERSION_MM}
4488 COVERAGE_SOURCE=$(python -c 'n = "$(PACKAGE_NAME)" ; print(dict(fabric="pytorch_lightning").get(n, n))')
4589 echo "collecting coverage for: ${COVERAGE_SOURCE}"
46- TORCH_VER=$(python -c "import torch; print(torch.__version__.rsplit('.', 1)[0])")
4790
48- if [ "${TORCH_VER}" == "2.1" ]; then
91+ uv pip install -q fire wget packaging "lightning-utilities[cli]"
92+ if [ "${python_version}" == "3.10" ]; then
4993 echo "Set oldest versions"
50- pip uninstall -y deepspeed
51- pip install -U "lightning-utilities[cli]"
5294 cd requirements/pytorch
5395 python -m lightning_utilities.cli requirements set-oldest --req_files "['base.txt', 'extra.txt', 'strategies.txt', 'examples.txt']"
5496 python -m lightning_utilities.cli requirements prune-pkgs --packages deepspeed --req_files strategies.txt
5597 cd ../..
56- pip install "cython<3.0" wheel # for compatibility
98+ uv pip install "cython<3.0" wheel # for compatibility
5799 fi
58100
101+ # install the base so we can adjust other packages
102+ uv pip install .
59103 echo "Adjust torch versions in requirements files"
60104 PYTORCH_VERSION=$(python -c "import torch; print(torch.__version__.split('+')[0])")
61- pip install -q wget packaging
105+ uv pip install -q wget packaging
62106 python -m wget https://raw.githubusercontent.com/Lightning-AI/utilities/main/scripts/adjust-torch-versions.py
63107 for fpath in `ls requirements/**/*.txt`; do \
64108 python ./adjust-torch-versions.py $fpath ${PYTORCH_VERSION}; \
65109 done
66110
67111 if [ "${PACKAGE_NAME}" == "pytorch" ]; then
68112 echo "Adjust PL imports"
69- pip install -U -q -r .actions/requirements.txt
113+ uv pip install --upgrade -r .actions/requirements.txt
70114 python .actions/assistant.py copy_replace_imports --source_dir="./tests/tests_pytorch" \
71115 --source_import="lightning.fabric,lightning.pytorch" \
72116 --target_import="lightning_fabric,pytorch_lightning"
@@ -76,14 +120,14 @@ run: |
76120 fi
77121
78122 extra=$(python -c "print({'lightning': 'pytorch-'}.get('$(PACKAGE_NAME)', ''))")
79- pip install -e ".[${extra}dev]" -U -- upgrade-strategy=eager --extra-index-url="${TORCH_URL}"
123+ uv pip install -e ".[${extra}dev]" -- upgrade
80124
81125 if [ "${PACKAGE_NAME}" == "pytorch" ]; then
82126 echo "uninstall lightning to have just single package"
83- pip uninstall -y lightning
127+ uv pip uninstall lightning
84128 elif [ "${PACKAGE_NAME}" == "lightning" ]; then
85129 echo "uninstall PL to have just single package"
86- pip uninstall -y pytorch-lightning
130+ uv pip uninstall pytorch-lightning
87131 fi
88132
89133 python requirements/collect_env_details.py
@@ -112,7 +156,7 @@ run: |
112156 echo "Testing: fabric standalone"
113157 export PL_USE_MOCKED_MNIST=1
114158 export PL_RUN_STANDALONE_TESTS=1
115- wget https://raw.githubusercontent.com/Lightning-AI/utilities/main/scripts/run_standalone_tests.sh
159+ python -m wget https://raw.githubusercontent.com/Lightning-AI/utilities/main/scripts/run_standalone_tests.sh
116160 bash ./run_standalone_tests.sh "tests_pytorch"
117161 export PL_RUN_STANDALONE_TESTS=0
118162
0 commit comments