Skip to content

Commit ec1824f

Browse files
committed
pytorch
1 parent 0f435fd commit ec1824f

File tree

2 files changed

+130
-0
lines changed

2 files changed

+130
-0
lines changed

.lightning/workflows/fabric.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,9 @@ run: |
100100
# ./codecov --token=$(CODECOV_TOKEN) --commit=$(Build.SourceVersion) \
101101
# --flags=gpu,pytest,${COVERAGE_SOURCE} --name="GPU-coverage" --env=linux,azure
102102
# ls -l
103+
cd ..
103104
104105
# Testing: fabric examples
106+
cd examples/
105107
bash run_fabric_examples.sh --accelerator=cuda --devices=1
106108
bash run_fabric_examples.sh --accelerator=cuda --devices=2 --strategy ddp

.lightning/workflows/pytorch.yml

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
# Run this workflow for pushes to, and pull requests against, the "master" branch.
trigger:
2+
push:
3+
branches: ["master"]
4+
pull_request:
5+
branches: ["master"]
6+
7+
timeout: "75" # minutes
8+
# NOTE(review): presumably a two-GPU machine type; the run script asserts that at least 2 CUDA devices are visible.
machine: "L4_X_2"
9+
# Each `include` entry pairs a container image with a PACKAGE_NAME; the run script reads both as
# shell variables (`image`, `PACKAGE_NAME`).
parametrize:
10+
matrix: {}
11+
include:
12+
# note: this Torch 2.1 entry is also the one meant to exercise the oldest supported requirements
# (the run script calls `replace_oldest_ver` when TORCH_VER is "2.1")
13+
- image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.1-cuda12.1.1"
14+
PACKAGE_NAME: "pytorch"
15+
- image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.7-cuda12.6.3"
16+
PACKAGE_NAME: "pytorch"
17+
# - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.7-cuda12.6.3"
18+
# PACKAGE_NAME: "pytorch"
19+
- image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.7-cuda12.6.3"
20+
PACKAGE_NAME: "lightning"
21+
exclude: []
22+
23+
# Neither variable is referenced by name in the run script below; presumably they are consumed by
# the tooling it invokes (setup / pytest) — TODO confirm.
env:
24+
FREEZE_REQUIREMENTS: "1"
25+
RUN_ONLY_CUDA_TESTS: "1"
26+
27+
# Bash script executed for every `include` entry above.
run: |
28+
whereis nvidia
29+
nvidia-smi
30+
python --version
31+
pip --version
32+
pip install -q fire wget packaging
33+
set -ex
34+
35+
# Work out which PyTorch wheel index matches the CUDA toolkit named in the image tag.
# Reads: image             e.g. "...:base-cuda-py3.10-torch2.1-cuda12.1.1"
# Sets:  CUDA_VERSION      e.g. "12.1.1"
#        CUDA_VERSION_M_M  e.g. "12.1"
#        CUDA_VERSION_MM   e.g. "121"
#        TORCH_URL         e.g. "https://download.pytorch.org/whl/cu121/torch_stable.html"
resolve_torch_url() {
  CUDA_VERSION="${image##*cuda}" # Remove everything up to and including the last "cuda"
  echo "Using CUDA version: ${CUDA_VERSION}"
  # Must expand the upper-case CUDA_VERSION set above: shell variable names are case-sensitive,
  # and a lower-case `cuda_version` is never defined, which would leave the URL at ".../whl/cu/...".
  CUDA_VERSION_M_M="${CUDA_VERSION%.*}" # Get major.minor by removing the last dot and everything after
  CUDA_VERSION_MM="${CUDA_VERSION_M_M//./}" # Drop the dot: "12.1" -> "121"
  TORCH_URL="https://download.pytorch.org/whl/cu${CUDA_VERSION_MM}/torch_stable.html"
  echo "${TORCH_URL}"
}
resolve_torch_url
41+
# Choose the import package whose coverage is measured for the current PACKAGE_NAME.
# Reads: PACKAGE_NAME
# Sets:  COVERAGE_SOURCE
# Done in plain shell: the previous Python one-liner received the Azure-style `$(PACKAGE_NAME)`
# inside single quotes, so the shell never expanded it and the literal text became the result.
resolve_coverage_source() {
  case "${PACKAGE_NAME}" in
    # "pytorch" is imported as `pytorch_lightning` (see the import rewriting and doctest steps);
    # the "fabric" mapping is carried over unchanged from the original lookup table.
    pytorch | fabric) COVERAGE_SOURCE="pytorch_lightning" ;;
    # any other package name is used as-is (e.g. "lightning")
    *) COVERAGE_SOURCE="${PACKAGE_NAME}" ;;
  esac
  echo "collecting coverage for: ${COVERAGE_SOURCE}"
}
resolve_coverage_source
43+
44+
# Switch to the oldest supported dependency versions when testing against Torch 2.1.
# Reads: TORCH_VER (optional), image
# Sets:  TORCH_VER when it is unset or empty
# Nothing in this workflow defines TORCH_VER, so unless the runner injects it the check below
# could never match. Fall back to the version embedded in the image tag
# ("...-torch2.1-cuda12.1.1" -> "2.1"); an externally provided value always wins.
resolve_torch_ver() {
  if [ -z "${TORCH_VER:-}" ]; then
    local after_torch="${image##*torch}" # text after the last "torch", e.g. "2.1-cuda12.1.1"
    TORCH_VER="${after_torch%%-*}"       # keep what precedes the first "-", e.g. "2.1"
  fi
}
resolve_torch_ver

if [ "${TORCH_VER}" == "2.1" ]; then
  # Set oldest versions
  python .actions/assistant.py replace_oldest_ver
  pip install "cython<3.0" wheel # for compatibility
fi
49+
50+
# Adjust torch versions in requirements files so they agree with the torch build already
# installed in the image (local build suffix such as "+cu121" stripped).
PYTORCH_VERSION=$(python -c "import torch; print(torch.__version__.split('+')[0])")
pip install -q wget packaging
python -m wget https://raw.githubusercontent.com/Lightning-AI/utilities/main/scripts/adjust-torch-versions.py
# Glob directly instead of parsing `ls` output, which word-splits file names.
# globstar is not enabled here, so `**` matches exactly one directory level, as it did before.
for fpath in requirements/**/*.txt; do
  # an unmatched glob is left as the literal pattern; skip it rather than pass a bogus path
  [ -e "${fpath}" ] || continue
  python ./adjust-torch-versions.py "${fpath}" "${PYTORCH_VERSION}"
done
57+
58+
# Standalone "pytorch" package only: install the helper requirements, then have the assistant
# script rewrite `lightning.fabric` / `lightning.pytorch` imports to `lightning_fabric` /
# `pytorch_lightning` in the test suite and in the basic examples.
if [ "${PACKAGE_NAME}" == "pytorch" ]; then
  pip install -U -q -r .actions/requirements.txt
  for import_root in "./tests/tests_pytorch" "./examples/pytorch/basics"; do
    python .actions/assistant.py copy_replace_imports --source_dir="${import_root}" \
      --source_import="lightning.fabric,lightning.pytorch" \
      --target_import="lightning_fabric,pytorch_lightning"
  done
fi
67+
68+
# Pick the prefix of the pip "extras" group to install for the current PACKAGE_NAME.
# Reads: PACKAGE_NAME
# Sets:  extra  ("pytorch-" for the unified "lightning" package, empty otherwise)
# Done in plain shell: inside double quotes the Azure-style `$(PACKAGE_NAME)` is a command
# substitution that tries to run a command called PACKAGE_NAME, so the lookup key was always empty.
resolve_extra() {
  case "${PACKAGE_NAME}" in
    lightning) extra="pytorch-" ;;
    *) extra="" ;;
  esac
}
resolve_extra
69+
# Editable install of the checkout with its "<extra>dev" extras, upgrading dependencies eagerly
# and letting pip also resolve wheels from the CUDA-specific torch index.
pip install -e ".[${extra}dev]" -U --upgrade-strategy=eager --extra-index-url="${TORCH_URL}"

# Remove the sibling distribution so that only the package under test stays installed.
case "${PACKAGE_NAME}" in
  pytorch)
    # Drop LAI from extensions
    pip uninstall -y lightning
    ;;
  lightning)
    # Drop PL for LAI
    pip uninstall -y pytorch-lightning
    ;;
esac
78+
79+
# Environment sanity checks; with `set -e` active, the first failing check stops the job.
python requirements/collect_env_details.py
80+
# Require at least two visible CUDA devices; the assertion message reports the count found.
python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu >= 2, f'GPU: {mgpu}'"
81+
python requirements/pytorch/check-avail-extras.py
82+
# Fail early if bitsandbytes cannot be imported in this environment.
python -c "import bitsandbytes"
83+
84+
# Testing: PyTorch doctests (standalone "pytorch" package only)
if [ "${PACKAGE_NAME}" == "pytorch" ]; then
  # run from inside src/ in a subshell, so the working directory of the main script is unchanged
  (
    cd src/
    python -m pytest pytorch_lightning
  )
fi
90+
91+
# Get legacy checkpoints
bash .actions/pull_legacy_checkpoints.sh
cd tests/legacy
bash generate_checkpoints.sh
ls -l checkpoints/
# Return to the repository root: the `cd tests/` below is relative to it and would otherwise
# be resolved inside tests/legacy, where no tests/ directory is expected.
cd ../..

cd tests/
# Testing: PyTorch standard
python -m coverage run --source "${COVERAGE_SOURCE}" -m pytest tests_pytorch/ -v --durations=50
100+
101+
# Testing: PyTorch standalone
# Both switches are exported so that the downloaded runner script and the tests it launches see them.
export PL_USE_MOCKED_MNIST=1 PL_RUN_STANDALONE_TESTS=1
wget https://raw.githubusercontent.com/Lightning-AI/utilities/main/scripts/run_standalone_tests.sh
bash ./run_standalone_tests.sh "tests_pytorch"

# Testing: PyTorch standalone tasks
cd tests_pytorch/
bash run_standalone_tasks.sh
110+
111+
# Render the collected coverage data three ways: terminal report, XML and HTML.
for coverage_cmd in report xml html; do
  python -m coverage "${coverage_cmd}"
done

# TODO: enable coverage
# # https://docs.codecov.com/docs/codecov-uploader
# curl -Os https://uploader.codecov.io/latest/linux/codecov
# chmod +x codecov
# ./codecov --token=$(CODECOV_TOKEN) --commit=$(Build.SourceVersion) \
# --flags=gpu,pytest,${COVERAGE_SOURCE} --name="GPU-coverage" --env=linux,azure
# ls -l
# step back up two directory levels (out of tests/tests_pytorch/)
cd ../..
123+
124+
# Testing: PyTorch examples
cd examples/
# Every run targets the GPU accelerator; only device count, strategy and precision vary.
run_pl_examples() {
  bash run_pl_examples.sh --trainer.accelerator=gpu "$@"
}
run_pl_examples --trainer.devices=1
run_pl_examples --trainer.devices=2 --trainer.strategy=ddp
run_pl_examples --trainer.devices=2 --trainer.strategy=ddp --trainer.precision=16

0 commit comments

Comments
 (0)