Skip to content

Commit c0ebb3b

Browse files
authored
Merge branch 'master' into refactor/tensorboard_log_metrics_handling
2 parents ca835f2 + b554e99 commit c0ebb3b

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

48 files changed

+1594
-107
lines changed

.azure/gpu-tests-fabric.yml

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ jobs:
8585
displayName: "extend env. vars 4 future"
8686
8787
- bash: |
88+
set -ex
8889
echo $(DEVICES)
8990
echo $CUDA_VISIBLE_DEVICES
9091
echo $CUDA_VERSION_MM
@@ -96,6 +97,10 @@ jobs:
9697
python --version
9798
pip --version
9899
pip list
100+
# TODO: use a devel base image instead of installing cuda-toolkit here
101+
apt-get update -qq --fix-missing
102+
apt-get install -y cuda-toolkit
103+
nvcc --version
99104
displayName: "Image info & NVIDIA"
100105
101106
- bash: |
@@ -156,7 +161,7 @@ jobs:
156161
- bash: python -m coverage run --source ${COVERAGE_SOURCE} -m pytest tests_fabric/ -v --durations=50
157162
workingDirectory: tests/
158163
displayName: "Testing: fabric standard"
159-
timeoutInMinutes: "10"
164+
timeoutInMinutes: "15"
160165

161166
- bash: |
162167
wget https://raw.githubusercontent.com/Lightning-AI/utilities/main/scripts/run_standalone_tests.sh
@@ -165,7 +170,7 @@ jobs:
165170
env:
166171
PL_RUN_STANDALONE_TESTS: "1"
167172
displayName: "Testing: fabric standalone"
168-
timeoutInMinutes: "10"
173+
timeoutInMinutes: "15"
169174
170175
- bash: |
171176
python -m coverage report

.azure/gpu-tests-pytorch.yml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ jobs:
8484
displayName: "extend env. vars 4 future"
8585
8686
- bash: |
87+
set -ex
8788
echo $(DEVICES)
8889
echo $CUDA_VISIBLE_DEVICES
8990
echo $CUDA_VERSION_MM
@@ -95,6 +96,10 @@ jobs:
9596
python --version
9697
pip --version
9798
pip list
99+
# TODO: use a devel base image instead of installing cuda-toolkit here
100+
apt-get update -qq --fix-missing
101+
apt-get install -y cuda-toolkit
102+
nvcc --version
98103
displayName: "Image info & NVIDIA"
99104
100105
- bash: |
@@ -189,7 +194,7 @@ jobs:
189194
env:
190195
PL_USE_MOCKED_MNIST: "1"
191196
displayName: "Testing: PyTorch standalone tasks"
192-
timeoutInMinutes: "10"
197+
timeoutInMinutes: "15"
193198

194199
- bash: |
195200
python -m coverage report

.github/checkgroup.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ subprojects:
4747
- "!*.md"
4848
- "!**/*.md"
4949
checks:
50-
- "pytorch.yml / Lit Job (nvidia/cuda:12.1.1-runtime-ubuntu22.04, pytorch, 3.10)"
50+
- "pytorch.yml / Lit Job (nvidia/cuda:12.1.1-devel-ubuntu22.04, pytorch, 3.10)"
5151
- "pytorch.yml / Lit Job (lightning, 3.12)"
5252
- "pytorch.yml / Lit Job (pytorch, 3.12)"
5353

@@ -148,7 +148,7 @@ subprojects:
148148
- "!*.md"
149149
- "!**/*.md"
150150
checks:
151-
- "fabric.yml / Lit Job (nvidia/cuda:12.1.1-runtime-ubuntu22.04, fabric, 3.10)"
151+
- "fabric.yml / Lit Job (nvidia/cuda:12.1.1-devel-ubuntu22.04, fabric, 3.10)"
152152
- "fabric.yml / Lit Job (fabric, 3.12)"
153153
- "fabric.yml / Lit Job (lightning, 3.12)"
154154

.github/workflows/_legacy-checkpoints.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ jobs:
5858
- uses: actions/checkout@v5
5959

6060
- name: Install uv and set Python version
61-
uses: astral-sh/setup-uv@v6
61+
uses: astral-sh/setup-uv@v7
6262
with:
6363
python-version: "3.9"
6464
# TODO: Avoid activating environment like this

.github/workflows/ci-tests-fabric.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ jobs:
7171
- uses: actions/checkout@v5
7272

7373
- name: Install uv and set Python version
74-
uses: astral-sh/setup-uv@v6
74+
uses: astral-sh/setup-uv@v7
7575
with:
7676
python-version: ${{ matrix.config.python-version || '3.9' }}
7777
# TODO: Avoid activating environment like this

.github/workflows/ci-tests-pytorch.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ jobs:
7676
- uses: actions/checkout@v5
7777

7878
- name: Install uv and set Python version
79-
uses: astral-sh/setup-uv@v6
79+
uses: astral-sh/setup-uv@v7
8080
with:
8181
python-version: ${{ matrix.config.python-version || '3.9' }}
8282
# TODO: Avoid activating environment like this

.github/workflows/code-checks.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ jobs:
3131
- uses: actions/checkout@v5
3232

3333
- name: Install uv and set Python version
34-
uses: astral-sh/setup-uv@v6
34+
uses: astral-sh/setup-uv@v7
3535
with:
3636
python-version: "3.11"
3737
# TODO: Avoid activating environment like this

.github/workflows/docs-build.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ jobs:
7373
lfs: ${{ matrix.pkg-name == 'pytorch' }}
7474

7575
- name: Install uv and set Python version
76-
uses: astral-sh/setup-uv@v6
76+
uses: astral-sh/setup-uv@v7
7777
with:
7878
python-version: "3.10"
7979
# TODO: Avoid activating environment like this

.lightning/workflows/fabric.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,18 @@ trigger:
66

77
timeout: "60" # minutes
88
machine: "L4_X_2"
9-
image: "nvidia/cuda:12.6.3-runtime-ubuntu22.04"
9+
image: "nvidia/cuda:12.6.3-devel-ubuntu22.04"
1010
parametrize:
1111
matrix: {}
1212
include:
1313
# note that this also sets all oldest requirements, which are linked to Python == 3.10
14-
- image: "nvidia/cuda:12.1.1-runtime-ubuntu22.04"
14+
- image: "nvidia/cuda:12.1.1-devel-ubuntu22.04"
1515
PACKAGE_NAME: "fabric"
1616
python_version: "3.10"
1717
- PACKAGE_NAME: "fabric"
1818
python_version: "3.12"
19-
# - image: "nvidia/cuda:12.6-runtime-ubuntu22.04"
20-
# PACKAGE_NAME: "fabric"
19+
#- image: "nvidia/cuda:12.6-runtime-ubuntu22.04"
20+
# PACKAGE_NAME: "fabric"
2121
- PACKAGE_NAME: "lightning"
2222
python_version: "3.12"
2323
exclude: []

.lightning/workflows/pytorch.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,18 @@ trigger:
66

77
timeout: "60" # minutes
88
machine: "L4_X_2"
9-
image: "nvidia/cuda:12.6.3-runtime-ubuntu22.04"
9+
image: "nvidia/cuda:12.6.3-devel-ubuntu22.04"
1010
parametrize:
1111
matrix: {}
1212
include:
1313
# note that this also sets oldest requirements which are linked to Python == 3.10
14-
- image: "nvidia/cuda:12.1.1-runtime-ubuntu22.04"
14+
- image: "nvidia/cuda:12.1.1-devel-ubuntu22.04"
1515
PACKAGE_NAME: "pytorch"
1616
python_version: "3.10"
1717
- PACKAGE_NAME: "pytorch"
1818
python_version: "3.12"
19-
# - image: "nvidia/cuda:12.6.3-runtime-ubuntu22.04"
20-
# PACKAGE_NAME: "pytorch"
19+
#- image: "nvidia/cuda:12.6.3-devel-ubuntu22.04"
20+
# PACKAGE_NAME: "pytorch"
2121
- PACKAGE_NAME: "lightning"
2222
python_version: "3.12"
2323
exclude: []

0 commit comments

Comments
 (0)