Skip to content

Commit 1b36e72

Browse files
authored
ci: test gpu on self-hosted runners (#1785)
GPU tests are now run on machines with a GPU.
1 parent 6ac9745 commit 1b36e72

File tree

1 file changed

+83
-42
lines changed

1 file changed

+83
-42
lines changed

.github/workflows/ci.yml

Lines changed: 83 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
name: CI
22

3-
on: [pull_request, push]
4-
#on: [push]
3+
on:
4+
pull_request:
5+
push:
6+
branches:
7+
- master
58

69
# Cancel a job if a new one is started on the same branch.
710
# Based on https://stackoverflow.com/questions/58895283/stop-already-running-workflow-job-in-github-actions/67223051#67223051
@@ -23,7 +26,9 @@ jobs:
2326
steps:
2427
- uses: actions/checkout@v4
2528
- name: Install required packages
26-
run: sudo apt install --no-install-recommends --yes libhwloc-dev ocl-icd-opencl-dev
29+
run: |
30+
sudo apt-get update
31+
sudo apt-get install --no-install-recommends --yes libhwloc-dev ocl-icd-opencl-dev
2732
- name: Install cargo clippy
2833
run: rustup component add clippy
2934
- name: Run cargo clippy
@@ -40,42 +45,49 @@ jobs:
4045
run: cargo fmt --all -- --check
4146

4247
test_release:
43-
runs-on: ubuntu-24.04
48+
runs-on: ['self-hosted', 'linux', 'x64', '4xlarge']
4449
name: Test in release mode
4550
strategy:
4651
matrix:
4752
cargo-args: ['', '--features fixed-rows-to-discard']
53+
fail-fast: false
4854
env:
4955
# Run all tests with multicore-SDR enabled.
5056
FIL_PROOFS_USE_MULTICORE_SDR: true
5157
steps:
5258
- uses: actions/checkout@v4
5359
- name: Install required packages
54-
run: sudo apt install --no-install-recommends --yes libhwloc-dev ocl-icd-opencl-dev
60+
run: |
61+
sudo apt-get update
62+
sudo apt-get install --no-install-recommends --yes libhwloc-dev ocl-icd-opencl-dev
5563
5664
- name: Download the proof params
5765
uses: ./.github/actions/proof-params-download
5866
with:
5967
github-token: ${{ secrets.GITHUB_TOKEN }}
6068

69+
# TODO: Remove this and other rust installation directives from jobs running
70+
# on self-hosted runners once rust is available on these machines by default
71+
- uses: dtolnay/rust-toolchain@21dc36fb71dd22e3317045c0c31a3f4249868b17
72+
with:
73+
toolchain: 1.83
74+
6175
- name: Run usual tests in release profile
6276
run: cargo test --verbose --release --workspace --all-targets ${{ matrix.cargo-args }} -- --nocapture
6377
- name: Run isolated PoRep tests in release profile
64-
# Getting the cores does not work on GitHub Actions, hence skip that
65-
# specific test.
66-
run: cargo test --release -p storage-proofs-porep --features isolated-testing ${{ matrix.cargo-args }} -- --nocapture --skip stacked::vanilla::cores::tests::test_checkout_cores
78+
run: cargo test --release -p storage-proofs-porep --features isolated-testing ${{ matrix.cargo-args }} -- --nocapture --test-threads=1
6779
- name: Run isolated update tests in release profile
68-
# Some `storage-proofs-update` tests need to run sequentially due to
69-
# their high memory usage.
70-
run: cargo test --release -p storage-proofs-update --features isolated-testing ${{ matrix.cargo-args }} -- --nocapture --test-threads=1
80+
run: cargo test --release -p storage-proofs-update --features isolated-testing ${{ matrix.cargo-args }} -- --nocapture
7181

7282
test_ignored_release:
7383
runs-on: ubuntu-24.04
7484
name: Test ignored in release mode
7585
steps:
7686
- uses: actions/checkout@v4
7787
- name: Install required packages
78-
run: sudo apt install --no-install-recommends --yes libhwloc-dev ocl-icd-opencl-dev
88+
run: |
89+
sudo apt-get update
90+
sudo apt-get install --no-install-recommends --yes libhwloc-dev ocl-icd-opencl-dev
7991
8092
- name: Download the proof params
8193
uses: ./.github/actions/proof-params-download
@@ -86,60 +98,89 @@ jobs:
8698
run: cargo test --release --workspace -- ignored --nocapture
8799

88100
test_no_default_features:
89-
runs-on: ubuntu-24.04
101+
runs-on: ['self-hosted', 'linux', 'x64', '2xlarge']
90102
name: Test without default features
91103
steps:
92104
- uses: actions/checkout@v4
93105
- name: Install required packages
94-
run: sudo apt install --no-install-recommends --yes libhwloc-dev ocl-icd-opencl-dev
106+
run: |
107+
sudo apt-get update
108+
sudo apt-get install --no-install-recommends --yes libhwloc-dev ocl-icd-opencl-dev
95109
96110
- name: Download the proof params
97111
uses: ./.github/actions/proof-params-download
98112
with:
99113
github-token: ${{ secrets.GITHUB_TOKEN }}
100114

115+
- uses: dtolnay/rust-toolchain@21dc36fb71dd22e3317045c0c31a3f4249868b17
116+
with:
117+
toolchain: 1.83
118+
101119
- name: Test ignored in release profile
102120
run: cargo test --release --workspace --no-default-features
103121

104122
build_gpu:
105-
runs-on: ubuntu-24.04
123+
runs-on: ['self-hosted', 'linux', 'x64', '2xlarge']
106124
name: Build with various GPU support enabled
107125
steps:
108126
- uses: actions/checkout@v4
109127
- name: Install required packages
110-
run: sudo apt install --no-install-recommends --yes libhwloc-dev nvidia-cuda-toolkit ocl-icd-opencl-dev
128+
run: |
129+
sudo apt-get update
130+
sudo apt-get install --no-install-recommends --yes libhwloc-dev nvidia-cuda-toolkit ocl-icd-opencl-dev
131+
132+
- uses: dtolnay/rust-toolchain@21dc36fb71dd22e3317045c0c31a3f4249868b17
133+
with:
134+
toolchain: 1.83
111135

112136
- name: Build with `cuda` and `opencl` features enabled
113137
run: cargo build --workspace --features cuda,opencl
114138
- name: Build with `cuda-supraseal` feature enabled
115139
run: CC=gcc-12 CXX=g++-12 NVCC_PREPEND_FLAGS='-ccbin /usr/bin/g++-12' cargo build --workspace --no-default-features --features cuda-supraseal
116140

117-
# Commented out until we run it on hardware with actual GPUs.
118-
#test_gpu:
119-
# runs-on: ubuntu-24.04
120-
# name: Test on GPUs
121-
# strategy:
122-
# matrix:
123-
# test-args: ['', '--ignored']
124-
# env:
125-
# FIL_PROOFS_USE_GPU_COLUMN_BUILDER: true
126-
# FIL_PROOFS_USE_GPU_TREE_BUILDER: true
127-
# BELLMAN_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
128-
# NEPTUNE_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
129-
# steps:
130-
# - uses: actions/checkout@v4
131-
# - name: Install required packages
132-
# run: sudo apt install --no-install-recommends --yes libhwloc-dev nvidia-cuda-toolkit ocl-icd-opencl-dev
133-
#
134-
# - name: Download the proof params
135-
# uses: ./.github/actions/proof-params-download
136-
# with:
137-
# github-token: ${{ secrets.GITHUB_TOKEN }}
138-
#
139-
# - name: Test with CUDA
140-
# run: cargo test --verbose --release --workspace --features cuda -- --nocapture ${{ matrix.test-args }}
141-
# - name: Test with `cuda-supraseal`
142-
# run: CC=gcc-12 CXX=g++-12 NVCC_PREPEND_FLAGS='-ccbin /usr/bin/g++-12' cargo test -p filecoin-proofs --release --no-default-features --features cuda-supraseal -- --nocapture --test-threads=1 ${{ matrix.test-args }}
141+
test_gpu:
142+
runs-on: ['self-hosted', 'linux', 'x64', 'xlarge+gpu']
143+
name: Test on GPUs
144+
strategy:
145+
matrix:
146+
test-args: ['', '--ignored']
147+
fail-fast: false
148+
env:
149+
FIL_PROOFS_USE_GPU_COLUMN_BUILDER: true
150+
FIL_PROOFS_USE_GPU_TREE_BUILDER: true
151+
BELLMAN_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
152+
NEPTUNE_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
153+
steps:
154+
- uses: actions/checkout@v4
155+
# TODO: Move the driver installation to the AMI.
156+
# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/install-nvidia-driver.html
157+
# https://www.nvidia.com/en-us/drivers/
158+
- name: Install CUDA drivers
159+
run: |
160+
curl -L -o nvidia-driver-local-repo-ubuntu2404-570.148.08_1.0-1_amd64.deb https://us.download.nvidia.com/tesla/570.148.08/nvidia-driver-local-repo-ubuntu2404-570.148.08_1.0-1_amd64.deb
161+
sudo dpkg -i nvidia-driver-local-repo-ubuntu2404-570.148.08_1.0-1_amd64.deb
162+
sudo cp /var/nvidia-driver-local-repo-ubuntu2404-570.148.08/nvidia-driver-local-*-keyring.gpg /usr/share/keyrings/
163+
sudo apt-get update
164+
sudo apt-get install --no-install-recommends --yes cuda-drivers
165+
rm nvidia-driver-local-repo-ubuntu2404-570.148.08_1.0-1_amd64.deb
166+
- name: Install required packages
167+
run: |
168+
sudo apt-get update
169+
sudo apt-get install --no-install-recommends --yes libhwloc-dev nvidia-cuda-toolkit ocl-icd-opencl-dev
170+
171+
- name: Download the proof params
172+
uses: ./.github/actions/proof-params-download
173+
with:
174+
github-token: ${{ secrets.GITHUB_TOKEN }}
175+
176+
- uses: dtolnay/rust-toolchain@21dc36fb71dd22e3317045c0c31a3f4249868b17
177+
with:
178+
toolchain: 1.83
179+
180+
- name: Test with CUDA
181+
run: cargo test --verbose --release --workspace --features cuda -- --nocapture ${{ matrix.test-args }}
182+
- name: Test with `cuda-supraseal`
183+
run: CC=gcc-12 CXX=g++-12 NVCC_PREPEND_FLAGS='-ccbin /usr/bin/g++-12' cargo test -p filecoin-proofs --release --no-default-features --features cuda-supraseal -- --nocapture --test-threads=1 ${{ matrix.test-args }}
143184

144185
test_macos:
145186
runs-on: macos-latest

0 commit comments

Comments
 (0)