# ci: test gpu on self-hosted runners #33
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Continuous integration workflow: lints (clippy, rustfmt) on GitHub-hosted
# Ubuntu runners, runs the test suite on a self-hosted GPU runner, and runs
# the release-profile tests on macOS.
name: CI

on:
  pull_request:
  push:
    branches:
      - master

# Cancel a job if there's a new one on the same branch started.
# Based on https://stackoverflow.com/questions/58895283/stop-already-running-workflow-job-in-github-actions/67223051#67223051
concurrency:
  group: ${{ github.ref }}
  cancel-in-progress: true

env:
  # Environment values are always strings at runtime; quote the numeric ones
  # so the YAML type matches what the jobs actually receive.
  CARGO_INCREMENTAL: "0"
  RUST_BACKTRACE: "1"
  # Faster crates.io index checkout.
  CARGO_REGISTRIES_CRATES_IO_PROTOCOL: sparse
  RUST_LOG: debug
  # Build the kernels only for a single architecture (sm_75). This should
  # reduce the overall compile-time significantly.
  EC_GPU_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
  BELLMAN_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
  NEPTUNE_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75

jobs:
  # Lint the whole workspace; any clippy warning fails the job.
  check_clippy:
    runs-on: ubuntu-24.04
    name: Clippy
    steps:
      - uses: actions/checkout@v4
      - name: Install required packages
        run: |
          sudo apt-get update
          sudo apt-get install --no-install-recommends --yes libhwloc-dev nvidia-cuda-toolkit ocl-icd-opencl-dev
      - name: Install cargo clippy
        run: rustup component add clippy
      - name: Run cargo clippy
        run: cargo clippy --all-targets --workspace -- -D warnings

  # Verify formatting without modifying any files.
  check_fmt:
    runs-on: ubuntu-24.04
    name: Checking fmt
    steps:
      - uses: actions/checkout@v4
      - name: Install cargo fmt
        run: rustup component add rustfmt
      - name: Run cargo fmt
        run: cargo fmt --all -- --check

  # Run the test suite on a self-hosted runner labelled as having a GPU.
  test:
    runs-on: ['self-hosted', 'linux', 'x64', '2xlarge+gpu']
    name: Test
    steps:
      - uses: actions/checkout@v4
      # TODO: Move the driver installation to the AMI.
      # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/install-nvidia-driver.html
      # https://www.nvidia.com/en-us/drivers/
      - name: Install CUDA drivers
        env:
          # Single source of truth for the driver release; the package file
          # name, download URL and local repository path are derived from it.
          NVIDIA_DRIVER_VERSION: "570.148.08"
        run: |
          repo="nvidia-driver-local-repo-ubuntu2404-${NVIDIA_DRIVER_VERSION}"
          deb="${repo}_1.0-1_amd64.deb"
          # --fail makes an HTTP error abort the step here instead of saving
          # an error page that dpkg would later reject.
          curl --fail --location --output "${deb}" "https://us.download.nvidia.com/tesla/${NVIDIA_DRIVER_VERSION}/${deb}"
          sudo dpkg -i "${deb}"
          # The glob stays outside the quotes so the shell still expands it.
          sudo cp "/var/${repo}"/nvidia-driver-local-*-keyring.gpg /usr/share/keyrings/
          sudo apt-get update
          sudo apt-get install --no-install-recommends --yes cuda-drivers
          rm "${deb}"
      - name: Install required packages
        run: |
          sudo apt-get update
          sudo apt-get install --no-install-recommends --yes libhwloc-dev nvidia-cuda-toolkit ocl-icd-opencl-dev
      # TODO: Remove this and other rust installation directives from jobs
      # running on self-hosted runners.
      # NOTE(review): the original note was cut off after "jobs running";
      # presumably this applies once Rust is preinstalled on the runners - confirm.
      - uses: dtolnay/rust-toolchain@21dc36fb71dd22e3317045c0c31a3f4249868b17
        with:
          # Quoted so the version is a string, not a float (1.90 would
          # otherwise be read as 1.9).
          toolchain: "1.83"
      - name: Test
        run: cargo test --verbose

  # Run the tests in the release profile on a GitHub-hosted macOS runner.
  test_macos:
    runs-on: macos-latest
    name: Test in release mode on MacOS
    steps:
      - uses: actions/checkout@v4
      - name: Install required packages
        run: HOMEBREW_NO_AUTO_UPDATE=1 brew install hwloc
      - name: Run usual tests in release profile
        # CUDA isn't supported on macOS, hence only enable OpenCL.
        run: cargo test --verbose --release --no-default-features -- --nocapture