File tree Expand file tree Collapse file tree 2 files changed +90
-1
lines changed
Expand file tree Collapse file tree 2 files changed +90
-1
# Workflow "Build Checks": triggered by every push and every pull request.
name: Build Checks
on:
  - push
  - pull_request

# Restrict the workflow token to read-only access on repository contents
# and pull requests.
permissions:
  contents: read
  pull-requests: read
6+
jobs:
  # Builds the plugin on an ubuntu-22.04 runner and then runs
  # show-tuner-decisions against the freshly installed tuner library.
  tuner-decisions-check:
    runs-on: ubuntu-22.04
    # The matrix has to be declared inside the job: placed directly under
    # `jobs:` it would be read as a separate job named "strategy" and
    # `matrix.platform` would not be defined for the steps below.
    # Each platform value is passed to the "Check Decisions" step through
    # OFI_NCCL_FORCE_PRODUCT_NAME.
    strategy:
      matrix:
        platform:
          - p5en.48xlarge
          - p5.48xlarge
    steps:
      # Registers the CUDA apt repository via the cuda-keyring package, then
      # installs the CUDA toolkit, hwloc headers and uv.
      # `apt-get install` runs with -y because nothing on the runner can
      # answer apt's confirmation prompt; without it the install aborts.
      - name: Install Dependencies
        run: |
          sudo apt-key del 7fa2af80
          wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
          sudo dpkg -i cuda-keyring_1.1-1_all.deb
          sudo apt-get update
          sudo apt-get install -y cuda-toolkit libhwloc-dev
          pip install uv

      # Downloads the latest EFA installer tarball and runs it.
      # NOTE(review): --skip-kmod presumably skips the kernel module because
      # the runner has no EFA device -- confirm against the installer docs.
      # The configure step below takes MPI and libfabric from /opt/amazon.
      - name: Fetch and Install EFA Installer
        run: |
          curl -O https://efa-installer.amazonaws.com/aws-efa-installer-latest.tar.gz
          tar -xf aws-efa-installer-*.tar.gz
          pushd aws-efa-installer/
          sudo ./efa_installer.sh -y --skip-kmod
          popd

      - uses: actions/checkout@v4

      # Configures, builds and installs the plugin into $PWD/install, which
      # is where the "Check Decisions" step looks for the tuner library.
      - name: Build Plugin
        run: |
          set -x

          # actions/checkout@v4 would drop the plugin source in $PWD,
          # so go ahead and build it.
          ./autogen.sh
          ./configure --with-mpi=/opt/amazon/openmpi \
                      --with-libfabric=/opt/amazon/efa \
                      --with-cuda=/usr/local/cuda/ \
                      --enable-platform-aws \
                      --prefix=$PWD/install
          make -j 2
          make install

      # Runs show-tuner-decisions from contrib/python against the installed
      # tuner library for the current matrix platform, covering 1-8 ranks
      # per node and 2-2048 nodes.
      - name: Check Decisions
        run: |
          OFI_NCCL_FORCE_PRODUCT_NAME=${{ matrix.platform }} uv run --directory contrib/python show-tuner-decisions \
            ../..//install/lib/libnccl-ofi-tuner.so \
            --min-ranks-per-node 1 --max-ranks-per-node 8 \
            --min-nnodes 2 --max-nnodes 2048
You can’t perform that action at this time.
0 commit comments