|
106 | 106 | key: "rocm-build-openmpi"
|
107 | 107 | agents:
|
108 | 108 | queue: "juliagpu"
|
109 |
| - rocm: "*" # todo fix ROCM version |
| 109 | + rocm: "*" |
110 | 110 | env:
|
111 |
| - OPENMPI_VER: "4.1" |
112 |
| - OPENMPI_VER_FULL: "4.1.4" |
113 |
| - UCX_VER: "1.13-rc1" |
| 111 | + OPENMPI_VER: "5.0" |
| 112 | + OPENMPI_VER_FULL: "5.0.3" |
| 113 | + UCX_VER: "1.17.0" |
114 | 114 | CCACHE_DIR: "/root/ccache"
|
115 | 115 | commands: |
|
116 | 116 | echo "--- Install packages"
|
117 | 117 | apt-get install --yes --no-install-recommends curl ccache
|
118 | 118 | export PATH="/usr/lib/ccache/:$$PATH"
|
| 119 | +
|
119 | 120 | echo "--- Build UCX"
|
120 |
| - curl -L https://github.com/openucx/ucx/releases/download/v1.13.0-rc1/ucx-1.13.0.tar.gz --output ucx.tar.gz |
| 121 | + curl -L https://github.com/openucx/ucx/releases/download/v$${UCX_VER}/ucx-$${UCX_VER}.tar.gz --output ucx.tar.gz |
121 | 122 | tar -zxf ucx.tar.gz
|
122 | 123 | pushd ucx-*
|
123 | 124 | ./configure --with-rocm --enable-mt --prefix=$$(realpath ../mpi-prefix)
|
124 | 125 | make -j
|
125 | 126 | make install
|
126 | 127 | popd
|
| 128 | +
|
127 | 129 | echo "--- Build OpenMPI"
|
128 | 130 | curl -L https://download.open-mpi.org/release/open-mpi/v$${OPENMPI_VER}/openmpi-$${OPENMPI_VER_FULL}.tar.gz --output openmpi.tar.gz
|
129 | 131 | tar -zxf openmpi.tar.gz
|
130 |
| - pushd openmpi-* |
131 |
| - ./configure --with-ucx=$$(realpath ../mpi-prefix) --prefix=$$(realpath ../mpi-prefix) |
| 132 | + pushd openmpi-$${OPENMPI_VER_FULL} |
| 133 | + ./configure --with-ucx=$$(realpath ../mpi-prefix) --with-rocm --prefix=$$(realpath ../mpi-prefix) |
132 | 134 | make -j
|
133 | 135 | make install
|
134 | 136 | popd
|
| 137 | +
|
135 | 138 | echo "--- Package prefix"
|
136 | 139 | tar -zcf mpi-prefix.tar.gz mpi-prefix/
|
| 140 | +
|
137 | 141 | echo "--- ccache stats"
|
138 | 142 | ccache -s
|
139 | 143 | artifact_paths:
|
140 | 144 | - "mpi-prefix.tar.gz"
|
141 | 145 |
|
142 | 146 | - wait
|
143 | 147 |
|
144 |
| - - label: "Tests -- Julia latest" |
| 148 | + - label: "Tests -- Julia {{matrix.version}}" |
| 149 | + matrix: |
| 150 | + setup: |
| 151 | + version: |
| 152 | + - "1.10" |
| 153 | + concurrency: 1 |
| 154 | + concurrency_group: mpi_rocm |
145 | 155 | plugins:
|
146 | 156 | - JuliaCI/julia#v1:
|
147 |
| - version: "1" # failing on 1.8 |
| 157 | + version: "{{matrix.version}}" |
148 | 158 | persist_depot_dirs: packages,artifacts,compiled
|
149 | 159 | agents:
|
150 | 160 | queue: "juliagpu"
|
151 |
| - rocm: "*" # todo fix ROCM version |
| 161 | + rocm: "*" |
152 | 162 | if: build.message !~ /\[skip tests\]/
|
153 |
| - timeout_in_minutes: 60 |
154 |
| - soft_fail: |
155 |
| - - exit_status: 1 |
| 163 | + timeout_in_minutes: 90 |
156 | 164 | env:
|
157 | 165 | JULIA_MPI_TEST_NPROCS: 2
|
158 | 166 | JULIA_MPI_PATH: "${BUILDKITE_BUILD_CHECKOUT_PATH}/openmpi"
|
|
182 | 190 | '
|
183 | 191 |
|
184 | 192 | echo "+++ Run tests"
|
| 193 | + export JULIA_MPI_TEST_EXCLUDE="test_allreduce.jl,test_reduce.jl,test_scan.jl" |
185 | 194 | julia --color=yes --project=. -e '
|
186 | 195 | import Pkg
|
187 | 196 | Pkg.test("MPI"; test_args=["--backend=AMDGPU"])
|
|
0 commit comments