Skip to content

Commit 3fa53ef

Browse files
Merge pull request #55 from oracle-quickstart/locality-doc-update
Update network locality doc and add image building instructions
2 parents 294cd71 + af4ab94 commit 3fa53ef

File tree

9 files changed

+508
-4
lines changed

9 files changed

+508
-4
lines changed

docker/nccl-tests/Dockerfile

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Builds NCCL and nccl-tests from source on top of an NVIDIA PyTorch image.
#
# Build arguments without a default (must be passed with --build-arg):
#   PYTORCH_IMAGE_TAG   tag of nvcr.io/nvidia/pytorch used for the base image
#   NCCL_VERSION        NCCL release tag, without the leading "v"
#   NCCL_TESTS_VERSION  nccl-tests release tag, without the leading "v"
ARG PYTORCH_IMAGE_TAG
ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:${PYTORCH_IMAGE_TAG}

FROM ${BASE_IMAGE}

# The downloads below are piped straight into tar; with pipefail a failed
# wget aborts the build instead of being hidden by tar's exit status.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# The sources are unpacked into the current directory while NCCL_HOME and the
# final symlink use absolute /workspace paths, so pin the directory explicitly
# rather than relying on whatever the base image happens to set.
WORKDIR /workspace

RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        infiniband-diags \
        kmod \
        openssh-server \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Declared after the package layer so that changing a version does not
# invalidate the cached apt-get install.
ARG NCCL_VERSION
ARG NCCL_TESTS_VERSION

# -j"$(nproc)" bounds the number of parallel jobs; a bare "make -j" starts an
# unlimited number of compiler processes.
RUN wget -qO- "https://github.com/NVIDIA/nccl/archive/refs/tags/v${NCCL_VERSION}.tar.gz" | tar -xz \
    && cd "nccl-${NCCL_VERSION}" \
    && make -j"$(nproc)" src.build

RUN wget -qO- "https://github.com/NVIDIA/nccl-tests/archive/refs/tags/v${NCCL_TESTS_VERSION}.tar.gz" | tar -xz \
    && cd "nccl-tests-${NCCL_TESTS_VERSION}" \
    && make -j"$(nproc)" MPI=1 MPI_HOME=/usr/local/mpi NCCL_HOME="/workspace/nccl-${NCCL_VERSION}/build" \
    && ln -s "/workspace/nccl-tests-${NCCL_TESTS_VERSION}" /workspace/nccl-tests

docker/nccl-tests/README.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Building the NCCL tests container
2+
3+
Change the build argument values below to match the PyTorch image tag, NCCL version, and NCCL tests version you want in your image.
4+
5+
```
6+
docker build -t nccl-tests \
7+
--build-arg PYTORCH_IMAGE_TAG=25.03-py3 \
8+
--build-arg NCCL_VERSION=2.26.2-1 \
9+
--build-arg NCCL_TESTS_VERSION=2.14.1 \
10+
--pull .
11+
```

docker/node-ordering/Dockerfile

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Image whose entrypoint (/entrypoint.sh) runs /node_ordering.py.
FROM python:3.10.10-bullseye

# OS packages first: they change far less often than the scripts copied below,
# so this layer stays cached across script edits.
# dnsutils provides nslookup, which entrypoint.sh calls.
RUN apt-get -qq update \
    && apt-get install -y --allow-change-held-packages --no-install-recommends \
        dnsutils \
        openssh-server \
    && rm -rf /var/lib/apt/lists/*

# SSH client: set StrictHostKeyChecking to "no" and send known hosts to /dev/null.
# SSH server: set StrictModes to "no" and create the /var/run/sshd directory.
RUN sed -i 's/[ #]\(.*StrictHostKeyChecking \).*/ \1no/g' /etc/ssh/ssh_config \
    && echo " UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config \
    && sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config \
    && mkdir -p /var/run/sshd

# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip3 install --no-cache-dir pssh parallel-ssh

# Scripts last so that editing them only rebuilds the layers from here on.
# entrypoint.sh copies /ordered_hostfile, a file node_ordering.py writes relative
# to its working directory, so the container is expected to start with "/" as
# the working directory (no WORKDIR is set here on purpose).
COPY ./node_ordering.py ./entrypoint.sh /
RUN chmod +x /entrypoint.sh /node_ordering.py

ENTRYPOINT ["/entrypoint.sh"]

docker/node-ordering/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Building the Node Ordering container
2+
The Node Ordering script [node_ordering.py](./node_ordering.py) connects to the hosts over SSH on port 2222 by default. To use a different port, change the `port` value on [line 84](./node_ordering.py?plain=1#L84).

docker/node-ordering/entrypoint.sh

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#!/bin/bash
2+
3+
# Wait until a host name can be looked up with nslookup.
#
# The lookup is retried with a doubling delay (0.1s, 0.2s, 0.4s, ...) and is
# abandoned after the initial attempt plus 10 retries.
#
# Arguments:
#   $1 - host name to look up
# Returns:
#   0 as soon as nslookup succeeds
#   1 when no host name was given or every attempt failed
function resolve_host() {
  # "local" keeps these working variables out of the caller's scope.
  local host="$1"
  local max_retry=10
  local counter=0
  local backoff=0.1

  # Without a query nslookup would start in interactive mode and read stdin.
  if [ -z "$host" ]; then
    return 1
  fi

  until nslookup "$host" > /dev/null
  do
    if [ "$counter" -eq "$max_retry" ]; then
      # A bare "return" would hand back the status of the test above (success),
      # so report the failure explicitly.
      return 1
    fi
    sleep "$backoff"
    counter=$((counter + 1))
    # The delay is fractional, which shell arithmetic cannot handle; use awk.
    backoff=$(awk -v b="$backoff" 'BEGIN { print b + b }')
  done
}
19+
20+
# Wait until /etc/mpi/discover_hosts.sh has a line count other than 1.
# NOTE(review): presumably the script holds a single line until worker pods
# have been discovered -- confirm against whatever generates it.
until [ "$(wc -l < /etc/mpi/discover_hosts.sh)" != 1 ]
do
  sleep 5
  echo "$(date) -- Waiting for all worker pods to be ready"
done

# Wait for every host listed in the MPI hostfile to become resolvable.
# Only the first whitespace-separated field of a line is the host name; the
# rest of the line (if any) is read into "_" and ignored. -r keeps backslashes
# literal. Blank lines are skipped.
while read -r host _
do
  if [ -z "$host" ]; then
    continue
  fi
  resolve_host "$host"
done < /etc/mpi/hostfile

/etc/mpi/discover_hosts.sh > /node-ordering/hosts

/node_ordering.py --input_file /node-ordering/hosts > /dev/null

# node_ordering.py writes "ordered_hostfile" relative to the working directory;
# the path below therefore expects this script to run with "/" as its cwd.
cp /ordered_hostfile /node-ordering/

echo "$(date) -- All worker pods are ready"
Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
#!/usr/bin/env python3
2+
import json
3+
import os
4+
import argparse
5+
import subprocess
6+
import sys
7+
8+
def write_ordered_hostfile(ordered_hosts=[],hostfile=None,srun=False,slots_per_host=8):
    """Write host names to ``hostfile``, one per line, replacing any existing file.

    Args:
        ordered_hosts: Host names in the order they should be written. The
            list is only read, never modified.
        hostfile: Path of the file to (re)create.
        srun: When true, every host name is written ``slots_per_host`` times
            in a row instead of once.
        slots_per_host: Number of repetitions per host when ``srun`` is true.
            Defaults to 8, the value that used to be hard-coded.
    """
    # Delete first so the file is created anew rather than truncated in place.
    if os.path.isfile(hostfile):
        os.remove(hostfile)
    repeat = slots_per_host if srun else 1
    # The context manager closes the file even when a write fails.
    with open(hostfile, "w") as fhandler:
        for h in ordered_hosts:
            fhandler.write((h + "\n") * repeat)
20+
21+
def write_ordered_rankfile(ordered_hosts=[],hostfile=None,gpus_per_node=None):
    """Write a rank file with one ``rank <n>=<host> slot=<gpu>`` line per GPU.

    Ranks are numbered consecutively: host ``index`` owns ranks
    ``index * gpus_per_node`` through ``(index + 1) * gpus_per_node - 1``.

    Args:
        ordered_hosts: Host names in rank order. The list is only read.
        hostfile: Path of the rank file to (re)create.
        gpus_per_node: Number of slots written per host. When omitted, the
            module-level ``gpus`` value is used, as before.
    """
    if gpus_per_node is None:
        gpus_per_node = gpus
    # Delete first so the file is created anew rather than truncated in place.
    if os.path.isfile(hostfile):
        os.remove(hostfile)
    # The context manager closes the file even when a write fails.
    with open(hostfile, "w") as fhandler:
        for index, h in enumerate(ordered_hosts):
            for gpu_index in range(gpus_per_node):
                rank = index * gpus_per_node + gpu_index
                fhandler.write("rank "+str(rank)+"="+h+" slot="+str(gpu_index)+"\n")
29+
30+
31+
def get_swicthname(host):
    """Return the name of the Level=0 switch that Slurm reports for ``host``.

    Runs ``scontrol show topology <host> | grep Level=0`` through the shell and
    returns the text before the first space of the output with the
    ``SwitchName=`` prefix removed. Empty output yields an empty string.
    Any exception is reported on stdout and ends the process with status 1.
    """
    try:
        shell_line = " ".join(["scontrol show topology", host, "| grep Level=0"])
        completed = subprocess.run(
            shell_line,
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
        )
        leading_field = completed.stdout.partition(" ")[0]
        return leading_field.replace("SwitchName=", "")
    except Exception as e:
        print(f"Error grabbing switchname: {e}")
        sys.exit(1)
40+
41+
# Number of GPUs per node; write_ordered_rankfile uses it for rank numbering.
gpus=8
parser = argparse.ArgumentParser(description='Script to order hostnames for optimal performance based on rack Id')
parser.add_argument('--input_file', help='Path of the input file which has host names. One hostname on each line in the file')
args = parser.parse_args()

if args.input_file is None:
    # There is nothing to order without a host list. The exit status stays 0
    # as before, but the usage line now explains why nothing happened.
    # sys.exit() is used because the bare exit() helper comes from the site
    # module and is not guaranteed to exist.
    parser.print_usage(sys.stderr)
    sys.exit()
input_file = args.input_file

with open(input_file, 'r') as f:
    # One host name per line; drop blank lines and surrounding whitespace so
    # that empty strings are never treated as hosts.
    hosts = [line.strip() for line in f.read().splitlines() if line.strip()]
57+
58+
59+
def _locality_key(metadata):
    """Return the grouping key for a host from its parsed metadata document.

    Prefers ``rdmaTopologyData.customerLocalBlock`` and falls back to
    ``rackId``; returns None when neither is present.
    """
    topology = metadata.get("rdmaTopologyData")
    if isinstance(topology, dict) and "customerLocalBlock" in topology:
        return topology["customerLocalBlock"]
    return metadata.get("rackId")


def _ssh_stdout(host, remote_command):
    """Run ``remote_command`` on ``host`` with the ssh CLI and return its stdout.

    stderr is captured separately so that ssh warnings cannot end up inside the
    returned text. Raises subprocess.CalledProcessError on a non-zero exit.
    """
    completed = subprocess.run(
        ["ssh", host, remote_command],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        check=True,
    )
    return completed.stdout


# r maps a switch name / locality key to the list of hosts that share it.
r = {}
# Maps each host name from the input file to the name the host reports itself.
friendly_name_to_system_hostname = {}
for i in hosts:
    print(i)

# Slurm is considered available when "scontrol show topology" writes nothing
# to stderr.
topology_command="scontrol show topology"
try:
    result = subprocess.run(topology_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
    slurm = result.stderr == ""
except Exception:
    slurm = False

if slurm:
    # Group hosts by the Level=0 switch Slurm reports for them.
    for host in hosts:
        switch=get_swicthname(host)
        r.setdefault(switch, []).append(host)
        friendly_name_to_system_hostname[host]=host
else:
    try:
        from pssh.clients import ParallelSSHClient
        client = ParallelSSHClient(hosts,port=2222)
        # Ask every host for its metadata document in parallel.
        output = client.run_command('curl http://169.254.169.254/opc/v1/host/')
        for host_out in output:
            j = json.loads(''.join(host_out.stdout))
            r.setdefault(_locality_key(j), []).append(host_out.host)
        hostname_output = client.run_command('/usr/bin/hostname')
        for host_out in hostname_output:
            friendly_name_to_system_hostname[host_out.host] = ''.join(host_out.stdout)
    except ImportError:
        # parallel-ssh is not installed: query the hosts one at a time through
        # the ssh command line client, using the same grouping key as above.
        try:
            for h in hosts:
                json_str = ''.join(_ssh_stdout(h, "curl -s http://169.254.169.254/opc/v1/host/").splitlines())
                json_data = json.loads(json_str)
                r.setdefault(_locality_key(json_data), []).append(h)
            for h in hosts:
                friendly_name_to_system_hostname[h] = _ssh_stdout(h, "/usr/bin/hostname").splitlines()[0]
        except subprocess.CalledProcessError as e_process_error:
            # stdout and stderr are captured separately; report both.
            failure_output = (e_process_error.output or "") + (e_process_error.stderr or "")
            sys.exit(f"Error code: {e_process_error.returncode} Output: {failure_output}")
119+
120+
121+
# Flatten the groups in r into ordered host lists and write the output files
# into the current working directory:
#   node_switch_list                    "Node <host> from switch number <n>"
#   ordered_hostfile                    host names as given in the input file
#   ordered_hostfile_system_name        host names as reported by the hosts
#   ordered_hostfile_system_name_srun   same, each name repeated per slot
#   rankfile_system_name                one "rank N=<host> slot=<gpu>" line per GPU
ordered_hosts = []
ordered_hosts_friendly_name = []
# sort racks by amount of hosts (descending)
racks_sorted = sorted(r.items(), key=lambda x: len(x[1]), reverse=True)
# The context manager closes node_switch_list even if a lookup below raises.
with open("node_switch_list","w") as fhandler:
    for i, (_, rack_hosts) in enumerate(racks_sorted, start=1):
        print(f'# rack {i}')
        for h in rack_hosts:
            fhandler.write("Node "+h+" from switch number "+str(i)+"\n")
            print(h)
            ordered_hosts.append(h)
            ordered_hosts_friendly_name.append(friendly_name_to_system_hostname[h])

write_ordered_hostfile(ordered_hosts, "ordered_hostfile")
write_ordered_hostfile(ordered_hosts_friendly_name, "ordered_hostfile_system_name")
write_ordered_hostfile(ordered_hosts_friendly_name, "ordered_hostfile_system_name_srun", True)
write_ordered_rankfile(ordered_hosts_friendly_name, "rankfile_system_name")

docker/rccl-tests/Dockerfile

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
## base docker image
ARG ROCM_IMAGE_NAME=rocm/dev-ubuntu-22.04
ARG ROCM_IMAGE_TAG=6.3.2
FROM "${ROCM_IMAGE_NAME}:${ROCM_IMAGE_TAG}"

## abort a RUN step when any command of a pipeline fails
## (e.g. the download in "wget ... | apt-key add -")
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

## scratch space; WORKDIR creates the directory when it does not exist
WORKDIR /workspace

## install dependencies (one package per line, sorted, duplicates removed)
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        chrpath \
        curl \
        doxygen \
        gfortran \
        git \
        iputils-ping \
        lcov \
        libbfd-dev \
        libboost-all-dev \
        liblzma-dev \
        libncursesw5-dev \
        libnuma-dev \
        libnuma1 \
        libomp-dev \
        libomp5 \
        libpthread-stubs0-dev \
        libssl-dev \
        libzstd-dev \
        lshw \
        make \
        net-tools \
        ninja-build \
        openssh-server \
        pkg-config \
        python3-dev \
        python3-pip \
        python3-setuptools \
        python3-tk \
        python3-venv \
        python3-yaml \
        rocm-cmake \
        unzip \
        wget \
        xz-utils \
        zip \
        zlib1g-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

## Mellanox OFED version: required, there is no default.
## Declared here rather than at the top so that changing it does not
## invalidate the cached dependency layer above.
ARG MELLANOX_OFED_VERSION

# Mellanox OFED
# Fail immediately with a clear message when the version is missing instead of
# requesting a repository URL with an empty path segment.
RUN : "${MELLANOX_OFED_VERSION:?the MELLANOX_OFED_VERSION build argument is required}" \
    && wget -qO - https://www.mellanox.com/downloads/ofed/RPM-GPG-KEY-Mellanox | apt-key add - \
    && wget -O /etc/apt/sources.list.d/mellanox_mlnx_ofed.list \
        "https://linux.mellanox.com/public/repo/mlnx_ofed/${MELLANOX_OFED_VERSION}/ubuntu22.04/mellanox_mlnx_ofed.list"

RUN apt-get -qq update \
    && DEBIAN_FRONTEND=noninteractive apt-get -qq install -y --no-install-recommends \
        ibverbs-utils \
        infiniband-diags \
        libibumad-dev \
        libibumad3 \
        libibverbs-dev \
        librdmacm-dev \
        rdmacm-utils \
    && rm -rf /var/lib/apt/lists/*

RUN wget https://github.com/Kitware/CMake/releases/download/v3.28.0/cmake-3.28.0-linux-x86_64.sh \
    && chmod +x cmake-3.28.0-linux-x86_64.sh \
    && bash ./cmake-3.28.0-linux-x86_64.sh --prefix=/usr --exclude-subdir --skip-license \
    && rm cmake-3.28.0-linux-x86_64.sh

## Install UCX
ENV UCX_INSTALL_PREFIX=/opt/ucx
RUN wget https://github.com/openucx/ucx/releases/download/v1.16.0/ucx-1.16.0.tar.gz \
    && mkdir -p ucx \
    && tar -zxf ucx-1.16.0.tar.gz -C ucx --strip-components=1 \
    && cd ucx \
    && mkdir build \
    && cd build \
    && ../configure --prefix=${UCX_INSTALL_PREFIX} --with-rocm=/opt/rocm \
    && make -j"$(nproc)" install \
    && cd ../.. \
    && rm -rf ucx ucx-1.16.0.tar.gz

## Install OpenMPI
ENV MPI_INSTALL_PREFIX=/opt/ompi
RUN wget https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.6.tar.gz \
    && mkdir -p ompi4 \
    && tar -zxf openmpi-4.1.6.tar.gz -C ompi4 --strip-components=1 \
    && cd ompi4 \
    && mkdir build \
    && cd build \
    && ../configure --prefix=${MPI_INSTALL_PREFIX} --with-ucx=${UCX_INSTALL_PREFIX} --disable-oshmem --disable-mpi-fortran --enable-orterun-prefix-by-default \
    && make -j"$(nproc)" install \
    && cd ../.. \
    && rm -rf ompi4 openmpi-4.1.6.tar.gz

## rccl repo
ARG RCCL_REPO=https://github.com/ROCm/rccl
ARG RCCL_BRANCH=develop

## GPU architecture(s) RCCL is compiled for, passed to install.sh as
## --amdgpu_targets. The default is the value that used to be hard-coded.
## NOTE(review): as a build ARG it is also present in the environment of the
## rccl-tests "make" step below -- confirm that this is acceptable.
ARG GPU_TARGETS=gfx942

## building RCCL
ENV RCCL_INSTALL_PREFIX=/opt/rocm
RUN git clone --recurse-submodules -b "${RCCL_BRANCH}" "${RCCL_REPO}" ./rccl \
    && cd ./rccl \
    && ./install.sh -t -j"$(nproc)" --amdgpu_targets="${GPU_TARGETS}" --prefix=${RCCL_INSTALL_PREFIX}

## rccl-tests repo
ARG RCCL_TESTS_REPO=https://github.com/ROCm/rccl-tests
ARG RCCL_TESTS_BRANCH=develop

## building RCCL-Tests
RUN git clone -b "${RCCL_TESTS_BRANCH}" "${RCCL_TESTS_REPO}" ./rccl-tests \
    && cd ./rccl-tests \
    && make MPI=1 MPI_HOME=${MPI_INSTALL_PREFIX} NCCL_HOME=${RCCL_INSTALL_PREFIX} -j"$(nproc)"

## set environment variables
ENV PATH="${RCCL_INSTALL_PREFIX}/bin:${MPI_INSTALL_PREFIX}/bin:${PATH}"
ENV LD_LIBRARY_PATH="${RCCL_INSTALL_PREFIX}/lib:${MPI_INSTALL_PREFIX}/lib:${LD_LIBRARY_PATH}"

# Configure SSH
# Client: set StrictHostKeyChecking to "no" and send known hosts to /dev/null.
# Server: set StrictModes to "no" and create the /var/run/sshd directory.
RUN sed -i 's/[ #]\(.*StrictHostKeyChecking \).*/ \1no/g' /etc/ssh/ssh_config \
    && echo " UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config \
    && sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config \
    && mkdir -p /var/run/sshd

docker/rccl-tests/README.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Building the RCCL tests container
2+
3+
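Change the build argument values to match the version combination you want in your image. `MELLANOX_OFED_VERSION` has no default in the Dockerfile and must always be set to a version published under https://linux.mellanox.com/public/repo/mlnx_ofed/.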
4+
5+
```
6+
docker build -t rccl-tests \
7+
--build-arg="ROCM_IMAGE_NAME=rocm/dev-ubuntu-22.04" \
8+
--build-arg="ROCM_IMAGE_TAG=6.3.2" \
9+
--build-arg="GPU_TARGETS=gfx942" \
--build-arg="MELLANOX_OFED_VERSION=24.10-1.1.4.0" \
10+
--pull .
11+
```

0 commit comments

Comments
 (0)