Skip to content

Commit 148692d

Browse files
authored
patch efa 1.42.0 (#4946)
1 parent dcf525b commit 148692d

File tree

2 files changed

+14
-2
lines changed

2 files changed

+14
-2
lines changed

scripts/install_efa.sh

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,17 @@
22

33
set -ex
44

5+
#######################################
# Verify that libnccl-net.so is present in the OFI NCCL library directory.
# Intended as a post-install sanity check: under `set -e` a non-zero return
# aborts the calling script.
# Arguments:
#   $1 - (optional) directory expected to contain libnccl-net.so;
#        defaults to /opt/amazon/ofi-nccl/lib/x86_64-linux-gnu
# Outputs:
#   Writes an error message to stderr when the file is missing.
# Returns:
#   0 if libnccl-net.so exists as a regular file, 1 otherwise.
#######################################
function check_libnccl_net_so {
  # Keep these function-scoped so they do not leak into the caller's scope.
  local ofi_lib_dir="${1:-/opt/amazon/ofi-nccl/lib/x86_64-linux-gnu}"
  local nccl_net_so="${ofi_lib_dir}/libnccl-net.so"

  # Check if file exists
  if [ ! -f "${nccl_net_so}" ]; then
    # Diagnostics go to stderr so they are not mixed into captured stdout.
    echo "ERROR: ${nccl_net_so} does not exist" >&2
    return 1
  fi
  return 0
}
15+
516
function install_efa {
617
EFA_VERSION=$1
718
OPEN_MPI_PATH="/opt/amazon/openmpi"
@@ -31,7 +42,7 @@ function install_efa {
3142
echo "rmaps_base_mapping_policy = slot" >> ${OPEN_MPI_PATH}/etc/openmpi-mca-params.conf
3243
echo NCCL_DEBUG=INFO >> /etc/nccl.conf
3344
echo NCCL_SOCKET_IFNAME=^docker0,lo >> /etc/nccl.conf
34-
45+
3546
# Install OpenSSH for MPI to communicate between containers, allow OpenSSH to talk to containers without asking for confirmation
3647
apt-get install -y --no-install-recommends \
3748
openssh-client \
@@ -61,6 +72,7 @@ function install_efa {
6172
apt-get autoremove -y
6273
rm -rf /var/lib/apt/lists/*
6374
ldconfig
75+
check_libnccl_net_so
6476
}
6577

6678
# idiomatic parameter and option handling in sh

vllm/x86_64/gpu/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ ENV DEBIAN_FRONTEND=noninteractive \
1212
PYTHONDONTWRITEBYTECODE=1 \
1313
PYTHONUNBUFFERED=1 \
1414
PYTHONIOENCODING=UTF-8 \
15-
LD_LIBRARY_PATH="/usr/local/lib:/opt/amazon/ofi-nccl/lib:/opt/amazon/openmpi/lib:/opt/amazon/efa/lib:/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" \
15+
LD_LIBRARY_PATH="/usr/local/lib:/opt/amazon/ofi-nccl/lib/x86_64-linux-gnu:/opt/amazon/openmpi/lib:/opt/amazon/efa/lib:/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" \
1616
PATH="/opt/amazon/openmpi/bin:/opt/amazon/efa/bin:/usr/local/cuda/bin:${PATH}"
1717

1818
WORKDIR /

0 commit comments

Comments
 (0)