Skip to content

Commit ec45de1

Browse files
authored
RHOAIENG-10783: fix(rocm): de-vendor the bundled rocm libraries from pytorch (#652)
* RHOAIENG-9853: fix(rocm): de-vendor the bundled rocm libraries from pytorch

  Use the script from instructlab to remove the duplicate copy of rocm libs from the image. This will make the image significantly smaller.

  Script lives at https://github.com/tiran/instructlab-containers/blob/main/containers/rocm/de-vendor-torch.sh

* add a selftest
1 parent b5322d8 commit ec45de1

File tree

5 files changed

+104
-0
lines changed

5 files changed

+104
-0
lines changed

jupyter/rocm/pytorch/ubi9-python-3.9/Dockerfile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,13 @@ LABEL name="odh-notebook-jupyter-rocm-pytorch-ubi9-python-3.9" \
1313

1414
# Install Python packages and Jupyterlab extensions from Pipfile.lock
1515
COPY Pipfile.lock ./
16+
# Copy utility script
17+
COPY de-vendor-torch.sh ./
1618

1719
RUN echo "Installing softwares and packages" && micropipenv install && rm -f ./Pipfile.lock && \
20+
# De-vendor the ROCm libs that are embedded in Pytorch \
21+
./de-vendor-torch.sh && \
22+
rm ./de-vendor-torch.sh && \
1823
# Replace Notebook's launcher, "(ipykernel)" with Python's version 3.x.y
1924
sed -i -e "s/Python.*/$(python --version | cut -d '.' -f-2)\",/" /opt/app-root/share/jupyter/kernels/python3/kernel.json && \
2025
# Disable announcement plugin of jupyterlab
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
#!/bin/sh
2+
set -ex
3+
# Replace PyTorch's vendored shared libraries with system libraries
4+
# The script assumes that PyTorch is built with the same ROCm ABI as the
5+
# system installation of ROCm.
6+
7+
# Source: https://github.com/tiran/instructlab-containers/blob/main/containers/rocm/de-vendor-torch.sh
8+
9+
PYTHON=python3.9
10+
ROCMLIB=/opt/rocm/lib
11+
TORCHLIB=/opt/app-root/lib/${PYTHON}/site-packages/torch/lib
12+
13+
ln -sf /usr/lib64/libdrm.so.2 ${TORCHLIB}/libdrm.so
14+
ln -sf /usr/lib64/libdrm_amdgpu.so.1 ${TORCHLIB}/libdrm_amdgpu.so
15+
16+
ln -sf ${ROCMLIB}/libamd_comgr.so.2 ${TORCHLIB}/libamd_comgr.so
17+
ln -sf ${ROCMLIB}/libamdhip64.so.6 ${TORCHLIB}/libamdhip64.so
18+
ln -sf ${ROCMLIB}/libhipblaslt.so.0 ${TORCHLIB}/libhipblaslt.so
19+
ln -sf ${ROCMLIB}/libhipblas.so.2 ${TORCHLIB}/libhipblas.so
20+
ln -sf ${ROCMLIB}/libhipfft.so.0 ${TORCHLIB}/libhipfft.so
21+
ln -sf ${ROCMLIB}/libhiprand.so.1 ${TORCHLIB}/libhiprand.so
22+
ln -sf ${ROCMLIB}/libhiprtc.so.6 ${TORCHLIB}/libhiprtc.so
23+
ln -sf ${ROCMLIB}/libhipsolver.so.0 ${TORCHLIB}/libhipsolver.so
24+
ln -sf ${ROCMLIB}/libhipsparse.so.1 ${TORCHLIB}/libhipsparse.so
25+
ln -sf ${ROCMLIB}/libhsa-runtime64.so.1 ${TORCHLIB}/libhsa-runtime64.so
26+
ln -sf ${ROCMLIB}/libMIOpen.so.1 ${TORCHLIB}/libMIOpen.so
27+
ln -sf ${ROCMLIB}/librccl.so.1 ${TORCHLIB}/librccl.so
28+
ln -sf ${ROCMLIB}/librocblas.so.4 ${TORCHLIB}/librocblas.so
29+
ln -sf ${ROCMLIB}/librocfft.so.0 ${TORCHLIB}/librocfft.so
30+
ln -sf ${ROCMLIB}/librocm_smi64.so.6 ${TORCHLIB}/librocm_smi64.so
31+
ln -sf ${ROCMLIB}/librocrand.so.1 ${TORCHLIB}/librocrand.so
32+
ln -sf ${ROCMLIB}/librocsolver.so.0 ${TORCHLIB}/librocsolver.so
33+
ln -sf ${ROCMLIB}/librocsparse.so.1 ${TORCHLIB}/librocsparse.so
34+
ln -sf ${ROCMLIB}/libroctracer64.so.4 ${TORCHLIB}/libroctracer64.so
35+
ln -sf ${ROCMLIB}/libroctx64.so.4 ${TORCHLIB}/libroctx64.so
36+
37+
rm -rf ${TORCHLIB}/rocblas
38+
ln -sf ${ROCMLIB}/rocblas ${TORCHLIB}/rocblas
39+
40+
rm -rf ${TORCHLIB}/hipblaslt
41+
ln -sf ${ROCMLIB}/hipblaslt ${TORCHLIB}/hipblaslt

runtimes/rocm-pytorch/ubi9-python-3.9/Dockerfile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,15 @@ WORKDIR /opt/app-root/bin
1717
COPY Pipfile.lock ./
1818
# Copy Elyra dependencies for air-gapped environment
1919
COPY utils ./utils/
20+
# Copy utility script
21+
COPY de-vendor-torch.sh ./
2022

2123
RUN echo "Installing softwares and packages" && \
2224
micropipenv install && \
2325
rm -f ./Pipfile.lock && \
26+
# De-vendor the ROCm libs that are embedded in Pytorch \
27+
./de-vendor-torch.sh && \
28+
rm ./de-vendor-torch.sh && \
2429
# Fix permissions to support pip in Openshift environments \
2530
chmod -R g+w /opt/app-root/lib/python3.9/site-packages && \
2631
fix-permissions /opt/app-root -P
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
#!/bin/sh
2+
set -ex
3+
# Replace PyTorch's vendored shared libraries with system libraries
4+
# The script assumes that PyTorch is built with the same ROCm ABI as the
5+
# system installation of ROCm.
6+
7+
# Source: https://github.com/tiran/instructlab-containers/blob/main/containers/rocm/de-vendor-torch.sh
8+
9+
PYTHON=python3.9
10+
ROCMLIB=/opt/rocm/lib
11+
TORCHLIB=/opt/app-root/lib/${PYTHON}/site-packages/torch/lib
12+
13+
ln -sf /usr/lib64/libdrm.so.2 ${TORCHLIB}/libdrm.so
14+
ln -sf /usr/lib64/libdrm_amdgpu.so.1 ${TORCHLIB}/libdrm_amdgpu.so
15+
16+
ln -sf ${ROCMLIB}/libamd_comgr.so.2 ${TORCHLIB}/libamd_comgr.so
17+
ln -sf ${ROCMLIB}/libamdhip64.so.6 ${TORCHLIB}/libamdhip64.so
18+
ln -sf ${ROCMLIB}/libhipblaslt.so.0 ${TORCHLIB}/libhipblaslt.so
19+
ln -sf ${ROCMLIB}/libhipblas.so.2 ${TORCHLIB}/libhipblas.so
20+
ln -sf ${ROCMLIB}/libhipfft.so.0 ${TORCHLIB}/libhipfft.so
21+
ln -sf ${ROCMLIB}/libhiprand.so.1 ${TORCHLIB}/libhiprand.so
22+
ln -sf ${ROCMLIB}/libhiprtc.so.6 ${TORCHLIB}/libhiprtc.so
23+
ln -sf ${ROCMLIB}/libhipsolver.so.0 ${TORCHLIB}/libhipsolver.so
24+
ln -sf ${ROCMLIB}/libhipsparse.so.1 ${TORCHLIB}/libhipsparse.so
25+
ln -sf ${ROCMLIB}/libhsa-runtime64.so.1 ${TORCHLIB}/libhsa-runtime64.so
26+
ln -sf ${ROCMLIB}/libMIOpen.so.1 ${TORCHLIB}/libMIOpen.so
27+
ln -sf ${ROCMLIB}/librccl.so.1 ${TORCHLIB}/librccl.so
28+
ln -sf ${ROCMLIB}/librocblas.so.4 ${TORCHLIB}/librocblas.so
29+
ln -sf ${ROCMLIB}/librocfft.so.0 ${TORCHLIB}/librocfft.so
30+
ln -sf ${ROCMLIB}/librocm_smi64.so.6 ${TORCHLIB}/librocm_smi64.so
31+
ln -sf ${ROCMLIB}/librocrand.so.1 ${TORCHLIB}/librocrand.so
32+
ln -sf ${ROCMLIB}/librocsolver.so.0 ${TORCHLIB}/librocsolver.so
33+
ln -sf ${ROCMLIB}/librocsparse.so.1 ${TORCHLIB}/librocsparse.so
34+
ln -sf ${ROCMLIB}/libroctracer64.so.4 ${TORCHLIB}/libroctracer64.so
35+
ln -sf ${ROCMLIB}/libroctx64.so.4 ${TORCHLIB}/libroctx64.so
36+
37+
rm -rf ${TORCHLIB}/rocblas
38+
ln -sf ${ROCMLIB}/rocblas ${TORCHLIB}/rocblas
39+
40+
rm -rf ${TORCHLIB}/hipblaslt
41+
ln -sf ${ROCMLIB}/hipblaslt ${TORCHLIB}/hipblaslt

tests/test_main.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,15 @@ def test_image_pipfiles(subtests: pytest_subtests.plugin.SubTests):
2020
pipfile = tomllib.load(fp)
2121
assert "requires" in pipfile, "Pipfile is missing a [[requires]] section"
2222
assert pipfile["requires"]["python_version"] == python, "Pipfile does not declare the expected Python version"
23+
24+
25+
def test_files_that_should_be_same_are_same(subtests: pytest_subtests.plugin.SubTests):
    """Check that files kept as duplicate copies have identical text.

    ``file_groups`` maps a human-readable group name to a list of paths.
    Within a group, every path after the first is compared against the
    first one; each group runs inside its own ``subtests.test`` context.
    """
    file_groups = {
        "ROCm de-vendor script":
            [PROJECT_ROOT / "jupyter/rocm/pytorch/ubi9-python-3.9/de-vendor-torch.sh",
             PROJECT_ROOT / "runtimes/rocm-pytorch/ubi9-python-3.9/de-vendor-torch.sh"]
    }
    for group_name, (first_file, *rest) in file_groups.items():
        with subtests.test(msg=f"Checking {group_name}"):
            # Read the reference copy once per group instead of once per
            # comparison, and decode explicitly as UTF-8 so the result does
            # not depend on the locale of the machine running the tests.
            expected = first_file.read_text(encoding="utf-8")
            for file in rest:
                assert expected == file.read_text(encoding="utf-8"), f"The files {first_file} and {file} do not match"

0 commit comments

Comments
 (0)