Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 16 additions & 3 deletions docker/rocm/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM rocm/dev-ubuntu-24.04:6.4.3
FROM rocm/dev-ubuntu-24.04:6.4.3-complete
ENV DEBIAN_FRONTEND=noninteractive \
PHONEMIZER_ESPEAK_PATH=/usr/bin \
PHONEMIZER_ESPEAK_DATA=/usr/share/espeak-ng-data \
Expand All @@ -16,6 +16,7 @@ RUN apt-get update && apt upgrade -y && apt-get install -y --no-install-recommen
wget \
nano \
g++ \
zstd \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* \
&& mkdir -p /usr/share/espeak-ng-data \
Expand All @@ -32,7 +33,6 @@ RUN apt-get update && apt upgrade -y && apt-get install -y --no-install-recommen
&& chown -R appuser:appuser /app \
# Models folder
&& mkdir -p /app/api/src/models/v1_0
WORKDIR /app

USER appuser
WORKDIR /app
Expand All @@ -46,13 +46,26 @@ ENV PHONEMIZER_ESPEAK_PATH=/usr/bin \

# Install dependencies with GPU extras (using cache mounts)
RUN --mount=type=cache,target=/root/.cache/uv \
uv venv --python 3.10 && \
uv venv --python 3.12 && \
uv sync --extra rocm

# Install the MIOpen kernel database (kdb) files
ENV ROCM_VERSION=6.4.3
COPY --chown=appuser:appuser docker/rocm/kbd_install.sh /tmp/
RUN /tmp/kbd_install.sh

# Support older GFX architectures: download the rocblas package from the Arch
# Linux repository, unpack it in /tmp, and replace the rocBLAS library
# directory inside the venv's torch install with the one from that package.
# The `pwd && ls -lah ./` step only prints the working directory and its
# contents to the build log.
# NOTE(review): the download URL carries no version, so the package contents
# can differ between builds - confirm this is intended or pin a version.
# NOTE(review): only the rocblas/library directory is moved out of the
# extracted tree; the rest of the extraction stays in /tmp - TODO confirm
# whether it should be cleaned up.
RUN cd /tmp && wget https://archlinux.org/packages/extra/x86_64/rocblas/download -O rocblas.tar.zst \
&& pwd && ls -lah ./ \
&& tar --zstd -xvf rocblas.tar.zst && rm rocblas.tar.zst \
&& rm -rf /app/.venv/lib/python3.12/site-packages/torch/lib/rocblas/library/ \
&& mv ./opt/rocm/lib/rocblas/library/ /app/.venv/lib/python3.12/site-packages/torch/lib/rocblas/

# Copy project files including models
COPY --chown=appuser:appuser api ./api
COPY --chown=appuser:appuser web ./web
COPY --chown=appuser:appuser docker/scripts/ ./

RUN chmod +x ./entrypoint.sh

# Set all environment variables in one go
Expand Down
112 changes: 112 additions & 0 deletions docker/rocm/kbd_install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#!/bin/bash

set -e

#######################################
# Encode a dotted version string as a fixed-width number so that two versions
# can be compared numerically (e.g. with -lt inside [[ ]]).
# Arguments: $1 - version string such as "6.4.3"
# Outputs:   the components formatted with "%3d%03d%03d%03d" on stdout;
#            missing components count as 0 ("6.2" -> "  6002000000").
#######################################
ver() {
  local -a raw_parts=() parts=()
  local part
  # Split on dots only; no unquoted expansion, so no globbing surprises.
  IFS='.' read -ra raw_parts <<< "$1"
  for part in "${raw_parts[@]}"; do
    # Keep only the leading digits ("3-rc1" -> "3"), then force base 10 so
    # zero-padded components such as "08" are not rejected as invalid octal.
    part=${part%%[!0-9]*}
    parts+=("$((10#${part:-0}))")
  done
  printf "%3d%03d%03d%03d" "${parts[@]}"
}

# Build the GFX_ARCHS array from GFX_ARCH (a ';'-separated list of GPU
# architectures); without GFX_ARCH, fall back to the built-in default list.
if [[ -n "$GFX_ARCH" ]]; then
  # Split the ';'-separated string into array elements
  IFS=';' read -ra GFX_ARCHS <<< "$GFX_ARCH"
else
  echo "WARNING: missing env var GFX_ARCH, using default (this will take longer)"
  GFX_ARCHS=(gfx900 gfx906 gfx908 gfx90a gfx942 gfx1030)
fi

# Default ROCM_VERSION to "latest" when the caller did not provide one
[[ -n "$ROCM_VERSION" ]] || {
  echo "WARNING: missing env var ROCM_VERSION, using latest kdb repo (NOT RECOMMENDED)"
  ROCM_VERSION="latest"
}

# Locate the directory that torch is installed into. `uv pip show` prints a
# "Location: <dir>" line; keep everything after the label so a directory
# containing spaces is not truncated, and anchor the match to the line start
# so no other line mentioning "Location" is picked up.
TORCH_INSTALL_PATH=$(uv pip show torch | sed -n 's/^Location: //p')

# Abort when no usable directory was found (torch missing or lookup failed)
if [[ ! -d "$TORCH_INSTALL_PATH" ]]; then
  echo "Error: Torch installation path '$TORCH_INSTALL_PATH' does not exist."
  exit 1
fi

# Summarize the settings the rest of the script works with
printf '%s\n' \
  "ROCM version: $ROCM_VERSION" \
  "GFX architectures: ${GFX_ARCHS[*]}" \
  "PyTorch installation path: $TORCH_INSTALL_PATH"

# Start from an empty scratch directory and work inside it.
# The create/enter step is checked explicitly: in a plain `mkdir && cd` list a
# failing mkdir is not caught by `set -e`, and the script would go on to
# download and extract in whatever directory it happened to be in.
EXTRACT_DIR=extract_miopen_dbs
rm -rf "$EXTRACT_DIR"
if ! mkdir -p "$EXTRACT_DIR" || ! cd "$EXTRACT_DIR"; then
  echo "ERROR: Could not create or enter directory '$EXTRACT_DIR'."
  exit 1
fi

# Download and unpack the MIOpen kernel database (.kdb) packages for every
# architecture in GFX_ARCHS, using the package format of the host distro.
# Reads GFX_ARCHS and ROCM_VERSION, and calls ver() to compare ROCm versions.
# Package contents are extracted into the current directory.
if [[ -f /etc/lsb-release ]]; then
  # Ubuntu: 24.04 is rejected for ROCm versions older than 6.2. Other releases
  # are not rejected here; a release without packages is caught by the
  # download check below.
  source /etc/lsb-release
  echo "DISTRIB_RELEASE: $DISTRIB_RELEASE"
  if [[ "$DISTRIB_RELEASE" == "24.04" && "$ROCM_VERSION" != "latest" \
    && $(ver "$ROCM_VERSION") -lt $(ver 6.2) ]]; then
    echo "ERROR: Unsupported Ubuntu version."
    exit 1
  fi

  for arch in "${GFX_ARCHS[@]}"; do
    # Download MIOpen .kdbs for ROCm version and GPU architecture on ubuntu.
    # The accept pattern is quoted so wget, not the shell, expands it.
    echo "Downloading .kdb files for rocm-$ROCM_VERSION ($arch arch) ..."
    wget -q -r -np -nd -A "miopen-hip-${arch}*kdb_*${DISTRIB_RELEASE}*deb" \
      "https://repo.radeon.com/rocm/apt/${ROCM_VERSION}/pool/main/m/"

    # Check if files were downloaded. An unmatched glob stays literal, so the
    # first array element only exists as a file when something was fetched.
    kdb_pkgs=(miopen-hip-"$arch"*kdb_*"$DISTRIB_RELEASE"*deb)
    if [[ ! -e "${kdb_pkgs[0]}" ]]; then
      echo -e "ERROR: No MIOpen kernel database files found for $arch\nPlease check https://repo.radeon.com/rocm/apt/$ROCM_VERSION/pool/main/m/ for supported architectures"
      exit 1
    fi
  done

  # Extract all .deb files to local directory
  echo "Extracting deb packages for ${GFX_ARCHS[*]} ..."
  for deb_file in *deb; do
    [[ -e "$deb_file" ]] || continue # glob matched nothing
    echo "Extracting $deb_file..."
    dpkg-deb -xv "$deb_file" . > /dev/null 2>&1
  done

elif [[ -f /etc/centos-release || -f /etc/redhat-release ]]; then
  # CentOS/RHEL: only major versions 8 and 9 are accepted
  source /etc/os-release && RHEL_VERSION="$VERSION_ID"
  RHEL_MAJOR_VERSION=${RHEL_VERSION%%.*}
  echo "RHEL_VERSION: $RHEL_VERSION; RHEL_MAJOR_VERSION: $RHEL_MAJOR_VERSION"
  if [[ ! "$RHEL_VERSION" =~ ^(8|9) ]]; then
    echo "ERROR: Unsupported CentOS/RHEL release"
    # Stop here like every other ERROR path instead of attempting downloads
    exit 1
  fi

  for arch in "${GFX_ARCHS[@]}"; do
    # Download MIOpen .kdbs for ROCm version and GPU architecture on centos
    echo "Downloading .kdb files for rocm-$ROCM_VERSION ($arch arch) ..."
    wget -q -r -np -nd -A "miopen-hip-${arch}*kdb-[0-9]*rpm" \
      "https://repo.radeon.com/rocm/rhel${RHEL_MAJOR_VERSION}/${ROCM_VERSION}/main"

    # Check if files were downloaded (same literal-glob test as for Ubuntu)
    kdb_pkgs=(miopen-hip-"$arch"*kdb-*rpm)
    if [[ ! -e "${kdb_pkgs[0]}" ]]; then
      echo -e "ERROR: No MIOpen kernel database files found for $arch\nPlease check https://repo.radeon.com/rocm/rhel${RHEL_MAJOR_VERSION}/$ROCM_VERSION/main for supported architectures"
      exit 1
    fi
  done

  # Extract all RPM files to current directory
  echo "Extracting rpm packages for ${GFX_ARCHS[*]} ..."
  for rpm_file in *rpm; do
    [[ -e "$rpm_file" ]] || continue # glob matched nothing
    echo "Extracting $rpm_file..."
    rpm2cpio "$rpm_file" | cpio -idmv 2> /dev/null
  done
else
  echo "ERROR: Unsupported operating system."
  exit 1
fi

# Copy miopen db files to PyTorch installation path.
# The destination is created first: if torch/share did not exist, cp would
# copy the "miopen" directory *as* "share" instead of placing it inside.
# Paths are quoted so an install location containing spaces still works.
echo "Copying kdb files to ${TORCH_INSTALL_PATH}/torch/share"
mkdir -p "$TORCH_INSTALL_PATH/torch/share"
cp -ra opt/rocm-*/share/miopen "$TORCH_INSTALL_PATH/torch/share"

# Remove downloaded files and extract directory
cd .. && rm -rf "$EXTRACT_DIR"
echo "Successfully installed MIOpen kernel database files"