diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 405e39d5..e4c12bfa 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -28,13 +28,13 @@ jobs:
         id: get-version
         run: |
           VERSION_PLAIN=$(cat VERSION)
-          
+
           if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
             BRANCH_NAME="${{ inputs.branch_name }}"
           else
             BRANCH_NAME="${{ github.ref_name }}"
           fi
-          
+
           if [[ "$BRANCH_NAME" == "release" ]]; then
             echo "version=${VERSION_PLAIN}" >> $GITHUB_OUTPUT
             echo "version_tag=v${VERSION_PLAIN}" >> $GITHUB_OUTPUT
@@ -61,15 +61,18 @@ jobs:
           - build_target: "cpu"
             platform: "linux/amd64"
             runs_on: "ubuntu-latest"
-          - build_target: "gpu" 
+          - build_target: "gpu"
             platform: "linux/amd64"
             runs_on: "ubuntu-latest"
           - build_target: "cpu"
             platform: "linux/arm64"
             runs_on: "ubuntu-24.04-arm"
           - build_target: "gpu"
-            platform: "linux/arm64" 
+            platform: "linux/arm64"
             runs_on: "ubuntu-24.04-arm"
+          - build_target: "rocm"
+            platform: "linux/amd64"
+            runs_on: "ubuntu-latest"
     runs-on: ${{ matrix.runs_on }}
     steps:
       - name: Checkout repository
@@ -118,12 +121,12 @@ jobs:
           PLATFORM="${{ matrix.platform }}"
           BUILD_TARGET="${{ matrix.build_target }}"
           VERSION_TAG="${{ needs.prepare-release.outputs.version_tag }}"
-          
+
           echo "Building ${PLATFORM} image for ${BUILD_TARGET} version ${VERSION_TAG}"
-          
+
           TARGET="${BUILD_TARGET}-$(echo ${PLATFORM} | cut -d'/' -f2)"
           echo "Using bake target: $TARGET"
-          
+
           docker buildx bake $TARGET --push --progress=plain
 
   create-manifests:
@@ -137,7 +140,7 @@ jobs:
       REPO: ${{ vars.REPO || 'kokoro-fastapi' }}
     strategy:
       matrix:
-        build_target: ["cpu", "gpu"]
+        build_target: ["cpu", "gpu", "rocm"]
     steps:
       - name: Log in to GitHub Container Registry
         uses: docker/login-action@v3
@@ -145,7 +148,7 @@ jobs:
           registry: ghcr.io
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
-          
+
       - name: Create multi-platform manifest
         run: |
           VERSION_TAG="${{ needs.prepare-release.outputs.version_tag }}"
@@ -153,12 +156,27 @@ jobs:
           REGISTRY="${{ env.REGISTRY }}"
           OWNER="${{ env.OWNER }}"
           REPO="${{ env.REPO }}"
-          
+
+          # ROCm images are built for linux/amd64 only (see the build matrix),
+          # so their manifests must not reference a non-existent -arm64 image.
+          if [[ "${{ matrix.build_target }}" == "rocm" ]]; then
+            ROCM_IMAGE="${REGISTRY}/${OWNER}/${REPO}-rocm"
+            docker buildx imagetools create -t \
+              "${ROCM_IMAGE}:${VERSION_TAG}" \
+              "${ROCM_IMAGE}:${VERSION_TAG}-amd64"
+            if [[ "$VERSION_TAG" != *"-"* ]]; then
+              docker buildx imagetools create -t \
+                "${ROCM_IMAGE}:latest" \
+                "${ROCM_IMAGE}:latest-amd64"
+            fi
+            exit 0
+          fi
+
           docker buildx imagetools create -t \
             ${REGISTRY}/${OWNER}/${REPO}-${TARGET}:${VERSION_TAG} \
             ${REGISTRY}/${OWNER}/${REPO}-${TARGET}:${VERSION_TAG}-amd64 \
             ${REGISTRY}/${OWNER}/${REPO}-${TARGET}:${VERSION_TAG}-arm64
-          
+
           if [[ "$VERSION_TAG" != *"-"* ]]; then
             docker buildx imagetools create -t \
               ${REGISTRY}/${OWNER}/${REPO}-${TARGET}:latest \
diff --git a/docker-bake.hcl b/docker-bake.hcl
index 8fd98bd6..89174aec 100644
--- a/docker-bake.hcl
+++ b/docker-bake.hcl
@@ -60,6 +60,13 @@ target "gpu" {
     ]
 }
 
+# Base settings for AMD ROCm builds
+target "_rocm_base" {
+    inherits = ["_common"]
+    dockerfile = "docker/rocm/Dockerfile"
+}
+
+
 # Individual platform targets for debugging/testing
 target "cpu-amd64" {
     inherits = ["_cpu_base"]
@@ -97,6 +104,16 @@ target "gpu-arm64" {
     ]
 }
 
+# AMD ROCm images are only built for x86_64 (linux/amd64)
+target "rocm-amd64" {
+    inherits = ["_rocm_base"]
+    platforms = ["linux/amd64"]
+    tags = [
+        "${REGISTRY}/${OWNER}/${REPO}-rocm:${VERSION}-amd64",
+        "${REGISTRY}/${OWNER}/${REPO}-rocm:latest-amd64"
+    ]
+}
+
 # Development targets for faster local builds
 target "cpu-dev" {
     inherits = ["_cpu_base"]
@@ -123,10 +140,15 @@ group "gpu-all" {
     targets = ["gpu", "gpu-amd64", "gpu-arm64"]
 }
 
+group "rocm-all" {
+    targets = ["rocm-amd64"]
+}
+
 group "all" {
-    targets = ["cpu", "gpu"]
+    # ROCm has no multi-platform target, so reference its only (amd64) target
+    targets = ["cpu", "gpu", "rocm-amd64"]
 }
 
 group "individual-platforms" {
-    targets = ["cpu-amd64", "cpu-arm64", "gpu-amd64", "gpu-arm64"]
+    targets = ["cpu-amd64", "cpu-arm64", "gpu-amd64", "gpu-arm64", "rocm-amd64"]
 }
diff --git a/docker/rocm/Dockerfile b/docker/rocm/Dockerfile
new file mode 100644
index 00000000..4aea1276
--- /dev/null
+++ b/docker/rocm/Dockerfile
@@ -0,0 +1,81 @@
+FROM rocm/dev-ubuntu-24.04:6.4.4-complete
+ENV DEBIAN_FRONTEND=noninteractive \
+    PHONEMIZER_ESPEAK_PATH=/usr/bin \
+    PHONEMIZER_ESPEAK_DATA=/usr/share/espeak-ng-data \
+    ESPEAK_DATA_PATH=/usr/share/espeak-ng-data
+
+# Install system dependencies, the uv installer, and create the non-root user.
+# Only comment lines (no blank lines) may appear inside the continued RUN:
+# Docker has deprecated empty continuation lines.
+RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \
+    espeak-ng \
+    espeak-ng-data \
+    rocrand \
+    git \
+    libsndfile1 \
+    curl \
+    ffmpeg \
+    wget \
+    nano \
+    g++ \
+    zstd \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* \
+    && mkdir -p /usr/share/espeak-ng-data \
+    && ln -s /usr/lib/*/espeak-ng-data/* /usr/share/espeak-ng-data/ \
+    # Install UV using the installer script
+    && curl -LsSf https://astral.sh/uv/install.sh | sh \
+    && mv /root/.local/bin/uv /usr/local/bin/ \
+    && mv /root/.local/bin/uvx /usr/local/bin/ \
+    # Create the non-root user and the models folder, then hand /app to that user
+    && useradd -m -u 1001 appuser \
+    && mkdir -p /app/api/src/models/v1_0 \
+    && chown -R appuser:appuser /app
+
+USER appuser
+WORKDIR /app
+
+# Copy dependency files
+COPY --chown=appuser:appuser pyproject.toml ./pyproject.toml
+
+# Install dependencies with the ROCm extra.
+# NOTE(review): this step runs as appuser, so uv presumably caches under
+# /home/appuser/.cache/uv and the /root cache mount below goes unused - confirm.
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv venv --python 3.12 && \
+    uv sync --extra rocm
+
+# Install the MIOpen kernel database (.kdb) files into the torch package
+ENV ROCM_VERSION=6.4.4
+COPY --chown=appuser:appuser docker/rocm/kdb_install.sh /tmp/
+RUN /tmp/kdb_install.sh
+
+# Support older GFX architectures by replacing torch's bundled rocBLAS library
+# with the one from the Arch Linux package.
+# NOTE(review): the URL serves whatever package version is current and the
+# download is not checksum-verified - consider pinning a version.
+RUN mkdir /tmp/rocblas && cd /tmp/rocblas \
+    && wget https://archlinux.org/packages/extra/x86_64/rocblas/download -O rocblas.tar.zst \
+    && tar --zstd -xf rocblas.tar.zst \
+    && rm -rf /app/.venv/lib/python3.12/site-packages/torch/lib/rocblas/library/ \
+    && mv ./opt/rocm/lib/rocblas/library/ /app/.venv/lib/python3.12/site-packages/torch/lib/rocblas/ \
+    && cd /app && rm -rf /tmp/rocblas
+
+# Copy project files including models
+COPY --chown=appuser:appuser api ./api
+COPY --chown=appuser:appuser web ./web
+COPY --chown=appuser:appuser docker/scripts/ ./
+
+RUN chmod +x ./entrypoint.sh
+
+# Set all environment variables in one go
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONPATH=/app:/app/api \
+    PATH="/app/.venv/bin:$PATH" \
+    UV_LINK_MODE=copy \
+    USE_GPU=true \
+    DOWNLOAD_MODEL=true \
+    DEVICE="gpu"
+
+# Run FastAPI server through entrypoint.sh
+CMD ["./entrypoint.sh"]
diff --git a/docker/rocm/docker-compose.yml b/docker/rocm/docker-compose.yml
new file mode 100644
index 00000000..5a7650b7
--- /dev/null
+++ b/docker/rocm/docker-compose.yml
@@ -0,0 +1,41 @@
+services:
+  kokoro-tts:
+    image: kprinssu/kokoro-fastapi:rocm
+    devices:
+      - /dev/dri
+      - /dev/kfd
+    group_add:
+      # NOTE: These are the group ids for: video, input, and render
+      # Numbers can be found by running: getent group $GROUP_NAME | cut -d: -f3
+      - 44
+      - 993
+      - 996
+    restart: 'always'
+    volumes:
+      # docker/rocm/Dockerfile switches to the non-root "appuser" (uid 1001), so
+      # MIOpen's per-user files are expected under /home/appuser rather than /root.
+      # The host directories must be writable by uid 1001.
+      - ./kokoro-tts/config:/home/appuser/.config/miopen
+      - ./kokoro-tts/cache:/home/appuser/.cache/miopen
+    ports:
+      - "8880:8880"
+    environment:
+      - USE_GPU=true
+      - TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1
+      # IMPORTANT: The tuning steps below are only required for some GPU generations.
+      # NOTE(review): the original note said "only required for RDNA 2 GPUs" and "not
+      # needed for RDNA 1 (gfx1030) or older", but gfx1030 is an RDNA 2 part - confirm
+      # which architectures actually need the tuning run.
+      # ROCm's MIOpen library will be slow if it has to figure out the optimal kernel shapes for each model
+      # See documentation on performance tuning: https://github.com/ROCm/MIOpen/blob/develop/docs/conceptual/tuningdb.rst
+      # The volumes above cache the MIOpen shape files and user database for subsequent runs
+      #
+      # Steps:
+      # 1. Run Kokoro once with the following environment variables set:
+      #      - MIOPEN_FIND_MODE=3
+      #      - MIOPEN_FIND_ENFORCE=3
+      # 2. Generate various recordings using sample data (e.g. first couple paragraphs of Dracula); this will be slow
+      # 3. Comment out/remove the previously set environment variables
+      # 4. Add the following environment variables to enable caching of model shapes:
+      #      - MIOPEN_FIND_MODE=2
+      # 5. Restart the container and run Kokoro again, it should be much faster
diff --git a/docker/rocm/kdb_install.sh b/docker/rocm/kdb_install.sh
new file mode 100755
index 00000000..29f72865
--- /dev/null
+++ b/docker/rocm/kdb_install.sh
@@ -0,0 +1,131 @@
+#!/bin/bash
+#
+# Installs prebuilt MIOpen kernel database (.kdb) packages into the torch
+# package reported by `uv pip show torch`.
+#
+# Environment variables:
+#   GFX_ARCH      ';'-separated GPU architectures (a default list is used if unset)
+#   ROCM_VERSION  ROCm release of the package repo to use ("latest" if unset)
+
+set -e
+
+# Pack a dotted version such as 6.4.4 into one fixed-width number so two
+# versions can be compared with -lt; printf fills missing components with 0.
+ver() {
+    printf "%3d%03d%03d%03d" $(echo "$1" | tr '.' ' ');
+}
+
+# Sets GFX_ARCH to default if not set
+if [ -z "$GFX_ARCH" ]; then
+    echo "WARNING: missing env var GFX_ARCH, using default (this will take longer)"
+    GFX_ARCHS=("gfx900" "gfx906" "gfx908" "gfx90a" "gfx942" "gfx1030")
+else
+    # Convert ; separated string to array
+    IFS=';' read -ra GFX_ARCHS <<< "$GFX_ARCH"
+fi
+
+# Sets ROCM_VERSION to "latest" if not set
+if [ -z "$ROCM_VERSION" ]; then
+    echo "WARNING: missing env var ROCM_VERSION, using latest kdb repo (NOT RECOMMENDED)"
+    ROCM_VERSION="latest"
+fi
+
+# Find the torch install path; sed keeps the full path even if it contains spaces
+TORCH_INSTALL_PATH=$(uv pip show torch | sed -n 's/^Location: //p')
+
+# Check if Torch installation path exists
+if [ ! -d "$TORCH_INSTALL_PATH" ]; then
+    echo "Error: Torch installation path '$TORCH_INSTALL_PATH' does not exist."
+    exit 1
+fi
+
+# Print variable overview
+echo "ROCM version: $ROCM_VERSION"
+echo "GFX architectures: ${GFX_ARCHS[*]}"
+echo "PyTorch installation path: $TORCH_INSTALL_PATH"
+
+# Create directory for extraction
+EXTRACT_DIR=extract_miopen_dbs
+rm -rf "$EXTRACT_DIR"
+mkdir -p "$EXTRACT_DIR" && cd "$EXTRACT_DIR"
+
+if [[ -f /etc/lsb-release ]]; then
+    source /etc/lsb-release
+    echo "DISTRIB_RELEASE: $DISTRIB_RELEASE"
+    # Exit if not 20.04, 22.04, or 24.04 (24.04 additionally needs ROCm >= 6.2)
+    case "$DISTRIB_RELEASE" in
+        20.04|22.04)
+            ;;
+        24.04)
+            if [[ "$ROCM_VERSION" != "latest" && $(ver "$ROCM_VERSION") -lt $(ver 6.2) ]]; then
+                echo "ERROR: Unsupported Ubuntu version."
+                exit 1
+            fi
+            ;;
+        *)
+            echo "ERROR: Unsupported Ubuntu version."
+            exit 1
+            ;;
+    esac
+
+    for arch in "${GFX_ARCHS[@]}"; do
+        # Download MIOpen .kdbs for ROCm version and GPU architecture on ubuntu
+        echo "Downloading .kdb files for rocm-$ROCM_VERSION ($arch arch) ..."
+        # The accept pattern is quoted so that wget, not the shell, expands it
+        wget -q -r -np -nd -A "miopen-hip-${arch}*kdb_*${DISTRIB_RELEASE}*deb" \
+            "https://repo.radeon.com/rocm/apt/$ROCM_VERSION/pool/main/m/"
+
+        # Check if files were downloaded. No KDB files in repo.radeon will result in error.
+        if ! ls miopen-hip-$arch*kdb_*$DISTRIB_RELEASE*deb 1> /dev/null 2>&1; then
+            echo -e "ERROR: No MIOpen kernel database files found for $arch\nPlease check https://repo.radeon.com/rocm/apt/$ROCM_VERSION/pool/main/m/ for supported architectures"
+            exit 1
+        fi
+    done
+
+    # Extract all .deb files to local directory
+    echo "Extracting deb packages for ${GFX_ARCHS[*]} ..."
+    for deb_file in *deb; do
+        echo "Extracting $deb_file..."
+        dpkg-deb -xv "$deb_file" . > /dev/null 2>&1
+    done
+
+elif [[ -f /etc/centos-release || -f /etc/redhat-release ]]; then
+    # Centos kdbs
+    source /etc/os-release && RHEL_VERSION="$VERSION_ID"
+    RHEL_MAJOR_VERSION=${RHEL_VERSION%%.*}
+    echo "RHEL_VERSION: $RHEL_VERSION; RHEL_MAJOR_VERSION: $RHEL_MAJOR_VERSION"
+    if [[ ! "$RHEL_VERSION" =~ ^(8|9) ]]; then
+        echo "ERROR: Unsupported CentOS/RHEL release"
+        exit 1
+    fi
+    for arch in "${GFX_ARCHS[@]}"; do
+        # Download MIOpen .kdbs for ROCm version and GPU architecture on centos
+        echo "Downloading .kdb files for rocm-$ROCM_VERSION ($arch arch) ..."
+        wget -q -r -np -nd -A "miopen-hip-${arch}*kdb-[0-9]*rpm" \
+            "https://repo.radeon.com/rocm/rhel${RHEL_MAJOR_VERSION}/$ROCM_VERSION/main"
+
+        # Check if files were downloaded. No KDB files in repo.radeon will result in error.
+        if ! ls miopen-hip-$arch*kdb-*rpm 1> /dev/null 2>&1; then
+            echo -e "ERROR: No MIOpen kernel database files found for $arch\nPlease check https://repo.radeon.com/rocm/rhel${RHEL_MAJOR_VERSION}/$ROCM_VERSION/main for supported architectures"
+            exit 1
+        fi
+    done
+
+    # Extract all RPM files to current directory
+    echo "Extracting rpm packages for ${GFX_ARCHS[*]} ..."
+    for rpm_file in *rpm; do
+        echo "Extracting $rpm_file..."
+        rpm2cpio "$rpm_file" | cpio -idmv 2> /dev/null
+    done
+else
+    echo "ERROR: Unsupported operating system."
+    exit 1
+fi
+
+# Copy miopen db files to PyTorch installation path
+echo "Copying kdb files to ${TORCH_INSTALL_PATH}/torch/share"
+cp -ra opt/rocm-*/share/miopen "$TORCH_INSTALL_PATH/torch/share"
+
+# Remove downloaded files and extract directory
+cd .. && rm -rf "$EXTRACT_DIR"
+echo "Successfully installed MIOpen kernel database files"
diff --git a/pyproject.toml b/pyproject.toml
index f41a0dba..97e38f80 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -45,6 +45,10 @@ dependencies = [
 [project.optional-dependencies]
 gpu = ["torch==2.8.0+cu129"]
 cpu = ["torch==2.8.0"]
+rocm = [
+    "torch==2.8.0+rocm6.4",
+    "pytorch-triton-rocm>=3.2.0",
+]
 test = [
     "pytest==8.3.5",
     "pytest-cov==6.0.0",
@@ -55,9 +59,12 @@ test = [
 ]
 
 [tool.uv]
-conflicts = [[{ extra = "cpu" }, { extra = "gpu" }]]
-required-environments = [
-    "sys_platform == 'linux' and platform_machine == 'aarch64'"
+conflicts = [
+    [
+        { extra = "cpu" },
+        { extra = "gpu" },
+        { extra = "rocm" },
+    ],
 ]
 override-dependencies = [
     "triton>=3.5.1 ; platform_machine == 'aarch64'"
@@ -67,6 +74,10 @@ override-dependencies = [
 torch = [
     { index = "pytorch-cpu", extra = "cpu" },
     { index = "pytorch-cuda", extra = "gpu" },
+    { index = "pytorch-rocm", extra = "rocm" },
+]
+pytorch-triton-rocm = [
+    { index = "pytorch-rocm", extra = "rocm" },
 ]
 
 [[tool.uv.index]]
@@ -79,6 +90,11 @@ name = "pytorch-cuda"
 url = "https://download.pytorch.org/whl/cu129"
 explicit = true
 
+[[tool.uv.index]]
+name = "pytorch-rocm"
+url = "https://download.pytorch.org/whl/rocm6.4"
+explicit = true
+
 [build-system]
 requires = ["setuptools>=61.0"]
 build-backend = "setuptools.build_meta"