diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index abfcd7ed..70c29708 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.10"] + python-version: ["3.12"] fail-fast: false steps: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 405e39d5..57cc4b89 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -70,6 +70,9 @@ jobs: - build_target: "gpu" platform: "linux/arm64" runs_on: "ubuntu-24.04-arm" + - build_target: "rocm1151" + platform: "linux/amd64" + runs_on: "ubuntu-latest" runs-on: ${{ matrix.runs_on }} steps: - name: Checkout repository @@ -137,7 +140,13 @@ jobs: REPO: ${{ vars.REPO || 'kokoro-fastapi' }} strategy: matrix: - build_target: ["cpu", "gpu"] + include: + - build_target: "cpu" + multiplatform: 'true' + - build_target: "gpu" + multiplatform: 'true' + - build_target: "rocm1151" + multiplatform: 'false' steps: - name: Log in to GitHub Container Registry uses: docker/login-action@v3 @@ -147,6 +156,7 @@ jobs: password: ${{ secrets.GITHUB_TOKEN }} - name: Create multi-platform manifest + if: ${{ matrix.multiplatform == 'true' }} run: | VERSION_TAG="${{ needs.prepare-release.outputs.version_tag }}" TARGET="${{ matrix.build_target }}" @@ -166,6 +176,25 @@ jobs: ${REGISTRY}/${OWNER}/${REPO}-${TARGET}:${VERSION_TAG}-arm64 fi + - name: Create single-platform manifest + if: ${{ matrix.multiplatform != 'true' }} + run: | + VERSION_TAG="${{ needs.prepare-release.outputs.version_tag }}" + TARGET="${{ matrix.build_target }}" + REGISTRY="${{ env.REGISTRY }}" + OWNER="${{ env.OWNER }}" + REPO="${{ env.REPO }}" + + docker buildx imagetools create -t \ + ${REGISTRY}/${OWNER}/${REPO}-${TARGET}:${VERSION_TAG} \ + ${REGISTRY}/${OWNER}/${REPO}-${TARGET}:${VERSION_TAG}-amd64 + + if [[ "$VERSION_TAG" != *"-"* ]]; then + docker buildx imagetools create -t \ + 
${REGISTRY}/${OWNER}/${REPO}-${TARGET}:latest \ + ${REGISTRY}/${OWNER}/${REPO}-${TARGET}:${VERSION_TAG}-amd64 + fi + create-release: needs: [prepare-release, create-manifests] runs-on: ubuntu-latest diff --git a/.gitignore b/.gitignore index 35fa9fb8..54861c42 100644 --- a/.gitignore +++ b/.gitignore @@ -57,6 +57,7 @@ examples/ebook_test/parse_epub.py api/src/voices/af_jadzia.pt examples/assorted_checks/test_combinations/output/* examples/assorted_checks/test_openai/output/* +docker/rocm1151/kokoro-tts/ # Audio files diff --git a/.python-version b/.python-version index c8cfe395..e4fba218 100644 --- a/.python-version +++ b/.python-version @@ -1 +1 @@ -3.10 +3.12 diff --git a/docker-bake.hcl b/docker-bake.hcl index 8fd98bd6..c814b83e 100644 --- a/docker-bake.hcl +++ b/docker-bake.hcl @@ -40,6 +40,12 @@ target "_gpu_base" { dockerfile = "docker/gpu/Dockerfile" } +# Base settings for AMD ROCm gfx 1151 builds +target "_rocm1151_base" { + inherits = ["_common"] + dockerfile = "docker/rocm1151/Dockerfile" +} + # CPU target with multi-platform support target "cpu" { inherits = ["_cpu_base"] @@ -60,6 +66,16 @@ target "gpu" { ] } +# ROCM 1151 target with multi-platform support +target "rocm1151" { + inherits = ["_rocm1151_base"] + platforms = ["linux/amd64" ] + tags = [ + "${REGISTRY}/${OWNER}/${REPO}-rocm1151:${VERSION}", + "${REGISTRY}/${OWNER}/${REPO}-rocm1151:latest" + ] +} + # Individual platform targets for debugging/testing target "cpu-amd64" { inherits = ["_cpu_base"] @@ -97,6 +113,16 @@ target "gpu-arm64" { ] } +# AMD ROCm target with multi-platform support +target "rocm1151-amd64" { + inherits = ["_rocm1151_base"] + platforms = ["linux/amd64"] + tags = [ + "${REGISTRY}/${OWNER}/${REPO}-rocm1151:${VERSION}-amd64", + "${REGISTRY}/${OWNER}/${REPO}-rocm1151:latest" + ] +} + # Development targets for faster local builds target "cpu-dev" { inherits = ["_cpu_base"] @@ -110,8 +136,14 @@ target "gpu-dev" { tags = ["${REGISTRY}/${OWNER}/${REPO}-gpu:dev"] } +target 
"rocm1151-dev" { + inherits = ["_rocm1151_base"] + # No multi-platform for dev builds + tags = ["${REGISTRY}/${OWNER}/${REPO}-rocm1151:dev"] +} + group "dev" { - targets = ["cpu-dev", "gpu-dev"] + targets = ["cpu-dev", "gpu-dev", "rocm1151-dev"] } # Build groups for different use cases @@ -123,10 +155,14 @@ group "gpu-all" { targets = ["gpu", "gpu-amd64", "gpu-arm64"] } +group "rocm1151-all" { + targets = ["rocm1151", "rocm1151-amd64" ] +} + group "all" { - targets = ["cpu", "gpu"] + targets = ["cpu", "gpu", "rocm1151"] } group "individual-platforms" { - targets = ["cpu-amd64", "cpu-arm64", "gpu-amd64", "gpu-arm64"] + targets = ["cpu-amd64", "cpu-arm64", "gpu-amd64", "gpu-arm64", "rocm1151-amd64" ] } diff --git a/docker/cpu/Dockerfile b/docker/cpu/Dockerfile index b004d7a0..13e49236 100644 --- a/docker/cpu/Dockerfile +++ b/docker/cpu/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.10-slim +FROM python:3.12-slim # Install dependencies and check espeak location # Rust is required to build sudachipy and pyopenjtalk-plus @@ -29,7 +29,7 @@ ENV PATH="/home/appuser/.cargo/bin:/app/.venv/bin:$PATH" \ COPY --chown=appuser:appuser pyproject.toml ./pyproject.toml # Install dependencies with CPU extras -RUN uv venv --python 3.10 && \ +RUN uv venv --python 3.12 && \ uv sync --extra cpu --no-cache # Copy project files including models diff --git a/docker/gpu/Dockerfile b/docker/gpu/Dockerfile index 9083fa23..28c4b63d 100644 --- a/docker/gpu/Dockerfile +++ b/docker/gpu/Dockerfile @@ -2,7 +2,7 @@ FROM --platform=$BUILDPLATFORM nvcr.io/nvidia/cuda:12.9.1-cudnn-devel-ubuntu24.0 # Install Python and other dependencies RUN apt-get update -y && \ - apt-get install -y python3.10 python3-venv espeak-ng espeak-ng-data git libsndfile1 curl ffmpeg g++ cmake && \ + apt-get install -y python3.12-dev python3-venv espeak-ng espeak-ng-data git libsndfile1 curl ffmpeg g++ cmake && \ apt-get clean && rm -rf /var/lib/apt/lists/* && \ mkdir -p /usr/share/espeak-ng-data && \ ln -s 
/usr/lib/*/espeak-ng-data/* /usr/share/espeak-ng-data/ && \ @@ -20,7 +20,7 @@ WORKDIR /app COPY --chown=appuser:appuser pyproject.toml ./pyproject.toml # Install dependencies with GPU extras -RUN uv venv --python 3.10 && \ +RUN uv venv --python 3.12 && \ uv sync --extra gpu --no-cache # Copy project files including models diff --git a/docker/rocm1151/Dockerfile b/docker/rocm1151/Dockerfile new file mode 100644 index 00000000..739cd999 --- /dev/null +++ b/docker/rocm1151/Dockerfile @@ -0,0 +1,60 @@ +FROM --platform=$BUILDPLATFORM python:3.12-slim + +# Install Python and other dependencies +RUN apt-get update -y && \ + apt-get install -y --no-install-recommends espeak-ng espeak-ng-data git libsndfile1 curl ffmpeg g++ cmake \ + make wget gnupg2 ca-certificates libnuma1 libstdc++6 build-essential pigz && \ + apt-get clean && rm -rf /var/lib/apt/lists/* && \ + mkdir -p /usr/share/espeak-ng-data && \ + ln -s /usr/lib/*/espeak-ng-data/* /usr/share/espeak-ng-data/ && \ + curl -LsSf https://astral.sh/uv/install.sh | sh && \ + mv /root/.local/bin/uv /usr/local/bin/ && \ + mv /root/.local/bin/uvx /usr/local/bin/ && \ + useradd -m -u 1001 appuser && \ + mkdir -p /app/api/src/models/v1_0 && \ + chown -R appuser:appuser /app + +USER appuser +WORKDIR /app +RUN wget --progress=dot:giga https://repo.amd.com/rocm/tarball/therock-dist-linux-gfx1151-7.10.0.tar.gz && \ + mkdir rocm_install && \ + tar -I pigz -xf *.tar.gz -C rocm_install && \ + rm *.tar.gz + +ENV ROCM_PATH=/app/rocm_install +ENV PATH=$PATH:$ROCM_PATH/bin +ENV LD_LIBRARY_PATH=$ROCM_PATH/lib + +# Copy dependency files +COPY --chown=appuser:appuser pyproject.toml ./pyproject.toml + +# Install dependencies with ROCM 1151 extras +RUN uv venv --python 3.12 && \ + uv sync --extra rocm1151 --no-cache + +# Copy project files including models +COPY --chown=appuser:appuser api ./api +COPY --chown=appuser:appuser web ./web +COPY --chown=appuser:appuser docker/scripts/ ./ +RUN chmod +x ./entrypoint.sh + + +# Set all environment 
variables in one go +ENV PATH="/app/.venv/bin:$PATH" \ + PYTHONUNBUFFERED=1 \ + PYTHONPATH=/app:/app/api \ + UV_LINK_MODE=copy \ + USE_GPU=true \ + PHONEMIZER_ESPEAK_PATH=/usr/bin \ + PHONEMIZER_ESPEAK_DATA=/usr/share/espeak-ng-data \ + ESPEAK_DATA_PATH=/usr/share/espeak-ng-data \ + DEVICE="rocm1151" + +ENV DOWNLOAD_MODEL=true +# Download model if enabled +RUN if [ "$DOWNLOAD_MODEL" = "true" ]; then \ + python download_model.py --output api/src/models/v1_0; \ + fi + +# Run FastAPI server through entrypoint.sh +CMD ["./entrypoint.sh"] diff --git a/docker/rocm1151/docker-compose.yml b/docker/rocm1151/docker-compose.yml new file mode 100644 index 00000000..0eadaf73 --- /dev/null +++ b/docker/rocm1151/docker-compose.yml @@ -0,0 +1,45 @@ +name: kokoro-tts-rocm1151 +services: + kokoro-tts: + # image: ghcr.io/remsky/kokoro-fastapi-rocm1151:v${VERSION} + build: + context: ../.. + dockerfile: docker/rocm1151/Dockerfile + volumes: + - ../../api:/app/api + # These are caches used by ROCm's MIOpen library to speed up kernel selection + - ./kokoro-tts/config:/root/.config/miopen + - ./kokoro-tts/cache:/root/.cache/miopen + user: "1001:1001" # Ensure container runs as UID 1001 (appuser) + ports: + - "8880:8880" + environment: + - PYTHONPATH=/app:/app/api + - USE_GPU=true + - PYTHONUNBUFFERED=1 + - API_LOG_LEVEL=DEBUG + - TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1 + # IMPORTANT: ROCm's MIOpen library will be slow if it has to figure out the optimal kernel shapes for each model + # See documentation on performance tuning: https://github.com/ROCm/MIOpen/blob/develop/docs/conceptual/tuningdb.rst + # 1. Run Kokoro once with the following environment variables set: + - MIOPEN_FIND_MODE=3 + - MIOPEN_FIND_ENFORCE=3 + # 2. Generate various recordings using sample data (e.g. first couple paragraphs of Dracula); this will be slow + # 3. Comment out/remove the previously set environment variables + # 4. 
Add the following environment variables to enable caching of model shapes: + # - MIOPEN_FIND_MODE=2 + # 5. Restart the container and run Kokoro again, it should be much faster + devices: + - /dev/dri + - /dev/kfd + security_opt: + - seccomp:unconfined + cap_add: + - SYS_PTRACE + group_add: + # NOTE: These groups are the group ids for: video and render + # Numbers can be found via running: getent group $GROUP_NAME | cut -d: -f3 + - 42 #video + - 992 #render + + diff --git a/pyproject.toml b/pyproject.toml index f41a0dba..c7a70138 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ name = "kokoro-fastapi" version = "0.3.0" description = "FastAPI TTS Service" readme = "README.md" -requires-python = ">=3.10" +requires-python = ">=3.12" dependencies = [ # Core dependencies "fastapi==0.115.6", @@ -45,6 +45,13 @@ dependencies = [ [project.optional-dependencies] gpu = ["torch==2.8.0+cu129"] cpu = ["torch==2.8.0"] +rocm1151 = [ + "torch==2.9.1", + "pytorch-triton-rocm==3.5.1", + "rocm==7.10.0", + "rocm-sdk-core==7.10.0", + "rocm-sdk-libraries-gfx1151==7.10.0", +] test = [ "pytest==8.3.5", "pytest-cov==6.0.0", @@ -55,9 +62,9 @@ test = [ ] [tool.uv] -conflicts = [[{ extra = "cpu" }, { extra = "gpu" }]] +conflicts = [[{ extra = "cpu" }, { extra = "gpu" }, { extra = "rocm1151" }]] required-environments = [ - "sys_platform == 'linux' and platform_machine == 'aarch64'" + "sys_platform == 'linux'" ] override-dependencies = [ "triton>=3.5.1 ; platform_machine == 'aarch64'" @@ -67,7 +74,12 @@ override-dependencies = [ torch = [ { index = "pytorch-cpu", extra = "cpu" }, { index = "pytorch-cuda", extra = "gpu" }, + { index = "pytorch-rocm1151", extra = "rocm1151" } ] +pytorch-triton-rocm = { index = "pytorch-rocm1151" } +rocm = { index = "pytorch-rocm1151" } +rocm-sdk-core = { index = "pytorch-rocm1151" } +rocm-sdk-libraries-gfx1151 = { index = "pytorch-rocm1151" } [[tool.uv.index]] name = "pytorch-cpu" @@ -79,6 +91,11 @@ name = "pytorch-cuda" url = 
"https://download.pytorch.org/whl/cu129" explicit = true +[[tool.uv.index]] +name = "pytorch-rocm1151" +url = "https://repo.amd.com/rocm/whl/gfx1151/" +explicit = true + [build-system] requires = ["setuptools>=61.0"] build-backend = "setuptools.build_meta" diff --git a/start-rocm1151.ps1 b/start-rocm1151.ps1 new file mode 100644 index 00000000..08f58562 --- /dev/null +++ b/start-rocm1151.ps1 @@ -0,0 +1,13 @@ +$env:PHONEMIZER_ESPEAK_LIBRARY="C:\Program Files\eSpeak NG\libespeak-ng.dll" +$env:PYTHONUTF8=1 +$Env:PROJECT_ROOT="$pwd" +$Env:USE_GPU="true" +$Env:USE_ONNX="false" +$Env:PYTHONPATH="$Env:PROJECT_ROOT;$Env:PROJECT_ROOT/api" +$Env:MODEL_DIR="src/models" +$Env:VOICES_DIR="src/voices/v1_0" +$Env:WEB_PLAYER_PATH="$Env:PROJECT_ROOT/web" + +uv pip install -e ".[rocm1151]" +uv run --no-sync python docker/scripts/download_model.py --output api/src/models/v1_0 +uv run --no-sync uvicorn api.src.main:app --host 0.0.0.0 --port 8880 \ No newline at end of file diff --git a/start-rocm1151.sh b/start-rocm1151.sh new file mode 100755 index 00000000..2372e9f5 --- /dev/null +++ b/start-rocm1151.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +# Get project root directory +PROJECT_ROOT=$(pwd) + +# Set environment variables +export USE_GPU=true +export USE_ONNX=false +export PYTHONPATH=$PROJECT_ROOT:$PROJECT_ROOT/api +export MODEL_DIR=src/models +export VOICES_DIR=src/voices/v1_0 +export WEB_PLAYER_PATH=$PROJECT_ROOT/web + +# Run FastAPI with ROCM extras using uv run +# Note: espeak may still require manual installation, +uv pip install -e ".[rocm1151]" +uv run --no-sync python docker/scripts/download_model.py --output api/src/models/v1_0 +uv run --no-sync uvicorn api.src.main:app --host 0.0.0.0 --port 8880