remsky · projects-land · Dec 22, 2025
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -9,7 +9,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.10"]
+        python-version: ["3.12"]
       fail-fast: false
 
     steps:

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -70,6 +70,9 @@ jobs:
           - build_target: "gpu"
             platform: "linux/arm64" 
             runs_on: "ubuntu-24.04-arm"
+          - build_target: "rocm1151"
+            platform: "linux/amd64"
+            runs_on: "ubuntu-latest"
     runs-on: ${{ matrix.runs_on }}
     steps:
       - name: Checkout repository
@@ -137,7 +140,13 @@ jobs:
       REPO: ${{ vars.REPO || 'kokoro-fastapi' }}
     strategy:
       matrix:
-        build_target: ["cpu", "gpu"]
+        include:  # multiplatform is a string flag; the manifest steps compare it with 'true'
+          - build_target: "cpu"
+            multiplatform: 'true'
+          - build_target: "gpu"
+            multiplatform: 'true'
+          - build_target: "rocm1151"  # amd64-only image: handled by the single-platform manifest step
+            multiplatform: 'false'
     steps:
       - name: Log in to GitHub Container Registry
         uses: docker/login-action@v3
@@ -147,6 +156,7 @@ jobs:
           password: ${{ secrets.GITHUB_TOKEN }}
 
       - name: Create multi-platform manifest
+        if: ${{ matrix.multiplatform == 'true' }}
         run: |
           VERSION_TAG="${{ needs.prepare-release.outputs.version_tag }}"
           TARGET="${{ matrix.build_target }}"
@@ -166,6 +176,25 @@ jobs:
               ${REGISTRY}/${OWNER}/${REPO}-${TARGET}:${VERSION_TAG}-arm64
           fi
 
+      - name: Create single-platform manifest
+        # Runs only for matrix entries that are not flagged multiplatform.
+        if: ${{ matrix.multiplatform != 'true' }}
+        run: |
+          VERSION_TAG="${{ needs.prepare-release.outputs.version_tag }}"
+          REGISTRY="${{ env.REGISTRY }}"
+          OWNER="${{ env.OWNER }}"
+          REPO="${{ env.REPO }}"
+          TARGET="${{ matrix.build_target }}"
+          IMAGE="${REGISTRY}/${OWNER}/${REPO}-${TARGET}"
+
+          # Publish the amd64 image under the plain version tag.
+          docker buildx imagetools create -t "${IMAGE}:${VERSION_TAG}" "${IMAGE}:${VERSION_TAG}-amd64"
+
+          # Version tags without a "-" are additionally published as :latest.
+          if [[ "$VERSION_TAG" != *"-"* ]]; then
+            docker buildx imagetools create -t "${IMAGE}:latest" "${IMAGE}:${VERSION_TAG}-amd64"
+          fi
+
   create-release:
     needs: [prepare-release, create-manifests]
     runs-on: ubuntu-latest

diff --git a/.gitignore b/.gitignore
@@ -57,6 +57,7 @@ examples/ebook_test/parse_epub.py
 api/src/voices/af_jadzia.pt
 examples/assorted_checks/test_combinations/output/*
 examples/assorted_checks/test_openai/output/*
+docker/rocm1151/kokoro-tts/
 
 
 # Audio files

diff --git a/.python-version b/.python-version
@@ -1 +1 @@
-3.10
+3.12
diff --git a/docker-bake.hcl b/docker-bake.hcl
@@ -40,6 +40,12 @@ target "_gpu_base" {
     dockerfile = "docker/gpu/Dockerfile"
 }
 
+# Shared base for the AMD ROCm gfx1151 targets: the common settings plus the ROCm Dockerfile
+target "_rocm1151_base" {
+    inherits = ["_common"]
+    dockerfile = "docker/rocm1151/Dockerfile"
+}
+
 # CPU target with multi-platform support
 target "cpu" {
     inherits = ["_cpu_base"]
@@ -60,6 +66,16 @@ target "gpu" {
     ]
 }
 
+# ROCm gfx1151 release target (built for linux/amd64 only, not multi-platform)
+target "rocm1151" {
+    inherits = ["_rocm1151_base"]
+    platforms = ["linux/amd64"]
+    tags = [
+        "${REGISTRY}/${OWNER}/${REPO}-rocm1151:${VERSION}",
+        "${REGISTRY}/${OWNER}/${REPO}-rocm1151:latest"
+    ]
+}
+
 # Individual platform targets for debugging/testing
 target "cpu-amd64" {
     inherits = ["_cpu_base"]
@@ -97,6 +113,16 @@ target "gpu-arm64" {
     ]
 }
 
+# ROCm gfx1151 per-platform target (amd64 only); tags carry an -amd64 suffix, never plain :latest
+target "rocm1151-amd64" {
+    inherits = ["_rocm1151_base"]
+    platforms = ["linux/amd64"]
+    tags = [
+        "${REGISTRY}/${OWNER}/${REPO}-rocm1151:${VERSION}-amd64",
+        "${REGISTRY}/${OWNER}/${REPO}-rocm1151:latest-amd64"
+    ]
+}
+
 # Development targets for faster local builds
 target "cpu-dev" {
     inherits = ["_cpu_base"]
@@ -110,8 +136,14 @@ target "gpu-dev" {
     tags = ["${REGISTRY}/${OWNER}/${REPO}-gpu:dev"]
 }
 
+target "rocm1151-dev" {
+    inherits = ["_rocm1151_base"]
+    # Dev image: no platforms list (no multi-platform build), tagged :dev only
+    tags = ["${REGISTRY}/${OWNER}/${REPO}-rocm1151:dev"]
+}
+
 group "dev" {
-    targets = ["cpu-dev", "gpu-dev"]
+    targets = ["cpu-dev", "gpu-dev", "rocm1151-dev"]
 }
 
 # Build groups for different use cases
@@ -123,10 +155,14 @@ group "gpu-all" {
     targets = ["gpu", "gpu-amd64", "gpu-arm64"]
 }
 
+group "rocm1151-all" {  # every ROCm gfx1151 image target
+    targets = ["rocm1151", "rocm1151-amd64"]
+}
+
 group "all" {
-    targets = ["cpu", "gpu"]
+    targets = ["cpu", "gpu", "rocm1151"]
 }
 
 group "individual-platforms" {
-    targets = ["cpu-amd64", "cpu-arm64", "gpu-amd64", "gpu-arm64"]
+    targets = ["cpu-amd64", "cpu-arm64", "gpu-amd64", "gpu-arm64", "rocm1151-amd64"]
 }
diff --git a/docker/cpu/Dockerfile b/docker/cpu/Dockerfile
@@ -1,4 +1,4 @@
-FROM python:3.10-slim 
+FROM python:3.12-slim 
 
 # Install dependencies and check espeak location
 # Rust is required to build sudachipy and pyopenjtalk-plus
@@ -29,7 +29,7 @@ ENV PATH="/home/appuser/.cargo/bin:/app/.venv/bin:$PATH" \
 COPY --chown=appuser:appuser pyproject.toml ./pyproject.toml
 
 # Install dependencies with CPU extras
-RUN uv venv --python 3.10 && \
+RUN uv venv --python 3.12 && \
     uv sync --extra cpu --no-cache
 
 # Copy project files including models

diff --git a/docker/gpu/Dockerfile b/docker/gpu/Dockerfile
@@ -2,7 +2,7 @@ FROM --platform=$BUILDPLATFORM nvcr.io/nvidia/cuda:12.9.1-cudnn-devel-ubuntu24.0
 
 # Install Python and other dependencies
 RUN apt-get update -y &&  \
-    apt-get install -y python3.10 python3-venv espeak-ng espeak-ng-data git libsndfile1 curl ffmpeg g++ cmake && \
+    apt-get install -y python3.12-dev python3-venv espeak-ng espeak-ng-data git libsndfile1 curl ffmpeg g++ cmake && \
     apt-get clean && rm -rf /var/lib/apt/lists/* && \
     mkdir -p /usr/share/espeak-ng-data &&  \
     ln -s /usr/lib/*/espeak-ng-data/* /usr/share/espeak-ng-data/ && \
@@ -20,7 +20,7 @@ WORKDIR /app
 COPY --chown=appuser:appuser pyproject.toml ./pyproject.toml
 
 # Install dependencies with GPU extras
-RUN uv venv --python 3.10 && \
+RUN uv venv --python 3.12 && \
     uv sync --extra gpu --no-cache
 
 # Copy project files including models

diff --git a/docker/rocm1151/Dockerfile b/docker/rocm1151/Dockerfile
@@ -0,0 +1,60 @@
+FROM --platform=$BUILDPLATFORM python:3.12-slim
+# NOTE(review): --platform=$BUILDPLATFORM pins the base image to the builder's platform; fine while this image is amd64-only — confirm
+# Install system packages and uv, then create appuser (UID 1001) and the model directory
+RUN apt-get update -y &&  \
+    apt-get install -y --no-install-recommends espeak-ng espeak-ng-data git libsndfile1 curl ffmpeg g++ cmake \
+    make wget gnupg2 ca-certificates libnuma1 libstdc++6 build-essential pigz &&  \
+    apt-get clean && rm -rf /var/lib/apt/lists/* && \
+    mkdir -p /usr/share/espeak-ng-data &&  \
+    ln -s /usr/lib/*/espeak-ng-data/* /usr/share/espeak-ng-data/ && \
+    curl -LsSf https://astral.sh/uv/install.sh | sh && \
+    mv /root/.local/bin/uv /usr/local/bin/ && \
+    mv /root/.local/bin/uvx /usr/local/bin/ && \
+    useradd -m -u 1001 appuser && \
+    mkdir -p /app/api/src/models/v1_0 && \
+    chown -R appuser:appuser /app
+# Everything below runs as appuser in /app; the ROCm gfx1151 tarball is unpacked into /app/rocm_install
+USER appuser
+WORKDIR /app
+RUN wget --progress=dot:giga https://repo.amd.com/rocm/tarball/therock-dist-linux-gfx1151-7.10.0.tar.gz && \
+    mkdir rocm_install && \
+    tar -I pigz -xf *.tar.gz -C rocm_install && \
+    rm *.tar.gz
+# Point ROCM_PATH, PATH and LD_LIBRARY_PATH at the unpacked ROCm tree
+ENV ROCM_PATH=/app/rocm_install
+ENV PATH=$PATH:$ROCM_PATH/bin
+ENV LD_LIBRARY_PATH=$ROCM_PATH/lib
+
+# Copy only pyproject.toml so the dependency install below does not depend on the source tree
+COPY --chown=appuser:appuser pyproject.toml ./pyproject.toml
+
+# Create a Python 3.12 venv and install the project dependencies with the rocm1151 extra
+RUN uv venv --python 3.12 && \
+    uv sync --extra rocm1151 --no-cache
+
+# Copy the API code, the web assets and the contents of docker/scripts (including entrypoint.sh) into /app
+COPY --chown=appuser:appuser api ./api
+COPY --chown=appuser:appuser web ./web
+COPY --chown=appuser:appuser docker/scripts/ ./
+RUN chmod +x ./entrypoint.sh
+
+
+# Runtime environment: venv first on PATH, espeak-ng locations, GPU mode and the rocm1151 device name
+ENV PATH="/app/.venv/bin:$PATH" \
+    PYTHONUNBUFFERED=1 \
+    PYTHONPATH=/app:/app/api \
+    UV_LINK_MODE=copy \
+    USE_GPU=true \
+    PHONEMIZER_ESPEAK_PATH=/usr/bin \
+    PHONEMIZER_ESPEAK_DATA=/usr/share/espeak-ng-data \
+    ESPEAK_DATA_PATH=/usr/share/espeak-ng-data \
+    DEVICE="rocm1151"
+
+ENV DOWNLOAD_MODEL=true
+# Download the model into the image at build time when DOWNLOAD_MODEL is "true" (set just above)
+RUN if [ "$DOWNLOAD_MODEL" = "true" ]; then \
+    python download_model.py --output api/src/models/v1_0; \
+    fi
+
+# Start the container through entrypoint.sh (expected to launch the FastAPI server)
+CMD ["./entrypoint.sh"]
diff --git a/docker/rocm1151/docker-compose.yml b/docker/rocm1151/docker-compose.yml
@@ -0,0 +1,45 @@
+name: kokoro-tts-rocm1151
+services:
+  kokoro-tts:
+    # image: ghcr.io/remsky/kokoro-fastapi-rocm1151:v${VERSION}
+    build:
+      context: ../..
+      dockerfile: docker/rocm1151/Dockerfile
+    volumes:
+      - ../../api:/app/api
+      # MIOpen kernel-selection caches, in appuser's home (UID 1001); host dirs must be writable by UID 1001
+      - ./kokoro-tts/config:/home/appuser/.config/miopen
+      - ./kokoro-tts/cache:/home/appuser/.cache/miopen
+    user: "1001:1001"  # Ensure container runs as UID 1001 (appuser)
+    ports:
+      - "8880:8880"
+    environment:
+      - PYTHONPATH=/app:/app/api
+      - USE_GPU=true
+      - PYTHONUNBUFFERED=1
+      - API_LOG_LEVEL=DEBUG
+      - TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1
+      # IMPORTANT: ROCm's MIOpen library will be slow if it has to figure out the optimal kernel shapes for each model
+      # See documentation on performance tuning: https://github.com/ROCm/MIOpen/blob/develop/docs/conceptual/tuningdb.rst
+      # 1. Run Kokoro once with the following environment variables set:
+      - MIOPEN_FIND_MODE=3
+      - MIOPEN_FIND_ENFORCE=3
+      # 2. Generate various recordings using sample data (e.g. first couple paragraphs of Dracula); this will be slow
+      # 3. Comment out/remove the previously set environment variables
+      # 4. Add the following environment variables to enable caching of model shapes:
+      # - MIOPEN_FIND_MODE=2
+      # 5. Restart the container and run Kokoro again, it should be much faster
+    devices:
+      - /dev/dri
+      - /dev/kfd
+    security_opt:
+      - seccomp:unconfined
+    cap_add:
+      - SYS_PTRACE
+    group_add:
+      # NOTE: host group ids of the video and render groups; they differ between hosts, so check yours
+      # Numbers can be found via running: getent group $GROUP_NAME | cut -d: -f3
+      - 42    #video
+      - 992   #render
+
+
diff --git a/pyproject.toml b/pyproject.toml
@@ -3,7 +3,7 @@ name = "kokoro-fastapi"
 version = "0.3.0"
 description = "FastAPI TTS Service"
 readme = "README.md"
-requires-python = ">=3.10"
+requires-python = ">=3.12"
 dependencies = [
     # Core dependencies
     "fastapi==0.115.6",
@@ -45,6 +45,13 @@ dependencies = [
 [project.optional-dependencies]
 gpu = ["torch==2.8.0+cu129"]
 cpu = ["torch==2.8.0"]
+rocm1151 = [  # AMD ROCm gfx1151 stack; each package is mapped to the pytorch-rocm1151 index in [tool.uv.sources]
+    "torch==2.9.1",
+    "pytorch-triton-rocm==3.5.1",
+    "rocm==7.10.0",
+    "rocm-sdk-core==7.10.0",
+    "rocm-sdk-libraries-gfx1151==7.10.0",
+]
 test = [
     "pytest==8.3.5",
     "pytest-cov==6.0.0",
@@ -55,9 +62,9 @@ test = [
 ]
 
 [tool.uv]
-conflicts = [[{ extra = "cpu" }, { extra = "gpu" }]]
+conflicts = [[{ extra = "cpu" }, { extra = "gpu" }, { extra = "rocm1151" }]]
 required-environments = [
-    "sys_platform == 'linux' and platform_machine == 'aarch64'"
+    "sys_platform == 'linux'"
 ]
 override-dependencies = [
     "triton>=3.5.1 ; platform_machine == 'aarch64'"
@@ -67,7 +74,12 @@ override-dependencies = [
 torch = [
     { index = "pytorch-cpu", extra = "cpu" },
     { index = "pytorch-cuda", extra = "gpu" },
+    { index = "pytorch-rocm1151", extra = "rocm1151" }
 ]
+pytorch-triton-rocm = { index = "pytorch-rocm1151" }
+rocm = { index = "pytorch-rocm1151" }
+rocm-sdk-core = { index = "pytorch-rocm1151" }
+rocm-sdk-libraries-gfx1151 = { index = "pytorch-rocm1151" }
 
 [[tool.uv.index]]
 name = "pytorch-cpu"
@@ -79,6 +91,11 @@ name = "pytorch-cuda"
 url = "https://download.pytorch.org/whl/cu129"
 explicit = true
 
+[[tool.uv.index]]
+name = "pytorch-rocm1151"  # referenced by the rocm1151 entries in [tool.uv.sources]
+url = "https://repo.amd.com/rocm/whl/gfx1151/"
+explicit = true
+
 [build-system]
 requires = ["setuptools>=61.0"]
 build-backend = "setuptools.build_meta"

diff --git a/start-rocm1151.ps1 b/start-rocm1151.ps1
@@ -0,0 +1,13 @@
+$env:PHONEMIZER_ESPEAK_LIBRARY = 'C:\Program Files\eSpeak NG\libespeak-ng.dll'
+$env:PYTHONUTF8 = '1'  # enable Python's UTF-8 mode
+$env:PROJECT_ROOT = "$PWD"  # the script is expected to be started from the repository root
+$env:USE_GPU = 'true'
+$env:USE_ONNX = 'false'
+$env:PYTHONPATH = "$env:PROJECT_ROOT;$env:PROJECT_ROOT/api"
+$env:MODEL_DIR = 'src/models'
+$env:VOICES_DIR = 'src/voices/v1_0'
+$env:WEB_PLAYER_PATH = "$env:PROJECT_ROOT/web"
+# Install the rocm1151 extras, download the model, then serve the API on port 8880
+uv pip install -e ".[rocm1151]"
+uv run --no-sync python docker/scripts/download_model.py --output api/src/models/v1_0
+uv run --no-sync uvicorn api.src.main:app --host 0.0.0.0 --port 8880
diff --git a/start-rocm1151.sh b/start-rocm1151.sh
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+set -euo pipefail  # abort if the install or model download fails instead of starting the server anyway
+# Project root is the current directory: run this script from the repository root
+PROJECT_ROOT="$(pwd)"
+
+# Environment for the API process
+export USE_GPU=true
+export USE_ONNX=false
+export PYTHONPATH="$PROJECT_ROOT:$PROJECT_ROOT/api"
+export MODEL_DIR=src/models
+export VOICES_DIR=src/voices/v1_0
+export WEB_PLAYER_PATH="$PROJECT_ROOT/web"
+
+# Install the rocm1151 extras, download the model, then run FastAPI via uv on port 8880.
+# Note: espeak may still require manual installation.
+uv pip install -e ".[rocm1151]"
+uv run --no-sync python docker/scripts/download_model.py --output api/src/models/v1_0
+uv run --no-sync uvicorn api.src.main:app --host 0.0.0.0 --port 8880