Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.10"]
python-version: ["3.12"]
fail-fast: false

steps:
Expand Down
31 changes: 30 additions & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ jobs:
- build_target: "gpu"
platform: "linux/arm64"
runs_on: "ubuntu-24.04-arm"
- build_target: "rocm1151"
platform: "linux/amd64"
runs_on: "ubuntu-latest"
runs-on: ${{ matrix.runs_on }}
steps:
- name: Checkout repository
Expand Down Expand Up @@ -137,7 +140,13 @@ jobs:
REPO: ${{ vars.REPO || 'kokoro-fastapi' }}
strategy:
matrix:
build_target: ["cpu", "gpu"]
include:
- build_target: "cpu"
multiplatform: 'true'
- build_target: "gpu"
multiplatform: 'true'
- build_target: "rocm1151"
multiplatform: 'false'
steps:
- name: Log in to GitHub Container Registry
uses: docker/login-action@v3
Expand All @@ -147,6 +156,7 @@ jobs:
password: ${{ secrets.GITHUB_TOKEN }}

- name: Create multi-platform manifest
if: ${{ matrix.multiplatform == 'true' }}
run: |
VERSION_TAG="${{ needs.prepare-release.outputs.version_tag }}"
TARGET="${{ matrix.build_target }}"
Expand All @@ -166,6 +176,25 @@ jobs:
${REGISTRY}/${OWNER}/${REPO}-${TARGET}:${VERSION_TAG}-arm64
fi

# Runs only for matrix entries whose `multiplatform` value is not 'true'.
# Publishes the unsuffixed ":<version>" tag from the "-amd64" image, and additionally
# publishes ":latest" from that same image when the version tag contains no "-".
- name: Create single-platform manifest
if: ${{ matrix.multiplatform != 'true' }}
run: |
VERSION_TAG="${{ needs.prepare-release.outputs.version_tag }}"
TARGET="${{ matrix.build_target }}"
REGISTRY="${{ env.REGISTRY }}"
OWNER="${{ env.OWNER }}"
REPO="${{ env.REPO }}"

docker buildx imagetools create -t \
${REGISTRY}/${OWNER}/${REPO}-${TARGET}:${VERSION_TAG} \
${REGISTRY}/${OWNER}/${REPO}-${TARGET}:${VERSION_TAG}-amd64

if [[ "$VERSION_TAG" != *"-"* ]]; then
docker buildx imagetools create -t \
${REGISTRY}/${OWNER}/${REPO}-${TARGET}:latest \
${REGISTRY}/${OWNER}/${REPO}-${TARGET}:${VERSION_TAG}-amd64
fi

create-release:
needs: [prepare-release, create-manifests]
runs-on: ubuntu-latest
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ examples/ebook_test/parse_epub.py
api/src/voices/af_jadzia.pt
examples/assorted_checks/test_combinations/output/*
examples/assorted_checks/test_openai/output/*
docker/rocm1151/kokoro-tts/


# Audio files
Expand Down
2 changes: 1 addition & 1 deletion .python-version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.10
3.12
42 changes: 39 additions & 3 deletions docker-bake.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,12 @@ target "_gpu_base" {
dockerfile = "docker/gpu/Dockerfile"
}

# Base settings for AMD ROCm gfx1151 builds.
# Inherits the "_common" target and points the build at the ROCm-specific Dockerfile;
# the concrete rocm1151 targets inherit from this one.
target "_rocm1151_base" {
inherits = ["_common"]
dockerfile = "docker/rocm1151/Dockerfile"
}

# CPU target with multi-platform support
target "cpu" {
inherits = ["_cpu_base"]
Expand All @@ -60,6 +66,16 @@ target "gpu" {
]
}

# ROCm gfx1151 target.
# Single-platform: only linux/amd64 is listed in `platforms`.
# Publishes the unsuffixed ":${VERSION}" and ":latest" tags.
target "rocm1151" {
inherits = ["_rocm1151_base"]
platforms = ["linux/amd64" ]
tags = [
"${REGISTRY}/${OWNER}/${REPO}-rocm1151:${VERSION}",
"${REGISTRY}/${OWNER}/${REPO}-rocm1151:latest"
]
}

# Individual platform targets for debugging/testing
target "cpu-amd64" {
inherits = ["_cpu_base"]
Expand Down Expand Up @@ -97,6 +113,16 @@ target "gpu-arm64" {
]
}

# ROCm gfx1151 per-platform target (linux/amd64 only).
# Both tags carry the architecture suffix so that building this target never
# overwrites the unsuffixed ":${VERSION}" / ":latest" tags, which belong to the
# "rocm1151" target and to the manifests assembled from the "-amd64" image.
target "rocm1151-amd64" {
  inherits = ["_rocm1151_base"]
  platforms = ["linux/amd64"]
  tags = [
    "${REGISTRY}/${OWNER}/${REPO}-rocm1151:${VERSION}-amd64",
    "${REGISTRY}/${OWNER}/${REPO}-rocm1151:latest-amd64"
  ]
}

# Development targets for faster local builds
target "cpu-dev" {
inherits = ["_cpu_base"]
Expand All @@ -110,8 +136,14 @@ target "gpu-dev" {
tags = ["${REGISTRY}/${OWNER}/${REPO}-gpu:dev"]
}

# Development build of the ROCm gfx1151 image, published under the ":dev" tag.
target "rocm1151-dev" {
inherits = ["_rocm1151_base"]
# No multi-platform for dev builds
tags = ["${REGISTRY}/${OWNER}/${REPO}-rocm1151:dev"]
}

group "dev" {
targets = ["cpu-dev", "gpu-dev"]
targets = ["cpu-dev", "gpu-dev", "rocm1151-dev"]
}

# Build groups for different use cases
Expand All @@ -123,10 +155,14 @@ group "gpu-all" {
targets = ["gpu", "gpu-amd64", "gpu-arm64"]
}

# Every ROCm gfx1151 image target: the unsuffixed one plus its amd64-specific variant.
group "rocm1151-all" {
  targets = [
    "rocm1151",
    "rocm1151-amd64",
  ]
}

group "all" {
targets = ["cpu", "gpu"]
targets = ["cpu", "gpu", "rocm1151"]
}

group "individual-platforms" {
targets = ["cpu-amd64", "cpu-arm64", "gpu-amd64", "gpu-arm64"]
targets = ["cpu-amd64", "cpu-arm64", "gpu-amd64", "gpu-arm64", "rocm1151-amd64" ]
}
4 changes: 2 additions & 2 deletions docker/cpu/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.10-slim
FROM python:3.12-slim

# Install dependencies and check espeak location
# Rust is required to build sudachipy and pyopenjtalk-plus
Expand Down Expand Up @@ -29,7 +29,7 @@ ENV PATH="/home/appuser/.cargo/bin:/app/.venv/bin:$PATH" \
COPY --chown=appuser:appuser pyproject.toml ./pyproject.toml

# Install dependencies with CPU extras
RUN uv venv --python 3.10 && \
RUN uv venv --python 3.12 && \
uv sync --extra cpu --no-cache

# Copy project files including models
Expand Down
4 changes: 2 additions & 2 deletions docker/gpu/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ FROM --platform=$BUILDPLATFORM nvcr.io/nvidia/cuda:12.9.1-cudnn-devel-ubuntu24.0

# Install Python and other dependencies
RUN apt-get update -y && \
apt-get install -y python3.10 python3-venv espeak-ng espeak-ng-data git libsndfile1 curl ffmpeg g++ cmake && \
apt-get install -y python3.12-dev python3-venv espeak-ng espeak-ng-data git libsndfile1 curl ffmpeg g++ cmake && \
apt-get clean && rm -rf /var/lib/apt/lists/* && \
mkdir -p /usr/share/espeak-ng-data && \
ln -s /usr/lib/*/espeak-ng-data/* /usr/share/espeak-ng-data/ && \
Expand All @@ -20,7 +20,7 @@ WORKDIR /app
COPY --chown=appuser:appuser pyproject.toml ./pyproject.toml

# Install dependencies with GPU extras
RUN uv venv --python 3.10 && \
RUN uv venv --python 3.12 && \
uv sync --extra gpu --no-cache

# Copy project files including models
Expand Down
60 changes: 60 additions & 0 deletions docker/rocm1151/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Runtime image for the Kokoro FastAPI service on AMD ROCm (gfx1151).
#
# The base image is deliberately NOT pinned with --platform=$BUILDPLATFORM:
# this is the final (runtime) stage, so it must follow the platform requested
# for the build rather than the platform of the machine performing the build.
FROM python:3.12-slim

# Install system dependencies (Python itself comes from the base image),
# expose the espeak-ng data under /usr/share, install uv system-wide and
# create the unprivileged user (UID 1001) that owns /app.
RUN apt-get update -y && \
    apt-get install -y --no-install-recommends espeak-ng espeak-ng-data git libsndfile1 curl ffmpeg g++ cmake \
    make wget gnupg2 ca-certificates libnuma1 libstdc++6 build-essential pigz && \
    apt-get clean && rm -rf /var/lib/apt/lists/* && \
    mkdir -p /usr/share/espeak-ng-data && \
    ln -s /usr/lib/*/espeak-ng-data/* /usr/share/espeak-ng-data/ && \
    curl -LsSf https://astral.sh/uv/install.sh | sh && \
    mv /root/.local/bin/uv /usr/local/bin/ && \
    mv /root/.local/bin/uvx /usr/local/bin/ && \
    useradd -m -u 1001 appuser && \
    mkdir -p /app/api/src/models/v1_0 && \
    chown -R appuser:appuser /app

USER appuser
WORKDIR /app

# Download and unpack the ROCm gfx1151 distribution into /app/rocm_install.
# The archive is saved under a fixed name so that extraction and cleanup do
# not depend on a shell glob matching exactly one file.
RUN wget --progress=dot:giga -O rocm.tar.gz https://repo.amd.com/rocm/tarball/therock-dist-linux-gfx1151-7.10.0.tar.gz && \
    mkdir rocm_install && \
    tar -I pigz -xf rocm.tar.gz -C rocm_install && \
    rm rocm.tar.gz

# Make the unpacked ROCm binaries and shared libraries discoverable.
ENV ROCM_PATH=/app/rocm_install
ENV PATH=$PATH:$ROCM_PATH/bin
ENV LD_LIBRARY_PATH=$ROCM_PATH/lib

# Copy dependency files
COPY --chown=appuser:appuser pyproject.toml ./pyproject.toml

# Install dependencies with the rocm1151 extras into /app/.venv
RUN uv venv --python 3.12 && \
    uv sync --extra rocm1151 --no-cache

# Copy project files; docker/scripts/ is copied straight into /app and
# provides entrypoint.sh and download_model.py used below.
COPY --chown=appuser:appuser api ./api
COPY --chown=appuser:appuser web ./web
COPY --chown=appuser:appuser docker/scripts/ ./
RUN chmod +x ./entrypoint.sh


# Set all environment variables in one go
# (the virtualenv's bin directory is placed first on PATH)
ENV PATH="/app/.venv/bin:$PATH" \
    PYTHONUNBUFFERED=1 \
    PYTHONPATH=/app:/app/api \
    UV_LINK_MODE=copy \
    USE_GPU=true \
    PHONEMIZER_ESPEAK_PATH=/usr/bin \
    PHONEMIZER_ESPEAK_DATA=/usr/share/espeak-ng-data \
    ESPEAK_DATA_PATH=/usr/share/espeak-ng-data \
    DEVICE="rocm1151"

# Build-time switch, default "true". Pass --build-arg DOWNLOAD_MODEL=false to
# skip baking the model into the image. The value is re-exported with ENV so
# the variable is still present in the running container, exactly as before.
ARG DOWNLOAD_MODEL=true
ENV DOWNLOAD_MODEL=${DOWNLOAD_MODEL}
# Download model if enabled
RUN if [ "$DOWNLOAD_MODEL" = "true" ]; then \
        python download_model.py --output api/src/models/v1_0; \
    fi

# Run FastAPI server through entrypoint.sh
CMD ["./entrypoint.sh"]
45 changes: 45 additions & 0 deletions docker/rocm1151/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
name: kokoro-tts-rocm1151
services:
  kokoro-tts:
    # image: ghcr.io/remsky/kokoro-fastapi-rocm1151:v${VERSION}
    build:
      context: ../..
      dockerfile: docker/rocm1151/Dockerfile
    volumes:
      - ../../api:/app/api
      # These are caches used by ROCm's MIOpen library to speed up kernel selection.
      # MIOpen keeps them under the home directory of the user the process runs as.
      # The container runs as UID 1001 (appuser), so they are mounted under
      # /home/appuser; a mount under /root would never be read or written.
      # NOTE: the host directories must be writable by UID 1001.
      - ./kokoro-tts/config:/home/appuser/.config/miopen
      - ./kokoro-tts/cache:/home/appuser/.cache/miopen
    user: "1001:1001"  # Ensure container runs as UID 1001 (appuser)
    ports:
      - "8880:8880"
    environment:
      - PYTHONPATH=/app:/app/api
      - USE_GPU=true
      - PYTHONUNBUFFERED=1
      - API_LOG_LEVEL=DEBUG
      - TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1
      # IMPORTANT: ROCm's MIOpen library will be slow if it has to figure out the
      # optimal kernel shapes for each model.
      # See the performance tuning documentation:
      # https://github.com/ROCm/MIOpen/blob/develop/docs/conceptual/tuningdb.rst
      # 1. Run Kokoro once with the following environment variables set:
      - MIOPEN_FIND_MODE=3
      - MIOPEN_FIND_ENFORCE=3
      # 2. Generate various recordings using sample data (e.g. the first couple of
      #    paragraphs of Dracula); this will be slow.
      # 3. Comment out/remove the environment variables set in step 1.
      # 4. Add the following environment variable to enable caching of model shapes:
      # - MIOPEN_FIND_MODE=2
      # 5. Restart the container and run Kokoro again; it should be much faster.
    devices:
      - /dev/dri
      - /dev/kfd
    security_opt:
      - seccomp:unconfined
    cap_add:
      - SYS_PTRACE
    group_add:
      # NOTE: These are the group ids of the host's video and render groups.
      # Numbers can be found by running: getent group $GROUP_NAME | cut -d: -f3
      - 42  # video
      - 992  # render


23 changes: 20 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name = "kokoro-fastapi"
version = "0.3.0"
description = "FastAPI TTS Service"
readme = "README.md"
requires-python = ">=3.10"
requires-python = ">=3.12"
dependencies = [
# Core dependencies
"fastapi==0.115.6",
Expand Down Expand Up @@ -45,6 +45,13 @@ dependencies = [
[project.optional-dependencies]
gpu = ["torch==2.8.0+cu129"]
cpu = ["torch==2.8.0"]
rocm1151 = [
"torch==2.9.1",
"pytorch-triton-rocm==3.5.1",
"rocm==7.10.0",
"rocm-sdk-core==7.10.0",
"rocm-sdk-libraries-gfx1151==7.10.0",
]
test = [
"pytest==8.3.5",
"pytest-cov==6.0.0",
Expand All @@ -55,9 +62,9 @@ test = [
]

[tool.uv]
conflicts = [[{ extra = "cpu" }, { extra = "gpu" }]]
conflicts = [[{ extra = "cpu" }, { extra = "gpu" }, { extra = "rocm1151" }]]
required-environments = [
"sys_platform == 'linux' and platform_machine == 'aarch64'"
"sys_platform == 'linux'"
]
override-dependencies = [
"triton>=3.5.1 ; platform_machine == 'aarch64'"
Expand All @@ -67,7 +74,12 @@ override-dependencies = [
torch = [
{ index = "pytorch-cpu", extra = "cpu" },
{ index = "pytorch-cuda", extra = "gpu" },
{ index = "pytorch-rocm1151", extra = "rocm1151" }
]
pytorch-triton-rocm = { index = "pytorch-rocm1151" }
rocm = { index = "pytorch-rocm1151" }
rocm-sdk-core = { index = "pytorch-rocm1151" }
rocm-sdk-libraries-gfx1151 = { index = "pytorch-rocm1151" }

[[tool.uv.index]]
name = "pytorch-cpu"
Expand All @@ -79,6 +91,11 @@ name = "pytorch-cuda"
url = "https://download.pytorch.org/whl/cu129"
explicit = true

[[tool.uv.index]]
name = "pytorch-rocm1151"
url = "https://repo.amd.com/rocm/whl/gfx1151/"
explicit = true

[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
Expand Down
13 changes: 13 additions & 0 deletions start-rocm1151.ps1
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Starts the Kokoro FastAPI server with the rocm1151 extras on Windows.
# Run from the repository root: the current directory becomes PROJECT_ROOT and
# the paths passed to uv below are relative to it.

# Project root and the paths derived from it.
$env:PROJECT_ROOT = "$pwd"
$env:PYTHONPATH = "$Env:PROJECT_ROOT;$Env:PROJECT_ROOT/api"
$env:WEB_PLAYER_PATH = "$Env:PROJECT_ROOT/web"

# eSpeak NG library location (assumes an install under C:\Program Files - adjust if needed).
$env:PHONEMIZER_ESPEAK_LIBRARY = "C:\Program Files\eSpeak NG\libespeak-ng.dll"
$env:PYTHONUTF8 = 1

# Backend selection and model/voice locations.
$env:USE_GPU = "true"
$env:USE_ONNX = "false"
$env:MODEL_DIR = "src/models"
$env:VOICES_DIR = "src/voices/v1_0"

# Install the project with the ROCm extras, fetch the model, then serve on port 8880.
uv pip install -e ".[rocm1151]"
uv run --no-sync python docker/scripts/download_model.py --output api/src/models/v1_0
uv run --no-sync uvicorn api.src.main:app --host 0.0.0.0 --port 8880
18 changes: 18 additions & 0 deletions start-rocm1151.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env bash
#
# Start the Kokoro FastAPI server with the rocm1151 extras.
# Run from the repository root: the current directory is taken as the project
# root and the paths passed to uv below are relative to it.
# Note: espeak may still require manual installation.

# Project root directory (shell-local; only the derived values are exported)
PROJECT_ROOT="$(pwd)"

# Backend selection and model/voice locations
export USE_GPU=true USE_ONNX=false
export MODEL_DIR=src/models VOICES_DIR=src/voices/v1_0

# Paths derived from the project root
export PYTHONPATH="${PROJECT_ROOT}:${PROJECT_ROOT}/api"
export WEB_PLAYER_PATH="${PROJECT_ROOT}/web"

# Install the project with the ROCm extras, fetch the model, then serve on port 8880
uv pip install -e ".[rocm1151]"
uv run --no-sync python docker/scripts/download_model.py --output api/src/models/v1_0
uv run --no-sync uvicorn api.src.main:app --host 0.0.0.0 --port 8880