Skip to content

Commit c80a20a

Browse files
committed
feat: add VoxCPM TTS backend
Signed-off-by: Ettore Di Giacinto <[email protected]>
1 parent 8845186 commit c80a20a

21 files changed

+602
-3
lines changed

.github/workflows/backend.yml

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,19 @@ jobs:
105105
dockerfile: "./backend/Dockerfile.python"
106106
context: "./"
107107
ubuntu-version: '2404'
108+
- build-type: 'cublas'
109+
cuda-major-version: "12"
110+
cuda-minor-version: "9"
111+
platforms: 'linux/amd64'
112+
tag-latest: 'auto'
113+
tag-suffix: '-gpu-nvidia-cuda-12-voxcpm'
114+
runs-on: 'ubuntu-latest'
115+
base-image: "ubuntu:24.04"
116+
skip-drivers: 'false'
117+
backend: "voxcpm"
118+
dockerfile: "./backend/Dockerfile.python"
119+
context: "./"
120+
ubuntu-version: '2404'
108121
- build-type: 'cublas'
109122
cuda-major-version: "12"
110123
cuda-minor-version: "9"
@@ -353,6 +366,19 @@ jobs:
353366
dockerfile: "./backend/Dockerfile.python"
354367
context: "./"
355368
ubuntu-version: '2404'
369+
- build-type: 'cublas'
370+
cuda-major-version: "13"
371+
cuda-minor-version: "0"
372+
platforms: 'linux/amd64'
373+
tag-latest: 'auto'
374+
tag-suffix: '-gpu-nvidia-cuda-13-voxcpm'
375+
runs-on: 'ubuntu-latest'
376+
base-image: "ubuntu:24.04"
377+
skip-drivers: 'false'
378+
backend: "voxcpm"
379+
dockerfile: "./backend/Dockerfile.python"
380+
context: "./"
381+
ubuntu-version: '2404'
356382
- build-type: 'cublas'
357383
cuda-major-version: "13"
358384
cuda-minor-version: "0"
@@ -680,6 +706,19 @@ jobs:
680706
dockerfile: "./backend/Dockerfile.python"
681707
context: "./"
682708
ubuntu-version: '2404'
709+
- build-type: 'hipblas'
710+
cuda-major-version: ""
711+
cuda-minor-version: ""
712+
platforms: 'linux/amd64'
713+
tag-latest: 'auto'
714+
tag-suffix: '-gpu-rocm-hipblas-voxcpm'
715+
runs-on: 'arc-runner-set'
716+
base-image: "rocm/dev-ubuntu-24.04:6.4.4"
717+
skip-drivers: 'false'
718+
backend: "voxcpm"
719+
dockerfile: "./backend/Dockerfile.python"
720+
context: "./"
721+
ubuntu-version: '2404'
683722
- build-type: 'hipblas'
684723
cuda-major-version: ""
685724
cuda-minor-version: ""
@@ -890,6 +929,19 @@ jobs:
890929
dockerfile: "./backend/Dockerfile.python"
891930
context: "./"
892931
ubuntu-version: '2404'
932+
- build-type: 'intel'
933+
cuda-major-version: ""
934+
cuda-minor-version: ""
935+
platforms: 'linux/amd64'
936+
tag-latest: 'auto'
937+
tag-suffix: '-gpu-intel-voxcpm'
938+
runs-on: 'arc-runner-set'
939+
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
940+
skip-drivers: 'false'
941+
backend: "voxcpm"
942+
dockerfile: "./backend/Dockerfile.python"
943+
context: "./"
944+
ubuntu-version: '2404'
893945
- build-type: 'intel'
894946
cuda-major-version: ""
895947
cuda-minor-version: ""
@@ -1343,6 +1395,19 @@ jobs:
13431395
dockerfile: "./backend/Dockerfile.python"
13441396
context: "./"
13451397
ubuntu-version: '2404'
1398+
- build-type: ''
1399+
cuda-major-version: ""
1400+
cuda-minor-version: ""
1401+
platforms: 'linux/amd64,linux/arm64'
1402+
tag-latest: 'auto'
1403+
tag-suffix: '-cpu-voxcpm'
1404+
runs-on: 'ubuntu-latest'
1405+
base-image: "ubuntu:24.04"
1406+
skip-drivers: 'false'
1407+
backend: "voxcpm"
1408+
dockerfile: "./backend/Dockerfile.python"
1409+
context: "./"
1410+
ubuntu-version: '2404'
13461411
- build-type: ''
13471412
cuda-major-version: ""
13481413
cuda-minor-version: ""

.github/workflows/test-extra.yml

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -284,4 +284,23 @@ jobs:
284284
- name: Test pocket-tts
285285
run: |
286286
make --jobs=5 --output-sync=target -C backend/python/pocket-tts
287-
make --jobs=5 --output-sync=target -C backend/python/pocket-tts test
287+
make --jobs=5 --output-sync=target -C backend/python/pocket-tts test
288+
tests-voxcpm:
289+
runs-on: ubuntu-latest
290+
steps:
291+
- name: Clone
292+
uses: actions/checkout@v6
293+
with:
294+
submodules: true
295+
- name: Dependencies
296+
run: |
297+
sudo apt-get update
298+
sudo apt-get install build-essential ffmpeg
299+
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
300+
# Install UV
301+
curl -LsSf https://astral.sh/uv/install.sh | sh
302+
pip install --user --no-cache-dir grpcio-tools==1.64.1
303+
- name: Test voxcpm
304+
run: |
305+
make --jobs=5 --output-sync=target -C backend/python/voxcpm
306+
make --jobs=5 --output-sync=target -C backend/python/voxcpm test

Makefile

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Disable parallel execution for backend builds
2-
.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin backends/vllm backends/moonshine backends/pocket-tts
2+
.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin backends/vllm backends/moonshine backends/pocket-tts backends/voxcpm
33

44
GOCMD=go
55
GOTEST=$(GOCMD) test
@@ -317,6 +317,7 @@ prepare-test-extra: protogen-python
317317
$(MAKE) -C backend/python/vibevoice
318318
$(MAKE) -C backend/python/moonshine
319319
$(MAKE) -C backend/python/pocket-tts
320+
$(MAKE) -C backend/python/voxcpm
320321

321322
test-extra: prepare-test-extra
322323
$(MAKE) -C backend/python/transformers test
@@ -326,6 +327,7 @@ test-extra: prepare-test-extra
326327
$(MAKE) -C backend/python/vibevoice test
327328
$(MAKE) -C backend/python/moonshine test
328329
$(MAKE) -C backend/python/pocket-tts test
330+
$(MAKE) -C backend/python/voxcpm test
329331

330332
DOCKER_IMAGE?=local-ai
331333
DOCKER_AIO_IMAGE?=local-ai-aio
@@ -459,6 +461,7 @@ BACKEND_CHATTERBOX = chatterbox|python|.|false|true
459461
BACKEND_VIBEVOICE = vibevoice|python|.|--progress=plain|true
460462
BACKEND_MOONSHINE = moonshine|python|.|false|true
461463
BACKEND_POCKET_TTS = pocket-tts|python|.|false|true
464+
BACKEND_VOXCPM = voxcpm|python|.|false|true
462465

463466
# Helper function to build docker image for a backend
464467
# Usage: $(call docker-build-backend,BACKEND_NAME,DOCKERFILE_TYPE,BUILD_CONTEXT,PROGRESS_FLAG,NEEDS_BACKEND_ARG)
@@ -505,12 +508,13 @@ $(eval $(call generate-docker-build-target,$(BACKEND_CHATTERBOX)))
505508
$(eval $(call generate-docker-build-target,$(BACKEND_VIBEVOICE)))
506509
$(eval $(call generate-docker-build-target,$(BACKEND_MOONSHINE)))
507510
$(eval $(call generate-docker-build-target,$(BACKEND_POCKET_TTS)))
511+
$(eval $(call generate-docker-build-target,$(BACKEND_VOXCPM)))
508512

509513
# Pattern rule for docker-save targets
510514
docker-save-%: backend-images
511515
docker save local-ai-backend:$* -o backend-images/$*.tar
512516

513-
docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-transformers docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-bark docker-build-chatterbox docker-build-vibevoice docker-build-exllama2 docker-build-moonshine docker-build-pocket-tts
517+
docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-transformers docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-bark docker-build-chatterbox docker-build-vibevoice docker-build-exllama2 docker-build-moonshine docker-build-pocket-tts docker-build-voxcpm
514518

515519
########################################################
516520
### END Backends

backend/index.yaml

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,25 @@
428428
nvidia-l4t-cuda-12: "nvidia-l4t-vibevoice"
429429
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-vibevoice"
430430
icon: https://avatars.githubusercontent.com/u/6154722?s=200&v=4
431+
- &voxcpm
432+
urls:
433+
- https://github.com/ModelBest/VoxCPM
434+
description: |
435+
VoxCPM is an innovative end-to-end TTS model from ModelBest, designed to generate highly expressive speech.
436+
tags:
437+
- text-to-speech
438+
- TTS
439+
license: mit
440+
name: "voxcpm"
441+
alias: "voxcpm"
442+
capabilities:
443+
nvidia: "cuda12-voxcpm"
444+
intel: "intel-voxcpm"
445+
amd: "rocm-voxcpm"
446+
default: "cpu-voxcpm"
447+
nvidia-cuda-13: "cuda13-voxcpm"
448+
nvidia-cuda-12: "cuda12-voxcpm"
449+
icon: https://avatars.githubusercontent.com/u/6154722?s=200&v=4
431450
- &pocket-tts
432451
urls:
433452
- https://github.com/kyutai-labs/pocket-tts
@@ -1613,6 +1632,66 @@
16131632
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-vibevoice"
16141633
mirrors:
16151634
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-vibevoice
1635+
## voxcpm
1636+
- !!merge <<: *voxcpm
1637+
name: "voxcpm-development"
1638+
capabilities:
1639+
nvidia: "cuda12-voxcpm-development"
1640+
intel: "intel-voxcpm-development"
1641+
amd: "rocm-voxcpm-development"
1642+
default: "cpu-voxcpm-development"
1643+
nvidia-cuda-13: "cuda13-voxcpm-development"
1644+
nvidia-cuda-12: "cuda12-voxcpm-development"
1645+
- !!merge <<: *voxcpm
1646+
name: "cpu-voxcpm"
1647+
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-voxcpm"
1648+
mirrors:
1649+
- localai/localai-backends:latest-cpu-voxcpm
1650+
- !!merge <<: *voxcpm
1651+
name: "cpu-voxcpm-development"
1652+
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-voxcpm"
1653+
mirrors:
1654+
- localai/localai-backends:master-cpu-voxcpm
1655+
- !!merge <<: *voxcpm
1656+
name: "cuda12-voxcpm"
1657+
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-voxcpm"
1658+
mirrors:
1659+
- localai/localai-backends:latest-gpu-nvidia-cuda-12-voxcpm
1660+
- !!merge <<: *voxcpm
1661+
name: "cuda12-voxcpm-development"
1662+
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-voxcpm"
1663+
mirrors:
1664+
- localai/localai-backends:master-gpu-nvidia-cuda-12-voxcpm
1665+
- !!merge <<: *voxcpm
1666+
name: "cuda13-voxcpm"
1667+
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-voxcpm"
1668+
mirrors:
1669+
- localai/localai-backends:latest-gpu-nvidia-cuda-13-voxcpm
1670+
- !!merge <<: *voxcpm
1671+
name: "cuda13-voxcpm-development"
1672+
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-voxcpm"
1673+
mirrors:
1674+
- localai/localai-backends:master-gpu-nvidia-cuda-13-voxcpm
1675+
- !!merge <<: *voxcpm
1676+
name: "intel-voxcpm"
1677+
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-voxcpm"
1678+
mirrors:
1679+
- localai/localai-backends:latest-gpu-intel-voxcpm
1680+
- !!merge <<: *voxcpm
1681+
name: "intel-voxcpm-development"
1682+
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-voxcpm"
1683+
mirrors:
1684+
- localai/localai-backends:master-gpu-intel-voxcpm
1685+
- !!merge <<: *voxcpm
1686+
name: "rocm-voxcpm"
1687+
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-voxcpm"
1688+
mirrors:
1689+
- localai/localai-backends:latest-gpu-rocm-hipblas-voxcpm
1690+
- !!merge <<: *voxcpm
1691+
name: "rocm-voxcpm-development"
1692+
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-voxcpm"
1693+
mirrors:
1694+
- localai/localai-backends:master-gpu-rocm-hipblas-voxcpm
16161695
## pocket-tts
16171696
- !!merge <<: *pocket-tts
16181697
name: "pocket-tts-development"

backend/python/voxcpm/Makefile

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
.PHONY: voxcpm
2+
voxcpm:
3+
bash install.sh
4+
5+
.PHONY: run
6+
run: voxcpm
7+
@echo "Running voxcpm..."
8+
bash run.sh
9+
@echo "voxcpm run."
10+
11+
.PHONY: test
12+
test: voxcpm
13+
@echo "Testing voxcpm..."
14+
bash test.sh
15+
@echo "voxcpm tested."
16+
17+
.PHONY: protogen-clean
18+
protogen-clean:
19+
$(RM) backend_pb2_grpc.py backend_pb2.py
20+
21+
.PHONY: clean
22+
clean: protogen-clean
23+
rm -rf venv __pycache__

0 commit comments

Comments (0)