Skip to content

Commit 335093b

Browse files
QinYuuuu and Dev Agent authored
Support tts (#602)
Co-authored-by: Dev Agent <[email protected]>
1 parent f293dd5 commit 335093b

File tree

10 files changed

+455
-1
lines changed

10 files changed

+455
-1
lines changed

builder/store/database/deploy_task.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ type Deploy struct {
5151
SecureLevel int `json:"secure_level"`
5252
// 0-space, 1-inference, 2-finetune, 3-serverless, 4-evaluation, 5-notebook
5353
Type int `json:"type"`
54-
Task types.PipelineTask `bun:",nullzero" json:"task"` //text-generation,text-to-image
54+
Task types.PipelineTask `bun:",nullzero" json:"task"` //text-generation,text-to-image,text-to-speech
5555
UserUUID string `bun:"," json:"user_uuid"`
5656
SKU string `bun:"," json:"sku"`
5757
OrderDetailID int64 `bun:"," json:"order_detail_id"`

common/types/repo.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ const (
9393
SentenceSimilarity PipelineTask = "sentence-similarity"
9494
TaskAutoDetection PipelineTask = "task-auto-detection"
9595
VideoText2Text PipelineTask = "video-text-to-text"
96+
TextToSpeech PipelineTask = "text-to-speech"
9697
LlamaCpp InferenceEngine = "llama.cpp"
9798
TEI InferenceEngine = "tei"
9899
Ktransformers InferenceEngine = "ktransformers"

component/callback/git_callback.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -561,6 +561,9 @@ func GetPipelineTaskFromTags(tags []database.Tag) types.PipelineTask {
561561
if tag.Name == string(types.Text2Image) {
562562
return types.Text2Image
563563
}
564+
if tag.Name == string(types.TextToSpeech) {
565+
return types.TextToSpeech
566+
}
564567
}
565568
return ""
566569
}

component/model.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1370,6 +1370,9 @@ func GetBuiltInTaskFromTags(tags []database.Tag) string {
13701370
if tag.Name == string(types.ImageText2Text) {
13711371
return tag.Name
13721372
}
1373+
if tag.Name == string(types.TextToSpeech) {
1374+
return tag.Name
1375+
}
13731376
}
13741377
return string(types.TextGeneration)
13751378
}

configs/inference/audio-fish.json

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
{
2+
"engine_name": "hf-inference-toolkit",
3+
"enabled": 1,
4+
"container_port": 8000,
5+
"model_format": "safetensors",
6+
"engine_images": [
7+
{
8+
"compute_type": "gpu",
9+
"image": "opencsghq/fish-speech:server-cuda",
10+
"driver_version": "12.6",
11+
"engine_version": "1.5.1"
12+
},
13+
{
14+
"compute_type": "cpu",
15+
"image": "opencsghq/fish-speech:server-cpu",
16+
"engine_version": "1.5.1"
17+
}
18+
],
19+
"supported_models": ["openaudio-s1-mini"]
20+
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
FROM docker.1ms.run/fishaudio/fish-speech:server-cpu
2+
USER root
3+
RUN apt-get update && \
4+
apt-get install -y dumb-init && apt-get clean && rm -rf /var/lib/apt/lists/*
5+
6+
# Switch to the user from base image (fish, UID 1000) to install packages
7+
# Install directly into the virtual environment to avoid uv sync delays
8+
USER 1000:1000
9+
WORKDIR /app
10+
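# Configure pip to use Aliyun mirror and install into the virtual environment
# NOTE(review): uv does not read pip's configuration files, so the index URL set via `pip config` likely does not apply to the `uv pip install` below; the CUDA variant passes --index-url to uv directly — confirm and align.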
11+
RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \
12+
uv pip install --no-cache-dir csghub-sdk==0.7.10
13+
14+
USER root
15+
COPY ./fishaudio/ /etc/csghub/
16+
RUN chmod +x /etc/csghub/*.sh
17+
18+
WORKDIR /workspace/
19+
RUN curl -L -o references.tar.gz https://git-devops.opencsg.com/opensource/public_files/-/raw/main/references.tar.gz && \
20+
tar -xzf references.tar.gz && \
21+
rm references.tar.gz
22+
ENV HUGGINGFACE_HUB_CACHE=/workspace/ \
23+
HF_HUB_ENABLE_HF_TRANSFER=0
24+
ENV PORT=8000
25+
EXPOSE 8000
26+
ENTRYPOINT [ "/usr/bin/dumb-init", "--" ]
27+
CMD ["/etc/csghub/serve.sh"]
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
FROM docker.1ms.run/fishaudio/fish-speech:server-cuda
2+
3+
USER root
4+
RUN apt-get update && \
5+
apt-get install -y dumb-init && apt-get clean && rm -rf /var/lib/apt/lists/*
6+
7+
# Switch to the user from base image (fish, UID 1000) to install packages
8+
# Install directly into the virtual environment to avoid uv sync delays
9+
USER 1000:1000
10+
WORKDIR /app
11+
RUN uv pip install --index-url https://mirrors.aliyun.com/pypi/simple --no-cache-dir csghub-sdk==0.7.10
12+
13+
USER root
14+
COPY ./fishaudio/ /etc/csghub/
15+
RUN chmod +x /etc/csghub/*.sh
16+
17+
WORKDIR /workspace/
18+
RUN curl -L -o references.tar.gz https://git-devops.opencsg.com/opensource/public_files/-/raw/main/references.tar.gz && \
19+
tar -xzf references.tar.gz && \
20+
rm references.tar.gz
21+
ENV HUGGINGFACE_HUB_CACHE=/workspace/ \
22+
HF_HUB_ENABLE_HF_TRANSFER=0
23+
ENV PORT=8000
24+
ENV COMPILE=1
25+
EXPOSE 8000
26+
ENTRYPOINT [ "/usr/bin/dumb-init", "--" ]
27+
CMD ["/etc/csghub/serve.sh"]

0 commit comments

Comments
 (0)