Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
8ace319
Set minimum characters for Long Text async API
hastla007 Oct 25, 2025
4a08ac1
Rename MIN_LONG_TEXT_LEN to LONG_TEXT_MIN_LENGTH
hastla007 Oct 25, 2025
6cdc417
Add LONG_TEXT_MIN_LENGTH to .env.example
hastla007 Oct 25, 2025
52bba5f
Make long text minimum length configurable
hastla007 Oct 25, 2025
e0abe29
Merge pull request #1 from hastla007/codex/add-configurable-long-text…
hastla007 Oct 25, 2025
4f7938c
Fix long text minimum length validation
hastla007 Oct 25, 2025
b33b185
Merge branch 'main' into codex/add-configurable-long-text-minimum-len…
hastla007 Oct 25, 2025
4ab4eff
Merge pull request #2 from hastla007/codex/add-configurable-long-text…
hastla007 Oct 25, 2025
aa78eb7
Fix long text limits to use runtime configuration
hastla007 Oct 25, 2025
b034b2a
Merge branch 'main' into codex/add-configurable-long-text-minimum-len…
hastla007 Oct 25, 2025
bf51314
Merge pull request #3 from hastla007/codex/add-configurable-long-text…
hastla007 Oct 25, 2025
256ca50
Update LONG_TEXT_MIN_LENGTH in .env.example.docker
hastla007 Oct 25, 2025
3110517
Add LONG_TEXT_MIN_LENGTH environment variable
hastla007 Oct 25, 2025
f989012
Add LONG_TEXT_MIN_LENGTH environment variable
hastla007 Oct 25, 2025
f2ac6d6
Update docker-compose.cpu.yml
hastla007 Oct 25, 2025
c16ec94
Change LONG_TEXT_MIN_LENGTH from 1000 to 100
hastla007 Oct 25, 2025
6546a77
Update .env.example
hastla007 Oct 25, 2025
225a9b0
Add LONG_TEXT_MIN_LENGTH environment variable
hastla007 Oct 25, 2025
507402d
Add LONG_TEXT_MIN_LENGTH environment variable
hastla007 Oct 25, 2025
8135553
Add LONG_TEXT_MIN_LENGTH environment variable
hastla007 Oct 25, 2025
3396679
Add LONG_TEXT_MIN_LENGTH environment variable
hastla007 Oct 25, 2025
c54f57c
Add LONG_TEXT_MIN_LENGTH environment variable
hastla007 Oct 25, 2025
97e2d82
Add LONG_TEXT_MIN_LENGTH environment variable
hastla007 Oct 25, 2025
4e68520
Add LONG_TEXT_MIN_LENGTH environment variable
hastla007 Oct 25, 2025
7a0b06d
Add LONG_TEXT_MIN_LENGTH environment variable
hastla007 Oct 25, 2025
0cf46f9
Add LONG_TEXT_MIN_LENGTH environment variable
hastla007 Oct 25, 2025
1f4db88
Add project name prefix to Docker commands
hastla007 Oct 25, 2025
fc3c564
feat: add quality presets to long text tts
hastla007 Oct 27, 2025
ac77860
Merge pull request #4 from hastla007/codex/integrate-chunking-and-qua…
hastla007 Oct 27, 2025
88012f3
Align high quality chunk size with TTS limits
hastla007 Oct 27, 2025
532c02f
Merge branch 'main' into codex/integrate-chunking-and-quality-presets…
hastla007 Oct 27, 2025
e4a725e
Merge pull request #5 from hastla007/codex/integrate-chunking-and-qua…
hastla007 Oct 27, 2025
9f15463
Add punctuation-driven pause handling
hastla007 Oct 27, 2025
349c25f
Merge branch 'main' into codex/integrate-chunking-and-quality-presets…
hastla007 Oct 27, 2025
4835eb1
Merge pull request #6 from hastla007/codex/integrate-chunking-and-qua…
hastla007 Oct 27, 2025
7334cce
Extend pause handler coverage for punctuation spacing
hastla007 Oct 27, 2025
1edcde1
Merge branch 'main' into codex/integrate-chunking-and-quality-presets…
hastla007 Oct 27, 2025
4878a21
Merge pull request #7 from hastla007/codex/integrate-chunking-and-qua…
hastla007 Oct 27, 2025
818c080
Update docker-compose.blackwell.yml
hastla007 Oct 27, 2025
32fa02e
Add batched long text audio processing
hastla007 Oct 27, 2025
7be0b1a
Merge pull request #8 from hastla007/codex/integrate-batching-and-def…
hastla007 Oct 27, 2025
479431b
Update .env.example.docker
hastla007 Oct 27, 2025
3b814bb
Update docker-compose.blackwell.yml
hastla007 Oct 28, 2025
b8a4832
Update .env.example.docker
hastla007 Oct 28, 2025
1251285
Update docker-compose.blackwell.yml
hastla007 Oct 28, 2025
38071fe
Use Config pause settings when building PauseHandler
hastla007 Oct 28, 2025
915a3f1
Add configurable pause duration for periods
hastla007 Oct 28, 2025
e2e2267
Merge pull request #9 from hastla007/codex/use-config-values-in-pause…
hastla007 Oct 28, 2025
6e0557f
Align pause defaults with docker configuration
hastla007 Oct 28, 2025
555e4f9
Merge branch 'main' into codex/use-config-values-in-pausehandler-3y7c8x
hastla007 Oct 28, 2025
d8c9b59
Merge pull request #10 from hastla007/codex/use-config-values-in-paus…
hastla007 Oct 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,27 @@ LONG_TEXT_JOB_RETENTION_DAYS=7
# Maximum number of concurrent long text jobs (default: 3)
LONG_TEXT_MAX_CONCURRENT_JOBS=3

# Minimum number of characters a text must contain to use the long text async API
LONG_TEXT_MIN_LENGTH=100

# =============================================================================
# Pause Handling Configuration
# =============================================================================

# Enable automatic pauses at punctuation patterns (true/false)
ENABLE_PUNCTUATION_PAUSES=true

# Pause durations in milliseconds for supported punctuation
ELLIPSIS_PAUSE_MS=600
EM_DASH_PAUSE_MS=400
EN_DASH_PAUSE_MS=350
PARAGRAPH_PAUSE_MS=500
LINE_BREAK_PAUSE_MS=250

# Clamp pause durations to avoid extreme values
MIN_PAUSE_MS=100
MAX_PAUSE_MS=2000

# =============================================================================
# Docker-specific Configuration
# =============================================================================
Expand Down Expand Up @@ -146,4 +167,4 @@ ENABLE_MEMORY_MONITORING=true

# For slower, more careful speech:
# CFG_WEIGHT=0.8
# TEMPERATURE=0.4
# TEMPERATURE=0.4
36 changes: 35 additions & 1 deletion .env.example.docker
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ LONG_TEXT_MAX_LENGTH=100000
# Chunk size for splitting long text (default: 2500 chars, must be < MAX_TOTAL_LENGTH)
LONG_TEXT_CHUNK_SIZE=2500

# Batch processing configuration for GPU utilization (tune based on GPU memory)
# Suggested values by GPU: RTX 3090/4090: 4-6, A100: 8-12, H100: 12-16
LONG_TEXT_BATCH_SIZE=6

# Silence padding between chunks in milliseconds (default: 200ms)
LONG_TEXT_SILENCE_PADDING_MS=200

Expand All @@ -88,6 +91,37 @@ LONG_TEXT_JOB_RETENTION_DAYS=7
# Maximum number of concurrent long text jobs (default: 3)
LONG_TEXT_MAX_CONCURRENT_JOBS=3

# Minimum number of characters a text must contain to use the long text async API
LONG_TEXT_MIN_LENGTH=100

# Default chunking strategy and quality preset for long text processing
LONG_TEXT_CHUNKING_STRATEGY=sentence
LONG_TEXT_QUALITY_PRESET=balanced

# Quality preset tuning parameters
QUALITY_FAST_CHUNK_SIZE=1500
QUALITY_FAST_CFG_WEIGHT=0.3
QUALITY_FAST_TEMPERATURE=0.6

QUALITY_BALANCED_CHUNK_SIZE=2500
QUALITY_BALANCED_CFG_WEIGHT=0.5
QUALITY_BALANCED_TEMPERATURE=0.8

QUALITY_HIGH_CHUNK_SIZE=2800
QUALITY_HIGH_CFG_WEIGHT=0.7
QUALITY_HIGH_TEMPERATURE=1.0

# Pause handling configuration
ENABLE_PUNCTUATION_PAUSES=true
ELLIPSIS_PAUSE_MS=800
EM_DASH_PAUSE_MS=550
EN_DASH_PAUSE_MS=375
PERIOD_PAUSE_MS=500
PARAGRAPH_PAUSE_MS=800
LINE_BREAK_PAUSE_MS=350
MIN_PAUSE_MS=200
MAX_PAUSE_MS=2000

# =============================================================================
# Docker Volume Configuration
# =============================================================================
Expand Down Expand Up @@ -141,4 +175,4 @@ ENABLE_MEMORY_MONITORING=true

# For slower, more careful speech:
# CFG_WEIGHT=0.8
# TEMPERATURE=0.4
# TEMPERATURE=0.4
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ coverage.xml
test_*

!tests/test_*
!unit_tests/test_*

reference/
CLAUDE.md
Expand Down
20 changes: 10 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,18 +123,18 @@ cp .env.example.docker .env # Docker-specific paths, ready to use
# Choose your deployment method:

# API Only (default)
docker compose -f docker/docker-compose.yml up -d # Standard (pip-based)
docker compose -f docker/docker-compose.uv.yml up -d # uv-optimized (faster builds)
docker compose -f docker/docker-compose.gpu.yml up -d # Standard + GPU
docker compose -f docker/docker-compose.uv.gpu.yml up -d # uv + GPU (recommended for GPU users)
docker compose -f docker/docker-compose.cpu.yml up -d # CPU-only
docker compose -f docker/docker-compose.blackwell.yml up -d # Blackwell (50XX) NVIDIA GPUs
docker compose -p tts-api -f docker/docker-compose.yml up -d # Standard (pip-based)
docker compose -p tts-api -f docker/docker-compose.uv.yml up -d # uv-optimized (faster builds)
docker compose -p tts-api -f docker/docker-compose.gpu.yml up -d # Standard + GPU
docker compose -p tts-api -f docker/docker-compose.uv.gpu.yml up -d # uv + GPU (recommended for GPU users)
docker compose -p tts-api -f docker/docker-compose.cpu.yml up -d # CPU-only
docker compose -p tts-api -f docker/docker-compose.blackwell.yml up -d # Blackwell (50XX) NVIDIA GPUs

# API + Frontend (add --profile frontend to any of the above)
docker compose -f docker/docker-compose.yml --profile frontend up -d # Standard + Frontend
docker compose -f docker/docker-compose.gpu.yml --profile frontend up -d # GPU + Frontend
docker compose -f docker/docker-compose.uv.gpu.yml --profile frontend up -d # uv + GPU + Frontend
docker compose -f docker/docker-compose.blackwell.yml --profile frontend up -d # (Blackwell) uv + GPU + Frontend
docker compose -p tts-api -f docker/docker-compose.yml --profile frontend up -d # Standard + Frontend
docker compose -p tts-api -f docker/docker-compose.gpu.yml --profile frontend up -d # GPU + Frontend
docker compose -p tts-api -f docker/docker-compose.uv.gpu.yml --profile frontend up -d # uv + GPU + Frontend
docker compose -p tts-api -f docker/docker-compose.blackwell.yml --profile frontend up -d # (Blackwell) uv + GPU + Frontend

# Watch the logs as it initializes (the first use of TTS takes the longest)
docker logs chatterbox-tts-api -f
Expand Down
3 changes: 2 additions & 1 deletion app/api/endpoints/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ async def get_config():
"cfg_weight": Config.CFG_WEIGHT,
"temperature": Config.TEMPERATURE,
"max_chunk_length": Config.MAX_CHUNK_LENGTH,
"max_total_length": Config.MAX_TOTAL_LENGTH
"max_total_length": Config.MAX_TOTAL_LENGTH,
"long_text_min_length": Config.get_long_text_min_length(),
},
memory_management={
"memory_cleanup_interval": Config.MEMORY_CLEANUP_INTERVAL,
Expand Down
30 changes: 24 additions & 6 deletions app/api/endpoints/long_text.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Long text TTS endpoints for processing texts > 3000 characters
Long text TTS endpoints for processing texts that exceed the configured minimum length
"""

import asyncio
Expand Down Expand Up @@ -30,6 +30,7 @@
from app.config import Config
from app.core.long_text_jobs import get_job_manager
from app.core.background_tasks import get_processor
from app.core.quality_presets import get_quality_preset
from app.core.text_processing import validate_long_text_input, estimate_processing_time
from app.core import add_route_aliases

Expand All @@ -43,7 +44,7 @@ async def create_long_text_job(request: LongTextRequest):
"""
Submit a long text TTS job for background processing.

Text must be > 3000 characters to use this endpoint.
Text must exceed the configured minimum length to use this endpoint.
For shorter texts, use /audio/speech instead.
"""
try:
Expand All @@ -60,6 +61,17 @@ async def create_long_text_job(request: LongTextRequest):
}
)

# Resolve quality and chunking configuration
preset_name = request.get_quality_preset()
preset_config = get_quality_preset(preset_name)

cfg_weight = request.cfg_weight if request.cfg_weight is not None else preset_config["cfg_weight"]
temperature = request.temperature if request.temperature is not None else preset_config["temperature"]
chunk_size = request.get_chunk_size(preset_config)
silence_padding = request.get_silence_padding()
chunking_strategy = request.get_chunking_strategy()
pause_settings = request.resolve_pause_settings()

# Get job manager and processor
job_manager = get_job_manager()
processor = get_processor()
Expand All @@ -70,16 +82,22 @@ async def create_long_text_job(request: LongTextRequest):
voice=request.voice,
output_format=request.response_format or "mp3",
exaggeration=request.exaggeration,
cfg_weight=request.cfg_weight,
temperature=request.temperature,
session_id=request.session_id
cfg_weight=cfg_weight,
temperature=temperature,
session_id=request.session_id,
chunking_strategy=chunking_strategy,
chunk_size=chunk_size,
silence_padding=silence_padding,
quality_preset=preset_name,
enable_pauses=pause_settings["enable"],
custom_pauses=pause_settings["custom"],
)

# Submit for background processing
await processor.submit_job(job_id)

# Estimate processing time
estimated_time = estimate_processing_time(len(request.input))
estimated_time = estimate_processing_time(len(request.input), chunk_size=chunk_size)

return LongTextJobCreateResponse(
job_id=job_id,
Expand Down
Loading