travisvn · hastla007 · Oct 25, 2025 · Oct 25, 2025 · Oct 25, 2025 · Oct 25, 2025
diff --git a/.env.example b/.env.example
@@ -93,6 +93,27 @@ LONG_TEXT_JOB_RETENTION_DAYS=7
 # Maximum number of concurrent long text jobs (default: 3)
 LONG_TEXT_MAX_CONCURRENT_JOBS=3
 
+# Minimum input length (in characters) for the long text async API; use /audio/speech for shorter texts
+LONG_TEXT_MIN_LENGTH=100
+
+# =============================================================================
+# Pause Handling Configuration
+# =============================================================================
+
+# Enable automatic pauses at punctuation patterns (true/false)
+ENABLE_PUNCTUATION_PAUSES=true
+
+# Pause durations in milliseconds for supported punctuation and line/paragraph breaks
+ELLIPSIS_PAUSE_MS=600
+EM_DASH_PAUSE_MS=400
+EN_DASH_PAUSE_MS=350
+PARAGRAPH_PAUSE_MS=500
+LINE_BREAK_PAUSE_MS=250
+
+# Clamp pause durations to avoid extreme values
+MIN_PAUSE_MS=100
+MAX_PAUSE_MS=2000
+
 # =============================================================================
 # Docker-specific Configuration
 # =============================================================================
@@ -146,4 +167,4 @@ ENABLE_MEMORY_MONITORING=true
 
 # For slower, more careful speech:
 # CFG_WEIGHT=0.8
-# TEMPERATURE=0.4 
+# TEMPERATURE=0.4 
diff --git a/.env.example.docker b/.env.example.docker
@@ -79,6 +79,9 @@ LONG_TEXT_MAX_LENGTH=100000
 # Chunk size for splitting long text (default: 2500 chars, must be < MAX_TOTAL_LENGTH)
 LONG_TEXT_CHUNK_SIZE=2500
 
+# Batch processing configuration for GPU utilization; tune based on GPU memory (RTX 3090/4090: 4-6, A100: 8-12, H100: 12-16)
+LONG_TEXT_BATCH_SIZE=6
+
 # Silence padding between chunks in milliseconds (default: 200ms)
 LONG_TEXT_SILENCE_PADDING_MS=200
 
@@ -88,6 +91,37 @@ LONG_TEXT_JOB_RETENTION_DAYS=7
 # Maximum number of concurrent long text jobs (default: 3)
 LONG_TEXT_MAX_CONCURRENT_JOBS=3
 
+# Minimum input length (in characters) for the long text async API; use /audio/speech for shorter texts
+LONG_TEXT_MIN_LENGTH=100
+
+# Default chunking strategy and quality preset for long text processing
+LONG_TEXT_CHUNKING_STRATEGY=sentence
+LONG_TEXT_QUALITY_PRESET=balanced
+
+# Quality preset tuning parameters
+QUALITY_FAST_CHUNK_SIZE=1500
+QUALITY_FAST_CFG_WEIGHT=0.3
+QUALITY_FAST_TEMPERATURE=0.6
+
+QUALITY_BALANCED_CHUNK_SIZE=2500
+QUALITY_BALANCED_CFG_WEIGHT=0.5
+QUALITY_BALANCED_TEMPERATURE=0.8
+
+QUALITY_HIGH_CHUNK_SIZE=2800
+QUALITY_HIGH_CFG_WEIGHT=0.7
+QUALITY_HIGH_TEMPERATURE=1.0
+
+# Pause handling configuration (durations in milliseconds)
+ENABLE_PUNCTUATION_PAUSES=true
+ELLIPSIS_PAUSE_MS=800
+EM_DASH_PAUSE_MS=550
+EN_DASH_PAUSE_MS=375
+PERIOD_PAUSE_MS=500
+PARAGRAPH_PAUSE_MS=800
+LINE_BREAK_PAUSE_MS=350
+MIN_PAUSE_MS=200
+MAX_PAUSE_MS=2000
+
 # =============================================================================
 # Docker Volume Configuration
 # =============================================================================
@@ -141,4 +175,4 @@ ENABLE_MEMORY_MONITORING=true
 
 # For slower, more careful speech:
 # CFG_WEIGHT=0.8
-# TEMPERATURE=0.4 
+# TEMPERATURE=0.4 
diff --git a/.gitignore b/.gitignore
@@ -99,6 +99,7 @@ coverage.xml
 test_*
 
 !tests/test_*
+!unit_tests/test_*
 
 reference/
 CLAUDE.md

diff --git a/README.md b/README.md
@@ -123,18 +123,18 @@ cp .env.example.docker .env  # Docker-specific paths, ready to use
 # Choose your deployment method:
 
 # API Only (default)
-docker compose -f docker/docker-compose.yml up -d             # Standard (pip-based)
-docker compose -f docker/docker-compose.uv.yml up -d          # uv-optimized (faster builds)
-docker compose -f docker/docker-compose.gpu.yml up -d         # Standard + GPU
-docker compose -f docker/docker-compose.uv.gpu.yml up -d      # uv + GPU (recommended for GPU users)
-docker compose -f docker/docker-compose.cpu.yml up -d         # CPU-only
-docker compose -f docker/docker-compose.blackwell.yml up -d   # Blackwell (50XX) NVIDIA GPUs
+docker compose -p tts-api -f docker/docker-compose.yml up -d             # Standard (pip-based)
+docker compose -p tts-api -f docker/docker-compose.uv.yml up -d          # uv-optimized (faster builds)
+docker compose -p tts-api -f docker/docker-compose.gpu.yml up -d         # Standard + GPU
+docker compose -p tts-api -f docker/docker-compose.uv.gpu.yml up -d      # uv + GPU (recommended for GPU users)
+docker compose -p tts-api -f docker/docker-compose.cpu.yml up -d         # CPU-only
+docker compose -p tts-api -f docker/docker-compose.blackwell.yml up -d   # Blackwell (50XX) NVIDIA GPUs
 
 # API + Frontend (add --profile frontend to any of the above)
-docker compose -f docker/docker-compose.yml --profile frontend up -d             # Standard + Frontend
-docker compose -f docker/docker-compose.gpu.yml --profile frontend up -d         # GPU + Frontend
-docker compose -f docker/docker-compose.uv.gpu.yml --profile frontend up -d      # uv + GPU + Frontend
-docker compose -f docker/docker-compose.blackwell.yml --profile frontend up -d   # (Blackwell) uv + GPU + Frontend
+docker compose -p tts-api -f docker/docker-compose.yml --profile frontend up -d             # Standard + Frontend
+docker compose -p tts-api -f docker/docker-compose.gpu.yml --profile frontend up -d         # GPU + Frontend
+docker compose -p tts-api -f docker/docker-compose.uv.gpu.yml --profile frontend up -d      # uv + GPU + Frontend
+docker compose -p tts-api -f docker/docker-compose.blackwell.yml --profile frontend up -d   # (Blackwell) uv + GPU + Frontend
 
 # Watch the logs as it initializes (the first use of TTS takes the longest)
 docker logs chatterbox-tts-api -f

diff --git a/app/api/endpoints/config.py b/app/api/endpoints/config.py
@@ -50,7 +50,8 @@ async def get_config():
             "cfg_weight": Config.CFG_WEIGHT,
             "temperature": Config.TEMPERATURE,
             "max_chunk_length": Config.MAX_CHUNK_LENGTH,
-            "max_total_length": Config.MAX_TOTAL_LENGTH
+            "max_total_length": Config.MAX_TOTAL_LENGTH,
+            "long_text_min_length": Config.get_long_text_min_length(),
         },
         memory_management={
             "memory_cleanup_interval": Config.MEMORY_CLEANUP_INTERVAL,

diff --git a/app/api/endpoints/long_text.py b/app/api/endpoints/long_text.py
@@ -1,5 +1,5 @@
 """
-Long text TTS endpoints for processing texts > 3000 characters
+Long text TTS endpoints for processing texts that exceed the configured minimum length
 """
 
 import asyncio
@@ -30,6 +30,7 @@
 from app.config import Config
 from app.core.long_text_jobs import get_job_manager
 from app.core.background_tasks import get_processor
+from app.core.quality_presets import get_quality_preset
 from app.core.text_processing import validate_long_text_input, estimate_processing_time
 from app.core import add_route_aliases
 
@@ -43,7 +44,7 @@ async def create_long_text_job(request: LongTextRequest):
     """
     Submit a long text TTS job for background processing.
 
-    Text must be > 3000 characters to use this endpoint.
+    Text must exceed the configured minimum length to use this endpoint.
     For shorter texts, use /audio/speech instead.
     """
     try:
@@ -60,6 +61,17 @@ async def create_long_text_job(request: LongTextRequest):
                 }
             )
 
+        # Resolve quality and chunking configuration
+        preset_name = request.get_quality_preset()
+        preset_config = get_quality_preset(preset_name)
+
+        cfg_weight = request.cfg_weight if request.cfg_weight is not None else preset_config["cfg_weight"]
+        temperature = request.temperature if request.temperature is not None else preset_config["temperature"]
+        chunk_size = request.get_chunk_size(preset_config)
+        silence_padding = request.get_silence_padding()
+        chunking_strategy = request.get_chunking_strategy()
+        pause_settings = request.resolve_pause_settings()
+
         # Get job manager and processor
         job_manager = get_job_manager()
         processor = get_processor()
@@ -70,16 +82,22 @@ async def create_long_text_job(request: LongTextRequest):
             voice=request.voice,
             output_format=request.response_format or "mp3",
             exaggeration=request.exaggeration,
-            cfg_weight=request.cfg_weight,
-            temperature=request.temperature,
-            session_id=request.session_id
+            cfg_weight=cfg_weight,
+            temperature=temperature,
+            session_id=request.session_id,
+            chunking_strategy=chunking_strategy,
+            chunk_size=chunk_size,
+            silence_padding=silence_padding,
+            quality_preset=preset_name,
+            enable_pauses=pause_settings["enable"],
+            custom_pauses=pause_settings["custom"],
         )
 
         # Submit for background processing
         await processor.submit_job(job_id)
 
         # Estimate processing time
-        estimated_time = estimate_processing_time(len(request.input))
+        estimated_time = estimate_processing_time(len(request.input), chunk_size=chunk_size)
 
         return LongTextJobCreateResponse(
             job_id=job_id,