diff --git a/.env.example b/.env.example index 49245c9..97b7bf9 100755 --- a/.env.example +++ b/.env.example @@ -93,6 +93,27 @@ LONG_TEXT_JOB_RETENTION_DAYS=7 # Maximum number of concurrent long text jobs (default: 3) LONG_TEXT_MAX_CONCURRENT_JOBS=3 +# Minimum characters for Long Text async API +LONG_TEXT_MIN_LENGTH=100 + +# ============================================================================= +# Pause Handling Configuration +# ============================================================================= + +# Enable automatic pauses at punctuation patterns (true/false) +ENABLE_PUNCTUATION_PAUSES=true + +# Pause durations in milliseconds for supported punctuation +ELLIPSIS_PAUSE_MS=600 +EM_DASH_PAUSE_MS=400 +EN_DASH_PAUSE_MS=350 +PARAGRAPH_PAUSE_MS=500 +LINE_BREAK_PAUSE_MS=250 + +# Clamp pause durations to avoid extreme values +MIN_PAUSE_MS=100 +MAX_PAUSE_MS=2000 + # ============================================================================= # Docker-specific Configuration # ============================================================================= @@ -146,4 +167,4 @@ ENABLE_MEMORY_MONITORING=true # For slower, more careful speech: # CFG_WEIGHT=0.8 -# TEMPERATURE=0.4 \ No newline at end of file +# TEMPERATURE=0.4 diff --git a/.env.example.docker b/.env.example.docker index 126cfad..f38bee4 100755 --- a/.env.example.docker +++ b/.env.example.docker @@ -79,6 +79,9 @@ LONG_TEXT_MAX_LENGTH=100000 # Chunk size for splitting long text (default: 2500 chars, must be < MAX_TOTAL_LENGTH) LONG_TEXT_CHUNK_SIZE=2500 +# Batch processing configuration for GPU utilization (tune based on GPU memory) +LONG_TEXT_BATCH_SIZE=6 # RTX 3090/4090: 4-6, A100: 8-12, H100: 12-16 + # Silence padding between chunks in milliseconds (default: 200ms) LONG_TEXT_SILENCE_PADDING_MS=200 @@ -88,6 +91,37 @@ LONG_TEXT_JOB_RETENTION_DAYS=7 # Maximum number of concurrent long text jobs (default: 3) LONG_TEXT_MAX_CONCURRENT_JOBS=3 +# Minimum characters for Long Text async API +LONG_TEXT_MIN_LENGTH=100 + +# Default chunking strategy and quality preset for long text processing +LONG_TEXT_CHUNKING_STRATEGY=sentence +LONG_TEXT_QUALITY_PRESET=balanced + +# Quality preset tuning parameters +QUALITY_FAST_CHUNK_SIZE=1500 +QUALITY_FAST_CFG_WEIGHT=0.3 +QUALITY_FAST_TEMPERATURE=0.6 + +QUALITY_BALANCED_CHUNK_SIZE=2500 +QUALITY_BALANCED_CFG_WEIGHT=0.5 +QUALITY_BALANCED_TEMPERATURE=0.8 + +QUALITY_HIGH_CHUNK_SIZE=2800 +QUALITY_HIGH_CFG_WEIGHT=0.7 +QUALITY_HIGH_TEMPERATURE=1.0 + +# Pause handling configuration +ENABLE_PUNCTUATION_PAUSES=true +ELLIPSIS_PAUSE_MS=800 +EM_DASH_PAUSE_MS=550 +EN_DASH_PAUSE_MS=375 +PERIOD_PAUSE_MS=500 +PARAGRAPH_PAUSE_MS=800 +LINE_BREAK_PAUSE_MS=350 +MIN_PAUSE_MS=200 +MAX_PAUSE_MS=2000 + # ============================================================================= # Docker Volume Configuration # ============================================================================= @@ -141,4 +175,4 @@ ENABLE_MEMORY_MONITORING=true # For slower, more careful speech: # CFG_WEIGHT=0.8 -# TEMPERATURE=0.4 \ No newline at end of file +# TEMPERATURE=0.4 diff --git a/.gitignore b/.gitignore index a7f4fa8..8ef98e1 100755 --- a/.gitignore +++ b/.gitignore @@ -99,6 +99,7 @@ coverage.xml test_* !tests/test_* +!unit_tests/test_* reference/ CLAUDE.md diff --git a/README.md b/README.md index ff65d82..4a250ea 100755 --- a/README.md +++ b/README.md @@ -123,18 +123,18 @@ cp .env.example.docker .env # Docker-specific paths, ready to use # Choose your deployment method: # API Only (default) -docker compose -f docker/docker-compose.yml 
up -d # Standard (pip-based) -docker compose -f docker/docker-compose.uv.yml up -d # uv-optimized (faster builds) -docker compose -f docker/docker-compose.gpu.yml up -d # Standard + GPU -docker compose -f docker/docker-compose.uv.gpu.yml up -d # uv + GPU (recommended for GPU users) -docker compose -f docker/docker-compose.cpu.yml up -d # CPU-only -docker compose -f docker/docker-compose.blackwell.yml up -d # Blackwell (50XX) NVIDIA GPUs +docker compose -p tts-api -f docker/docker-compose.yml up -d # Standard (pip-based) +docker compose -p tts-api -f docker/docker-compose.uv.yml up -d # uv-optimized (faster builds) +docker compose -p tts-api -f docker/docker-compose.gpu.yml up -d # Standard + GPU +docker compose -p tts-api -f docker/docker-compose.uv.gpu.yml up -d # uv + GPU (recommended for GPU users) +docker compose -p tts-api -f docker/docker-compose.cpu.yml up -d # CPU-only +docker compose -p tts-api -f docker/docker-compose.blackwell.yml up -d # Blackwell (50XX) NVIDIA GPUs # API + Frontend (add --profile frontend to any of the above) -docker compose -f docker/docker-compose.yml --profile frontend up -d # Standard + Frontend -docker compose -f docker/docker-compose.gpu.yml --profile frontend up -d # GPU + Frontend -docker compose -f docker/docker-compose.uv.gpu.yml --profile frontend up -d # uv + GPU + Frontend -docker compose -f docker/docker-compose.blackwell.yml --profile frontend up -d # (Blackwell) uv + GPU + Frontend +docker compose -p tts-api -f docker/docker-compose.yml --profile frontend up -d # Standard + Frontend +docker compose -p tts-api -f docker/docker-compose.gpu.yml --profile frontend up -d # GPU + Frontend +docker compose -p tts-api -f docker/docker-compose.uv.gpu.yml --profile frontend up -d # uv + GPU + Frontend +docker compose -p tts-api -f docker/docker-compose.blackwell.yml --profile frontend up -d # (Blackwell) uv + GPU + Frontend # Watch the logs as it initializes (the first use of TTS takes the longest) docker logs chatterbox-tts-api -f diff --git a/app/api/endpoints/config.py b/app/api/endpoints/config.py index ecf58d2..4e90140 100644 --- a/app/api/endpoints/config.py +++ b/app/api/endpoints/config.py @@ -50,7 +50,8 @@ async def get_config(): "cfg_weight": Config.CFG_WEIGHT, "temperature": Config.TEMPERATURE, "max_chunk_length": Config.MAX_CHUNK_LENGTH, - "max_total_length": Config.MAX_TOTAL_LENGTH + "max_total_length": Config.MAX_TOTAL_LENGTH, + "long_text_min_length": Config.get_long_text_min_length(), }, memory_management={ "memory_cleanup_interval": Config.MEMORY_CLEANUP_INTERVAL, diff --git a/app/api/endpoints/long_text.py b/app/api/endpoints/long_text.py index fa6ed90..aa5e05a 100644 --- a/app/api/endpoints/long_text.py +++ b/app/api/endpoints/long_text.py @@ -1,5 +1,5 @@ """ -Long text TTS endpoints for processing texts > 3000 characters +Long text TTS endpoints for processing texts that exceed the configured minimum length """ import asyncio @@ -30,6 +30,7 @@ from app.config import Config from app.core.long_text_jobs import get_job_manager from app.core.background_tasks import get_processor +from app.core.quality_presets import get_quality_preset from app.core.text_processing import validate_long_text_input, estimate_processing_time from app.core import add_route_aliases @@ -43,7 +44,7 @@ async def create_long_text_job(request: LongTextRequest): """ Submit a long text TTS job for background processing. - Text must be > 3000 characters to use this endpoint. + Text must exceed the configured minimum length to use this endpoint. 
For shorter texts, use /audio/speech instead. """ try: @@ -60,6 +61,17 @@ async def create_long_text_job(request: LongTextRequest): } ) + # Resolve quality and chunking configuration + preset_name = request.get_quality_preset() + preset_config = get_quality_preset(preset_name) + + cfg_weight = request.cfg_weight if request.cfg_weight is not None else preset_config["cfg_weight"] + temperature = request.temperature if request.temperature is not None else preset_config["temperature"] + chunk_size = request.get_chunk_size(preset_config) + silence_padding = request.get_silence_padding() + chunking_strategy = request.get_chunking_strategy() + pause_settings = request.resolve_pause_settings() + # Get job manager and processor job_manager = get_job_manager() processor = get_processor() @@ -70,16 +82,22 @@ async def create_long_text_job(request: LongTextRequest): voice=request.voice, output_format=request.response_format or "mp3", exaggeration=request.exaggeration, - cfg_weight=request.cfg_weight, - temperature=request.temperature, - session_id=request.session_id + cfg_weight=cfg_weight, + temperature=temperature, + session_id=request.session_id, + chunking_strategy=chunking_strategy, + chunk_size=chunk_size, + silence_padding=silence_padding, + quality_preset=preset_name, + enable_pauses=pause_settings["enable"], + custom_pauses=pause_settings["custom"], ) # Submit for background processing await processor.submit_job(job_id) # Estimate processing time - estimated_time = estimate_processing_time(len(request.input)) + estimated_time = estimate_processing_time(len(request.input), chunk_size=chunk_size) return LongTextJobCreateResponse( job_id=job_id, diff --git a/app/api/endpoints/speech.py b/app/api/endpoints/speech.py index aabb687..18a88c8 100644 --- a/app/api/endpoints/speech.py +++ b/app/api/endpoints/speech.py @@ -1,11 +1,10 @@ -""" -Text-to-speech endpoint -""" +"""Text-to-speech endpoint.""" import io import os import asyncio import tempfile +import logging import torch import torchaudio as ta import base64 @@ -22,6 +21,7 @@ split_text_into_chunks, concatenate_audio_chunks, add_route_aliases, TTSStatus, start_tts_request, update_tts_status, get_voice_library ) +from app.core.pause_handler import PauseHandler from app.core.tts_model import get_model, is_multilingual from app.core.text_processing import split_text_for_streaming, get_streaming_settings @@ -29,6 +29,8 @@ base_router = APIRouter() router = add_route_aliases(base_router) +logger = logging.getLogger(__name__) + # Request counter for memory management REQUEST_COUNTER = 0 @@ -147,14 +149,27 @@ async def generate_speech_internal( language_id: str = "en", exaggeration: Optional[float] = None, cfg_weight: Optional[float] = None, - temperature: Optional[float] = None + temperature: Optional[float] = None, + enable_pauses: Optional[bool] = None, + custom_pauses: Optional[Dict[str, int]] = None, ) -> io.BytesIO: - """Internal function to generate speech with given parameters""" + """Internal function to generate speech with given parameters.""" global REQUEST_COUNTER REQUEST_COUNTER += 1 # Start TTS request tracking voice_source = "uploaded file" if voice_sample_path != Config.VOICE_SAMPLE_PATH else "default" + resolved_enable_pauses = ( + Config.ENABLE_PUNCTUATION_PAUSES if enable_pauses is None else bool(enable_pauses) + ) + pause_overrides = {} + if custom_pauses: + for key, value in custom_pauses.items(): + try: + pause_overrides[str(key)] = int(value) + except (TypeError, ValueError): + logger.debug("Ignoring invalid custom pause 
override %r=%r", key, value) + request_id = start_tts_request( text=text, voice_source=voice_source, @@ -162,7 +177,9 @@ async def generate_speech_internal( "exaggeration": exaggeration, "cfg_weight": cfg_weight, "temperature": temperature, - "voice_sample_path": voice_sample_path + "voice_sample_path": voice_sample_path, + "enable_pauses": resolved_enable_pauses, + "custom_pauses": pause_overrides, } ) @@ -203,41 +220,106 @@ async def generate_speech_internal( } ) - audio_chunks = [] + audio_chunks: List[Any] = [] final_audio = None buffer = None - + assembled_segments: List[Any] = [] + silence_segments: List[Any] = [] + try: # Get parameters with defaults exaggeration = exaggeration if exaggeration is not None else Config.EXAGGERATION cfg_weight = cfg_weight if cfg_weight is not None else Config.CFG_WEIGHT temperature = temperature if temperature is not None else Config.TEMPERATURE - - # Split text into chunks - update_tts_status(request_id, TTSStatus.CHUNKING, "Splitting text into chunks") - chunks = split_text_into_chunks(text, Config.MAX_CHUNK_LENGTH) - + + # Prepare text segments (respect pause settings) + update_tts_status(request_id, TTSStatus.CHUNKING, "Preparing text segments") + + if resolved_enable_pauses: + pause_defaults = { + "...": Config.ELLIPSIS_PAUSE_MS, + "—": Config.EM_DASH_PAUSE_MS, + "–": Config.EN_DASH_PAUSE_MS, + r"\.": Config.PERIOD_PAUSE_MS, + "\n\n": Config.PARAGRAPH_PAUSE_MS, + "\n": Config.LINE_BREAK_PAUSE_MS, + } + pause_defaults.update(pause_overrides) + + pause_handler = PauseHandler( + enable_pauses=True, + custom_pauses=pause_defaults, + min_pause_ms=Config.MIN_PAUSE_MS, + max_pause_ms=Config.MAX_PAUSE_MS, + ) + + pause_chunks = pause_handler.process(text) + tts_segments: List[Dict[str, Any]] = [] + for pause_chunk in pause_chunks: + sub_chunks = split_text_into_chunks(pause_chunk.text, Config.MAX_CHUNK_LENGTH) + for idx, sub_chunk in enumerate(sub_chunks): + pause_after = pause_chunk.pause_after_ms if idx == len(sub_chunks) - 1 else 0 + if sub_chunk.strip(): + tts_segments.append({ + "text": sub_chunk, + "pause_after_ms": pause_after, + }) + else: + raw_chunks = split_text_into_chunks(text, Config.MAX_CHUNK_LENGTH) + tts_segments = [ + {"text": chunk, "pause_after_ms": 0} + for chunk in raw_chunks + if chunk.strip() + ] + + if not tts_segments: + update_tts_status(request_id, TTSStatus.ERROR, "No text segments available for generation") + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail={ + "error": { + "message": "No valid text segments found after processing pauses.", + "type": "invalid_request_error", + } + }, + ) + voice_source = "uploaded file" if voice_sample_path != Config.VOICE_SAMPLE_PATH else "configured sample" - print(f"Processing {len(chunks)} text chunks with {voice_source} and parameters:") + print(f"Processing {len(tts_segments)} text segments with {voice_source} and parameters:") print(f" - Exaggeration: {exaggeration}") print(f" - CFG Weight: {cfg_weight}") print(f" - Temperature: {temperature}") - + # Update status with chunk information - update_tts_status(request_id, TTSStatus.GENERATING_AUDIO, "Starting audio generation", - current_chunk=0, total_chunks=len(chunks)) - + update_tts_status( + request_id, + TTSStatus.GENERATING_AUDIO, + "Starting audio generation", + current_chunk=0, + total_chunks=len(tts_segments), + ) + # Generate audio for each chunk with memory management loop = asyncio.get_event_loop() - - for i, chunk in enumerate(chunks): + + channels = None + dtype = None + + for i, segment in 
enumerate(tts_segments): + chunk = segment["text"] + pause_after_ms = int(segment["pause_after_ms"]) # Update progress - current_step = f"Generating audio for chunk {i+1}/{len(chunks)}" - update_tts_status(request_id, TTSStatus.GENERATING_AUDIO, current_step, - current_chunk=i+1, total_chunks=len(chunks)) - - print(f"Generating audio for chunk {i+1}/{len(chunks)}: '{chunk[:50]}{'...' if len(chunk) > 50 else ''}'") - + current_step = f"Generating audio for chunk {i+1}/{len(tts_segments)}" + update_tts_status( + request_id, + TTSStatus.GENERATING_AUDIO, + current_step, + current_chunk=i + 1, + total_chunks=len(tts_segments), + ) + + print(f"Generating audio for chunk {i+1}/{len(tts_segments)}: '{chunk[:50]}{'...' if len(chunk) > 50 else ''}'") + # Use torch.no_grad() to prevent gradient accumulation with torch.no_grad(): # Run TTS generation in executor to avoid blocking @@ -263,8 +345,24 @@ async def generate_speech_internal( if hasattr(audio_tensor, 'detach'): audio_tensor = audio_tensor.detach() + if audio_tensor.dim() == 1: + audio_tensor = audio_tensor.unsqueeze(0) + audio_chunks.append(audio_tensor) - + assembled_segments.append(audio_tensor) + + if channels is None: + channels = audio_tensor.shape[0] + if dtype is None: + dtype = audio_tensor.dtype + + if pause_after_ms > 0 and channels is not None and dtype is not None: + silence_samples = max(0, int(round((pause_after_ms / 1000.0) * model.sr))) + if silence_samples > 0: + silence_tensor = torch.zeros((channels, silence_samples), dtype=dtype, device=audio_tensor.device) + assembled_segments.append(silence_tensor) + silence_segments.append(silence_tensor) + # Periodic memory cleanup during generation if i > 0 and i % 3 == 0: # Every 3 chunks import gc @@ -273,13 +371,18 @@ async def generate_speech_internal( torch.cuda.empty_cache() # Concatenate all chunks with memory management - if len(audio_chunks) > 1: + if len(assembled_segments) == 1: + final_audio = assembled_segments[0] + else: update_tts_status(request_id, TTSStatus.CONCATENATING, "Concatenating audio chunks") print("Concatenating audio chunks...") with torch.no_grad(): - final_audio = concatenate_audio_chunks(audio_chunks, model.sr) - else: - final_audio = audio_chunks[0] + if resolved_enable_pauses: + final_audio = assembled_segments[0] + for segment in assembled_segments[1:]: + final_audio = torch.cat([final_audio, segment.to(final_audio.device)], dim=1) + else: + final_audio = concatenate_audio_chunks(audio_chunks, model.sr) # Convert to WAV format update_tts_status(request_id, TTSStatus.FINALIZING, "Converting to WAV format") @@ -320,16 +423,21 @@ async def generate_speech_internal( # Clean up all audio chunks for chunk in audio_chunks: safe_delete_tensors(chunk) - + + for silence in silence_segments: + safe_delete_tensors(silence) + # Clean up final audio tensor if final_audio is not None: safe_delete_tensors(final_audio) if 'final_audio_cpu' in locals(): safe_delete_tensors(final_audio_cpu) - + # Clear the list audio_chunks.clear() - + assembled_segments.clear() + silence_segments.clear() + # Periodic memory cleanup if REQUEST_COUNTER % Config.MEMORY_CLEANUP_INTERVAL == 0: cleanup_memory() @@ -796,7 +904,11 @@ async def text_to_speech(request: TTSRequest): # Resolve voice name to file path and language voice_sample_path, language_id = resolve_voice_path_and_language(request.voice) - + + enable_pauses = request.enable_pauses + if enable_pauses is None: + enable_pauses = Config.ENABLE_PUNCTUATION_PAUSES + # Check if SSE streaming is requested if request.stream_format == 
"sse": # Return SSE streaming response @@ -827,7 +939,9 @@ async def text_to_speech(request: TTSRequest): language_id=language_id, exaggeration=request.exaggeration, cfg_weight=request.cfg_weight, - temperature=request.temperature + temperature=request.temperature, + enable_pauses=enable_pauses, + custom_pauses=request.custom_pauses, ) # Create response diff --git a/app/config.py b/app/config.py index 5b9fc87..c9a9c51 100644 --- a/app/config.py +++ b/app/config.py @@ -12,7 +12,10 @@ class Config: """Application configuration class""" - + + _DEFAULT_LONG_TEXT_MIN_LENGTH = 3000 + _DEFAULT_LONG_TEXT_MAX_LENGTH = 100000 + # Server settings HOST = os.getenv('HOST', '0.0.0.0') PORT = int(os.getenv('PORT', 4123)) @@ -36,11 +39,44 @@ class Config: # Long text processing settings LONG_TEXT_DATA_DIR = os.getenv('LONG_TEXT_DATA_DIR', './data/long_text_jobs') - LONG_TEXT_MAX_LENGTH = int(os.getenv('LONG_TEXT_MAX_LENGTH', 100000)) + LONG_TEXT_MIN_LENGTH = int(os.getenv('LONG_TEXT_MIN_LENGTH', _DEFAULT_LONG_TEXT_MIN_LENGTH)) + LONG_TEXT_MAX_LENGTH = int(os.getenv('LONG_TEXT_MAX_LENGTH', _DEFAULT_LONG_TEXT_MAX_LENGTH)) LONG_TEXT_CHUNK_SIZE = int(os.getenv('LONG_TEXT_CHUNK_SIZE', 2500)) + LONG_TEXT_BATCH_SIZE = int(os.getenv('LONG_TEXT_BATCH_SIZE', 4)) LONG_TEXT_SILENCE_PADDING_MS = int(os.getenv('LONG_TEXT_SILENCE_PADDING_MS', 200)) LONG_TEXT_JOB_RETENTION_DAYS = int(os.getenv('LONG_TEXT_JOB_RETENTION_DAYS', 7)) LONG_TEXT_MAX_CONCURRENT_JOBS = int(os.getenv('LONG_TEXT_MAX_CONCURRENT_JOBS', 3)) + LONG_TEXT_CHUNKING_STRATEGY = os.getenv('LONG_TEXT_CHUNKING_STRATEGY', 'sentence') + LONG_TEXT_QUALITY_PRESET = os.getenv('LONG_TEXT_QUALITY_PRESET', 'balanced') + + QUALITY_PRESETS = { + "fast": { + "chunk_size": int(os.getenv('QUALITY_FAST_CHUNK_SIZE', '1500')), + "cfg_weight": float(os.getenv('QUALITY_FAST_CFG_WEIGHT', '0.3')), + "temperature": float(os.getenv('QUALITY_FAST_TEMPERATURE', '0.6')), + }, + "balanced": { + "chunk_size": int(os.getenv('QUALITY_BALANCED_CHUNK_SIZE', '2500')), + "cfg_weight": float(os.getenv('QUALITY_BALANCED_CFG_WEIGHT', '0.5')), + "temperature": float(os.getenv('QUALITY_BALANCED_TEMPERATURE', '0.8')), + }, + "high": { + "chunk_size": int(os.getenv('QUALITY_HIGH_CHUNK_SIZE', '2800')), + "cfg_weight": float(os.getenv('QUALITY_HIGH_CFG_WEIGHT', '0.7')), + "temperature": float(os.getenv('QUALITY_HIGH_TEMPERATURE', '1.0')), + }, + } + + # Pause handling configuration + ENABLE_PUNCTUATION_PAUSES = os.getenv('ENABLE_PUNCTUATION_PAUSES', 'true').lower() == 'true' + ELLIPSIS_PAUSE_MS = int(os.getenv('ELLIPSIS_PAUSE_MS', 800)) + EM_DASH_PAUSE_MS = int(os.getenv('EM_DASH_PAUSE_MS', 550)) + EN_DASH_PAUSE_MS = int(os.getenv('EN_DASH_PAUSE_MS', 375)) + PERIOD_PAUSE_MS = int(os.getenv('PERIOD_PAUSE_MS', 500)) + PARAGRAPH_PAUSE_MS = int(os.getenv('PARAGRAPH_PAUSE_MS', 800)) + LINE_BREAK_PAUSE_MS = int(os.getenv('LINE_BREAK_PAUSE_MS', 350)) + MIN_PAUSE_MS = int(os.getenv('MIN_PAUSE_MS', 200)) + MAX_PAUSE_MS = int(os.getenv('MAX_PAUSE_MS', 2000)) # Multilingual model settings USE_MULTILINGUAL_MODEL = os.getenv('USE_MULTILINGUAL_MODEL', 'true').lower() == 'true' @@ -56,6 +92,9 @@ class Config: @classmethod def validate(cls): """Validate configuration values""" + min_length = cls.get_long_text_min_length() + max_length = cls.get_long_text_max_length() + if not (0.25 <= cls.EXAGGERATION <= 2.0): raise ValueError(f"EXAGGERATION must be between 0.25 and 2.0, got {cls.EXAGGERATION}") if not (0.0 <= cls.CFG_WEIGHT <= 1.0): @@ -70,8 +109,14 @@ def validate(cls): raise ValueError(f"MEMORY_CLEANUP_INTERVAL 
must be positive, got {cls.MEMORY_CLEANUP_INTERVAL}") if cls.CUDA_CACHE_CLEAR_INTERVAL <= 0: raise ValueError(f"CUDA_CACHE_CLEAR_INTERVAL must be positive, got {cls.CUDA_CACHE_CLEAR_INTERVAL}") - if cls.LONG_TEXT_MAX_LENGTH <= cls.MAX_TOTAL_LENGTH: - raise ValueError(f"LONG_TEXT_MAX_LENGTH ({cls.LONG_TEXT_MAX_LENGTH}) must be greater than MAX_TOTAL_LENGTH ({cls.MAX_TOTAL_LENGTH})") + if min_length <= 0: + raise ValueError(f"LONG_TEXT_MIN_LENGTH must be positive, got {min_length}") + if max_length <= min_length: + raise ValueError( + "LONG_TEXT_MAX_LENGTH ({}) must be greater than LONG_TEXT_MIN_LENGTH ({})".format( + max_length, min_length + ) + ) if cls.LONG_TEXT_CHUNK_SIZE <= 0: raise ValueError(f"LONG_TEXT_CHUNK_SIZE must be positive, got {cls.LONG_TEXT_CHUNK_SIZE}") if cls.LONG_TEXT_CHUNK_SIZE >= cls.MAX_TOTAL_LENGTH: @@ -82,6 +127,44 @@ def validate(cls): raise ValueError(f"LONG_TEXT_JOB_RETENTION_DAYS must be positive, got {cls.LONG_TEXT_JOB_RETENTION_DAYS}") if cls.LONG_TEXT_MAX_CONCURRENT_JOBS <= 0: raise ValueError(f"LONG_TEXT_MAX_CONCURRENT_JOBS must be positive, got {cls.LONG_TEXT_MAX_CONCURRENT_JOBS}") + if cls.MIN_PAUSE_MS < 0: + raise ValueError(f"MIN_PAUSE_MS must be non-negative, got {cls.MIN_PAUSE_MS}") + if cls.MAX_PAUSE_MS < cls.MIN_PAUSE_MS: + raise ValueError( + f"MAX_PAUSE_MS ({cls.MAX_PAUSE_MS}) must be greater than or equal to MIN_PAUSE_MS ({cls.MIN_PAUSE_MS})" + ) + + @staticmethod + def _get_int_env(name: str, fallback: int) -> int: + value = os.getenv(name) + if value is None or value == "": + return fallback + + try: + return int(value) + except ValueError as exc: + raise ValueError(f"{name} must be an integer, got {value!r}") from exc + + @classmethod + def refresh_long_text_limits(cls) -> None: + cls.LONG_TEXT_MIN_LENGTH = cls._get_int_env( + "LONG_TEXT_MIN_LENGTH", + cls._DEFAULT_LONG_TEXT_MIN_LENGTH, + ) + cls.LONG_TEXT_MAX_LENGTH = cls._get_int_env( + "LONG_TEXT_MAX_LENGTH", + cls._DEFAULT_LONG_TEXT_MAX_LENGTH, + ) + + @classmethod + def get_long_text_min_length(cls) -> int: + cls.refresh_long_text_limits() + return cls.LONG_TEXT_MIN_LENGTH + + @classmethod + def get_long_text_max_length(cls) -> int: + cls.refresh_long_text_limits() + return cls.LONG_TEXT_MAX_LENGTH def detect_device(): diff --git a/app/core/background_tasks.py b/app/core/background_tasks.py index 1ceca8c..b22d729 100644 --- a/app/core/background_tasks.py +++ b/app/core/background_tasks.py @@ -8,11 +8,11 @@ import traceback from datetime import datetime from pathlib import Path -from typing import Optional, Dict, Any +from typing import Optional, Dict, Any, List, Tuple from app.config import Config from app.core.long_text_jobs import get_job_manager -from app.core.text_processing import split_text_for_long_generation, estimate_processing_time +from app.core.text_processing import split_text_for_long_generation from app.core.audio_processing import concatenate_audio_files, AudioConcatenationError from app.api.endpoints.speech import generate_speech_internal, resolve_voice_path_and_language from app.models.long_text import ( @@ -120,7 +120,7 @@ def _cleanup_task(self, job_id: str): del self.active_tasks[job_id] async def _process_job(self, job_id: str): - """Process a single long text job""" + """Process a single long text job with batched GPU inference""" logger.info(f"Starting processing for job {job_id}") try: @@ -141,12 +141,22 @@ async def _process_job(self, job_id: str): await self._fail_job(job_id, "Input text not found") return + parameters = metadata.parameters or {} + chunk_size = 
int(parameters.get('chunk_size', Config.LONG_TEXT_CHUNK_SIZE)) + if chunk_size <= 0: + chunk_size = Config.LONG_TEXT_CHUNK_SIZE + + chunking_strategy = parameters.get( + 'chunking_strategy', Config.LONG_TEXT_CHUNKING_STRATEGY + ) + # Phase 1: Text chunking await self._update_job_status(job_id, LongTextJobStatus.CHUNKING, "Splitting text into chunks") chunks = split_text_for_long_generation( input_text, - max_chunk_size=Config.LONG_TEXT_CHUNK_SIZE + max_chunk_size=chunk_size, + strategy=chunking_strategy ) if not chunks: @@ -160,82 +170,89 @@ async def _process_job(self, job_id: str): logger.info(f"Job {job_id}: Split into {len(chunks)} chunks") - # Phase 2: Generate audio for each chunk - await self._update_job_status(job_id, LongTextJobStatus.PROCESSING, f"Generating audio for {len(chunks)} chunks") + # Phase 2: Generate audio for all chunks with batching + await self._update_job_status( + job_id, + LongTextJobStatus.PROCESSING, + f"Generating audio for {len(chunks)} chunks", + ) voice_path, language_id = resolve_voice_path_and_language(metadata.voice) - chunk_audio_files = [] - for i, chunk in enumerate(chunks): - # Check if job was paused or cancelled + batch_size = int(parameters.get('batch_size', Config.LONG_TEXT_BATCH_SIZE)) + if batch_size <= 0: + batch_size = Config.LONG_TEXT_BATCH_SIZE + + chunk_audio_data: List[Tuple[int, Any, LongTextChunk]] = [] + + for batch_start in range(0, len(chunks), batch_size): current_metadata = self.job_manager._load_job_metadata(job_id) if current_metadata and current_metadata.status in [LongTextJobStatus.PAUSED, LongTextJobStatus.CANCELLED]: logger.info(f"Job {job_id} was paused/cancelled, stopping processing") return - # Update current chunk - current_metadata.current_chunk = i - self.job_manager._save_job_metadata(current_metadata) - - # Update chunk status - chunk.processing_started_at = datetime.utcnow() - chunks[i] = chunk # Update in list + batch_end = min(batch_start + batch_size, len(chunks)) + batch_chunks = chunks[batch_start:batch_end] - logger.info(f"Job {job_id}: Processing chunk {i+1}/{len(chunks)} ({len(chunk.text)} chars)") + logger.info( + f"Job {job_id}: Processing batch {batch_start // batch_size + 1} " + f"(chunks {batch_start + 1}-{batch_end}/{len(chunks)})" + ) - try: - # Generate audio for this chunk - audio_buffer = await generate_speech_internal( - text=chunk.text, - voice_sample_path=voice_path, - language_id=language_id, - exaggeration=metadata.parameters.get('exaggeration'), - cfg_weight=metadata.parameters.get('cfg_weight'), - temperature=metadata.parameters.get('temperature') + batch_tasks = [] + for i, chunk in enumerate(batch_chunks, start=batch_start): + batch_tasks.append( + self._generate_chunk_audio( + job_id=job_id, + chunk=chunk, + chunk_index=i, + voice_path=voice_path, + language_id=language_id, + parameters=parameters, + ) ) - # Save chunk audio file - chunk_filename = f"chunk_{i+1:03d}.wav" - chunk_audio_path = self.job_manager._get_job_file_paths(job_id)['chunks_dir'] / chunk_filename - - with open(chunk_audio_path, 'wb') as f: - f.write(audio_buffer.getvalue()) + batch_results = await asyncio.gather(*batch_tasks, return_exceptions=True) - # Update chunk metadata - chunk.audio_file = chunk_filename - chunk.processing_completed_at = datetime.utcnow() - chunk.duration_ms = int((chunk.processing_completed_at - chunk.processing_started_at).total_seconds() * 1000) - - chunk_audio_files.append(chunk_audio_path) - chunks[i] = chunk - - # Update job progress - current_metadata.completed_chunks = i + 1 - 
self.job_manager._save_job_metadata(current_metadata) - self.job_manager._save_chunks_data(job_id, chunks) - - logger.info(f"Job {job_id}: Completed chunk {i+1}/{len(chunks)}") + current_metadata = self.job_manager._load_job_metadata(job_id) + if not current_metadata: + current_metadata = metadata + + for i, result in enumerate(batch_results, start=batch_start): + chunk = chunks[i] + if isinstance(result, Exception): + logger.error(f"Job {job_id}: Failed to process chunk {i + 1}: {result}") + chunk.error = str(result) + chunks[i] = chunk + if i not in current_metadata.failed_chunks: + current_metadata.failed_chunks.append(i) + else: + audio_buffer, updated_chunk = result + chunk_audio_data.append((i, audio_buffer, updated_chunk)) + chunks[i] = updated_chunk + + completed_chunks = len([c for c in chunks if c.audio_file]) + current_metadata.completed_chunks = completed_chunks + current_metadata.current_chunk = min(batch_end, len(chunks)) - 1 + self.job_manager._save_job_metadata(current_metadata) + self.job_manager._save_chunks_data(job_id, chunks) - except Exception as e: - logger.error(f"Job {job_id}: Failed to process chunk {i+1}: {e}") - chunk.error = str(e) - chunks[i] = chunk + if not chunk_audio_data: + await self._fail_job(job_id, "No chunks were successfully generated") + return - # Mark chunk as failed - if i not in current_metadata.failed_chunks: - current_metadata.failed_chunks.append(i) - self.job_manager._save_job_metadata(current_metadata) + if len(chunk_audio_data) < len(chunks): + logger.warning( + f"Job {job_id}: Only {len(chunk_audio_data)}/{len(chunks)} chunks generated successfully" + ) - # For now, continue with other chunks (could be made configurable) - continue + logger.info(f"Job {job_id}: Writing {len(chunk_audio_data)} audio files to disk") + chunk_audio_files = await self._batch_write_audio_files(job_id, chunk_audio_data) - # Check if we have enough successful chunks to continue - successful_chunks = [f for f in chunk_audio_files if f.exists()] - if len(successful_chunks) == 0: + successful_chunks = [path for path in chunk_audio_files if path.exists()] + if not successful_chunks: await self._fail_job(job_id, "No chunks were successfully generated") return - elif len(successful_chunks) < len(chunks): - logger.warning(f"Job {job_id}: Only {len(successful_chunks)}/{len(chunks)} chunks generated successfully") # Phase 3: Concatenate audio chunks await self._update_job_status(job_id, LongTextJobStatus.PROCESSING, "Combining audio chunks") @@ -244,11 +261,17 @@ async def _process_job(self, job_id: str): output_filename = f"final.{metadata.output_format}" output_path = self.job_manager._get_job_file_paths(job_id)['output_dir'] / output_filename + silence_padding_ms = parameters.get( + 'silence_padding_ms', Config.LONG_TEXT_SILENCE_PADDING_MS + ) + if silence_padding_ms is None or silence_padding_ms < 0: + silence_padding_ms = Config.LONG_TEXT_SILENCE_PADDING_MS + concatenation_metadata = concatenate_audio_files( audio_files=successful_chunks, output_path=output_path, output_format=metadata.output_format, - silence_duration_ms=Config.LONG_TEXT_SILENCE_PADDING_MS, + silence_duration_ms=silence_padding_ms, # normalize_volume=True, normalize_volume=False, remove_source_files=False # Keep source chunks for debugging @@ -283,6 +306,83 @@ async def _process_job(self, job_id: str): logger.error(traceback.format_exc()) await self._fail_job(job_id, f"Unexpected error: {e}") + async def _generate_chunk_audio( + self, + job_id: str, + chunk: LongTextChunk, + chunk_index: int, + 
voice_path: str, + language_id: str, + parameters: Dict[str, Any], + ): + """Generate audio for a single chunk (executed in parallel within a batch)""" + + chunk.processing_started_at = datetime.utcnow() + chunk.error = None + + logger.debug( + f"Job {job_id}: Processing chunk {chunk_index + 1} ({len(chunk.text)} chars)" + ) + + try: + pause_settings = (parameters.get('pause_settings') or {}) if parameters else {} + + audio_buffer = await generate_speech_internal( + text=chunk.text, + voice_sample_path=voice_path, + language_id=language_id, + exaggeration=(parameters or {}).get('exaggeration'), + cfg_weight=(parameters or {}).get('cfg_weight'), + temperature=(parameters or {}).get('temperature'), + enable_pauses=pause_settings.get('enable'), + custom_pauses=pause_settings.get('custom'), + ) + + chunk.audio_file = f"chunk_{chunk_index + 1:03d}.wav" + chunk.processing_completed_at = datetime.utcnow() + chunk.duration_ms = int( + (chunk.processing_completed_at - chunk.processing_started_at).total_seconds() * 1000 + ) + + logger.debug( + f"Job {job_id}: Completed chunk {chunk_index + 1} in {chunk.duration_ms}ms" + ) + + return audio_buffer, chunk + + except Exception as exc: + logger.error(f"Job {job_id}: Error processing chunk {chunk_index + 1}: {exc}") + raise + + async def _batch_write_audio_files( + self, + job_id: str, + chunk_audio_data: List[Tuple[int, Any, LongTextChunk]], + ) -> List[Path]: + """Write generated audio buffers to disk after GPU work completes""" + + job_paths = self.job_manager._get_job_file_paths(job_id) + written_paths: List[Path] = [] + + for chunk_index, audio_buffer, chunk in chunk_audio_data: + chunk_audio_path = job_paths['chunks_dir'] / chunk.audio_file + + await asyncio.to_thread( + self._write_audio_file, + chunk_audio_path, + audio_buffer.getvalue(), + ) + + written_paths.append(chunk_audio_path) + + logger.info(f"Job {job_id}: Wrote {len(written_paths)} audio files to disk") + return written_paths + + def _write_audio_file(self, path: Path, data: bytes) -> None: + """Write binary audio data to disk""" + with open(path, 'wb') as file_obj: + file_obj.write(data) + async def _update_job_status(self, job_id: str, status: LongTextJobStatus, message: str = ""): """Update job status""" try: diff --git a/app/core/long_text_jobs.py b/app/core/long_text_jobs.py index 6d5fb62..2d92971 100644 --- a/app/core/long_text_jobs.py +++ b/app/core/long_text_jobs.py @@ -158,14 +158,22 @@ def _load_input_text(self, job_id: str) -> Optional[str]: logger.error(f"Failed to load input text for job {job_id}: {e}") return None - def create_job(self, - text: str, - voice: Optional[str] = None, - output_format: str = "mp3", - exaggeration: Optional[float] = None, - cfg_weight: Optional[float] = None, - temperature: Optional[float] = None, - session_id: Optional[str] = None) -> Tuple[str, int]: + def create_job( + self, + text: str, + voice: Optional[str] = None, + output_format: str = "mp3", + exaggeration: Optional[float] = None, + cfg_weight: Optional[float] = None, + temperature: Optional[float] = None, + session_id: Optional[str] = None, + chunking_strategy: Optional[str] = None, + chunk_size: Optional[int] = None, + silence_padding: Optional[int] = None, + quality_preset: Optional[str] = None, + enable_pauses: Optional[bool] = None, + custom_pauses: Optional[Dict[str, int]] = None, + ) -> Tuple[str, int]: """ Create a new long text job @@ -178,8 +186,36 @@ def create_job(self, # Calculate text hash for potential deduplication text_hash = self._generate_text_hash(text) + # Resolve 
chunking configuration + resolved_chunk_size = chunk_size or Config.LONG_TEXT_CHUNK_SIZE + if resolved_chunk_size <= 0: + resolved_chunk_size = Config.LONG_TEXT_CHUNK_SIZE + + resolved_chunking_strategy = chunking_strategy or Config.LONG_TEXT_CHUNKING_STRATEGY + resolved_silence_padding = ( + silence_padding + if silence_padding is not None and silence_padding >= 0 + else Config.LONG_TEXT_SILENCE_PADDING_MS + ) + resolved_quality_preset = quality_preset or Config.LONG_TEXT_QUALITY_PRESET + + resolved_enable_pauses = ( + Config.ENABLE_PUNCTUATION_PAUSES if enable_pauses is None else bool(enable_pauses) + ) + + resolved_custom_pauses = None + if custom_pauses: + resolved_custom_pauses = {} + for key, value in custom_pauses.items(): + try: + resolved_custom_pauses[str(key)] = int(value) + except (TypeError, ValueError): + logger.debug("Ignoring invalid custom pause value %r=%r", key, value) + if not resolved_custom_pauses: + resolved_custom_pauses = None + # Estimate number of chunks - estimated_chunks = max(1, (len(text) + Config.LONG_TEXT_CHUNK_SIZE - 1) // Config.LONG_TEXT_CHUNK_SIZE) + estimated_chunks = max(1, (len(text) + resolved_chunk_size - 1) // resolved_chunk_size) # Create job directories self._create_job_directories(job_id) @@ -203,7 +239,15 @@ def create_job(self, 'exaggeration': exaggeration, 'cfg_weight': cfg_weight, 'temperature': temperature, - 'output_format': output_format + 'output_format': output_format, + 'chunking_strategy': resolved_chunking_strategy, + 'chunk_size': resolved_chunk_size, + 'quality_preset': resolved_quality_preset, + 'silence_padding_ms': resolved_silence_padding, + 'pause_settings': { + 'enable': resolved_enable_pauses, + 'custom': resolved_custom_pauses, + }, }, output_format=output_format, user_session_id=session_id @@ -754,7 +798,13 @@ def retry_job(self, job_id: str, preserve_chunks: bool = True, exaggeration=parameters.get('exaggeration'), cfg_weight=parameters.get('cfg_weight'), temperature=parameters.get('temperature'), - session_id=original_metadata.user_session_id + session_id=original_metadata.user_session_id, + chunking_strategy=parameters.get('chunking_strategy'), + chunk_size=parameters.get('chunk_size'), + silence_padding=parameters.get('silence_padding_ms'), + quality_preset=parameters.get('quality_preset'), + enable_pauses=(parameters.get('pause_settings') or {}).get('enable'), + custom_pauses=(parameters.get('pause_settings') or {}).get('custom'), ) # Update metadata to link to original job diff --git a/app/core/pause_handler.py b/app/core/pause_handler.py new file mode 100644 index 0000000..54aaa7b --- /dev/null +++ b/app/core/pause_handler.py @@ -0,0 +1,172 @@ +"""Utility classes for punctuation-based pause handling.""" + +from __future__ import annotations + +import logging +import re +from dataclasses import dataclass +from typing import Dict, List, Optional + +from app.config import Config + +logger = logging.getLogger(__name__) + + +@dataclass +class TextChunk: + """Representation of a processed text segment.""" + + text: str + pause_after_ms: int + original_separator: Optional[str] = None + + +class PauseHandler: + """Split text around punctuation and expose pause metadata.""" + + DEFAULT_PAUSES: Dict[str, int] = { + r"\.\.\.": Config.ELLIPSIS_PAUSE_MS, + r"—": Config.EM_DASH_PAUSE_MS, + r"–": Config.EN_DASH_PAUSE_MS, + r"\.": Config.PERIOD_PAUSE_MS, + r"\n\n": Config.PARAGRAPH_PAUSE_MS, + r"\n": Config.LINE_BREAK_PAUSE_MS, + } + + def __init__( + self, + enable_pauses: bool = True, + custom_pauses: Optional[Dict[str, int]] = 
None, + min_pause_ms: int = Config.MIN_PAUSE_MS, + max_pause_ms: int = Config.MAX_PAUSE_MS, + ) -> None: + self.enable_pauses = enable_pauses + self.min_pause_ms = min_pause_ms + self.max_pause_ms = max_pause_ms + + self.pause_patterns: Dict[str, int] = {} + for pattern, duration in self.DEFAULT_PAUSES.items(): + self.pause_patterns[self._normalize_pattern(pattern)] = int(duration) + + if custom_pauses: + for raw_pattern, duration in custom_pauses.items(): + try: + normalized = self._normalize_pattern(raw_pattern) + self.pause_patterns[normalized] = int(duration) + except (TypeError, ValueError) as exc: + logger.debug("Ignoring invalid custom pause %r: %s", raw_pattern, exc) + + logger.debug("PauseHandler initialised with %d patterns", len(self.pause_patterns)) + + def process(self, text: str) -> List[TextChunk]: + """Split text and annotate pauses.""" + + cleaned = text.strip() + if not self.enable_pauses or not cleaned: + return [TextChunk(text=cleaned, pause_after_ms=0)] if cleaned else [] + + matches: List[Dict[str, object]] = [] + for pattern, duration in self.pause_patterns.items(): + compiled = re.compile(pattern) + for match in compiled.finditer(text): + matches.append( + { + "start": match.start(), + "end": match.end(), + "separator": match.group(0), + "pause_ms": int(duration), + } + ) + + matches.sort(key=lambda m: (int(m["start"]), -(int(m["end"]) - int(m["start"])))) + + chunks: List[TextChunk] = [] + position = 0 + for match in matches: + start = int(match["start"]) + end = int(match["end"]) + if start < position: + continue + + chunk_text = text[position:start].strip() + if chunk_text: + pause_ms = self._clamp_pause(int(match["pause_ms"])) + chunks.append( + TextChunk( + text=chunk_text, + pause_after_ms=pause_ms, + original_separator=str(match["separator"]), + ) + ) + + position = end + + remaining = text[position:].strip() + if remaining: + chunks.append(TextChunk(text=remaining, pause_after_ms=0, original_separator=None)) + + logger.debug("Split text into %d pause-aware chunks", len(chunks)) + return chunks + + def estimate_total_pause_time(self, text: str) -> int: + """Estimate cumulative pause duration for ``text``.""" + + return sum(chunk.pause_after_ms for chunk in self.process(text)) + + def get_pause_summary(self, text: str) -> Dict[str, object]: + """Return statistics about pauses for ``text``.""" + + chunks = self.process(text) + pause_types: Dict[str, int] = {} + for chunk in chunks: + if chunk.pause_after_ms > 0: + separator = chunk.original_separator or "other" + pause_types[separator] = pause_types.get(separator, 0) + 1 + + return { + "total_chunks": len(chunks), + "total_pause_ms": sum(chunk.pause_after_ms for chunk in chunks), + "pause_types": pause_types, + "chunks_with_pauses": sum(1 for chunk in chunks if chunk.pause_after_ms > 0), + } + + def _clamp_pause(self, pause_ms: int) -> int: + return max(self.min_pause_ms, min(pause_ms, self.max_pause_ms)) + + @staticmethod + def _normalize_pattern(pattern: str) -> str: + if pattern is None: + raise ValueError("Pause pattern cannot be None") + if "\\" in pattern: + return pattern + return re.escape(pattern) + + +def split_text_with_pauses( + text: str, + enable_pauses: bool = True, + custom_pauses: Optional[Dict[str, int]] = None, + min_pause_ms: int = Config.MIN_PAUSE_MS, + max_pause_ms: int = Config.MAX_PAUSE_MS, +) -> List[TextChunk]: + """Convenience wrapper around :class:`PauseHandler`.""" + + pause_mapping: Dict[str, int] = { + r"\.\.\.": Config.ELLIPSIS_PAUSE_MS, + r"—": Config.EM_DASH_PAUSE_MS, + r"–": 
Config.EN_DASH_PAUSE_MS,
+        r"\.": Config.PERIOD_PAUSE_MS,
+        r"\n\n": Config.PARAGRAPH_PAUSE_MS,
+        r"\n": Config.LINE_BREAK_PAUSE_MS,
+    }
+
+    if custom_pauses:
+        pause_mapping.update(custom_pauses)
+
+    handler = PauseHandler(
+        enable_pauses=enable_pauses,
+        custom_pauses=pause_mapping,
+        min_pause_ms=min_pause_ms,
+        max_pause_ms=max_pause_ms,
+    )
+    return handler.process(text)
diff --git a/app/core/quality_presets.py b/app/core/quality_presets.py
new file mode 100644
index 0000000..3ead838
--- /dev/null
+++ b/app/core/quality_presets.py
@@ -0,0 +1,44 @@
+"""Quality presets for TTS generation configurable via environment variables."""
+
+from app.config import Config
+
+
+def get_quality_preset(preset_name: str | None = None) -> dict:
+    """Return the quality preset configuration.
+
+    Args:
+        preset_name: Name of the preset to retrieve. Defaults to the configured
+            LONG_TEXT_QUALITY_PRESET when ``None`` is provided.
+
+    Returns:
+        A dictionary with ``chunk_size``, ``cfg_weight`` and ``temperature``
+        settings. Falls back to the ``balanced`` preset when the requested name
+        is not defined.
+    """
+
+    if preset_name is None:
+        preset_name = Config.LONG_TEXT_QUALITY_PRESET
+
+    return Config.QUALITY_PRESETS.get(preset_name, Config.QUALITY_PRESETS["balanced"])
+
+
+def get_chunk_size_for_preset(preset_name: str | None = None) -> int:
+    """Return the chunk size associated with a preset.
+
+    Args:
+        preset_name: Optional preset name. When omitted the configured default
+            preset is used.
+
+    Returns:
+        Chunk size for the preset, falling back to ``Config.LONG_TEXT_CHUNK_SIZE``
+        if the resolved preset does not define a valid chunk size.
+    """
+
+    preset = get_quality_preset(preset_name)
+
+    if preset is not None:
+        chunk_size = preset.get("chunk_size")
+        if isinstance(chunk_size, int) and chunk_size > 0:
+            return chunk_size
+
+    return Config.LONG_TEXT_CHUNK_SIZE
diff --git a/app/core/text_processing.py b/app/core/text_processing.py
index d90c90f..9c2a71c 100644
--- a/app/core/text_processing.py
+++ b/app/core/text_processing.py
@@ -380,57 +380,158 @@ def concatenate_audio_chunks(audio_chunks: list, sample_rate: int) -> torch.Tens
     return concatenated
 
 
-def split_text_for_long_generation(text: str,
-                                  max_chunk_size: Optional[int] = None,
-                                  overlap_chars: int = 0) -> List[LongTextChunk]:
-    """
-    Split long text into chunks optimized for TTS generation with intelligent boundaries.
+def chunk_text(text: str, strategy: str = "sentence", max_length: Optional[int] = None) -> List[str]:
+    """Split text according to the requested strategy."""
 
-    This function implements a hierarchical splitting strategy:
-    1. First attempt: Split at paragraph boundaries (double newlines)
-    2. Second attempt: Split at sentence boundaries (. ! ?)
-    3. Third attempt: Split at clause boundaries (, ; : - —)
-    4.
Last resort: Split at word boundaries + if max_length is None or max_length <= 0: + max_length = Config.LONG_TEXT_CHUNK_SIZE - Args: - text: Input text to split (should be > 3000 characters) - max_chunk_size: Maximum characters per chunk (defaults to Config.LONG_TEXT_CHUNK_SIZE) - overlap_chars: Number of characters to overlap between chunks for context + # Respect the standard TTS hard limit with a small buffer to avoid boundary errors + effective_max = min(max_length, Config.MAX_TOTAL_LENGTH - 100) + if effective_max <= 0: + effective_max = max_length - Returns: - List of LongTextChunk objects with metadata - """ - if max_chunk_size is None: - max_chunk_size = Config.LONG_TEXT_CHUNK_SIZE + cleaned = text.strip() + if not cleaned: + return [] - # Ensure we don't exceed the regular TTS limit - effective_max = min(max_chunk_size, Config.MAX_TOTAL_LENGTH - 100) # Leave some buffer + normalized_strategy = (strategy or "sentence").lower() - chunks = [] - chunk_index = 0 + if normalized_strategy == "fixed": + return [ + chunk.strip() + for chunk in (cleaned[i : i + effective_max] for i in range(0, len(cleaned), effective_max)) + if chunk.strip() + ] + + if normalized_strategy == "paragraph": + return _chunk_by_paragraphs(cleaned, effective_max) + + if normalized_strategy == "word": + return _chunk_by_words(cleaned, effective_max) + + # Default strategy combines hierarchical paragraph/sentence/word splitting + return _chunk_hierarchical(cleaned, effective_max) + + +def _chunk_by_paragraphs(text: str, max_length: int) -> List[str]: + """Chunk text prioritising paragraph boundaries.""" + + paragraphs = [segment.strip() for segment in re.split(r"\n\s*\n", text) if segment.strip()] + if not paragraphs: + return _chunk_hierarchical(text, max_length) + + chunks: List[str] = [] + current: Optional[str] = None + + for paragraph in paragraphs: + if current is None: + current = paragraph + continue + + candidate = f"{current}\n\n{paragraph}" + if len(candidate) <= max_length: + current = candidate + else: + chunks.extend(_chunk_hierarchical(current, max_length)) + current = paragraph + + if current: + chunks.extend(_chunk_hierarchical(current, max_length)) + + return chunks + + +def _chunk_by_words(text: str, max_length: int) -> List[str]: + """Chunk text using word boundaries.""" + + words = text.split() + if not words: + return [] + + chunks: List[str] = [] + current_words: List[str] = [] + current_length = 0 + + for word in words: + # Include a space when the current chunk already has words + additional_length = len(word) if not current_words else len(word) + 1 + + if current_words and current_length + additional_length > max_length: + chunk = " ".join(current_words).strip() + if chunk: + chunks.append(chunk) + current_words = [word] + current_length = len(word) + else: + current_words.append(word) + current_length += additional_length + + if current_words: + chunk = " ".join(current_words).strip() + if chunk: + chunks.append(chunk) + + refined_chunks: List[str] = [] + for chunk in chunks: + if len(chunk) > max_length: + refined_chunks.extend(_chunk_hierarchical(chunk, max_length)) + else: + refined_chunks.append(chunk) + + return refined_chunks + + +def _chunk_hierarchical(text: str, max_length: int) -> List[str]: + """Hierarchical chunking that mirrors the legacy behaviour.""" + + chunks: List[str] = [] remaining_text = text.strip() while remaining_text: - if len(remaining_text) <= effective_max: - # Last chunk - chunk_text = remaining_text - remaining_text = "" - else: - # Find the best split point 
- chunk_text, remaining_text = _find_best_split_point( - remaining_text, effective_max, overlap_chars - ) + if len(remaining_text) <= max_length: + chunks.append(remaining_text) + break - # Create chunk metadata - chunk = LongTextChunk( - index=chunk_index, - text=chunk_text, - text_preview=chunk_text[:50] + ("..." if len(chunk_text) > 50 else ""), - character_count=len(chunk_text) - ) + chunk_text, remaining = _find_best_split_point(remaining_text, max_length, 0) + + if not chunk_text: + chunk_text = remaining_text[:max_length].strip() + remaining = remaining_text[max_length:].strip() + + chunks.append(chunk_text) + remaining_text = remaining - chunks.append(chunk) - chunk_index += 1 + return [chunk for chunk in chunks if chunk] + + +def split_text_for_long_generation( + text: str, + max_chunk_size: Optional[int] = None, + strategy: Optional[str] = None, +) -> List[LongTextChunk]: + """Split long text into structured chunks ready for generation.""" + + if max_chunk_size is None or max_chunk_size <= 0: + max_chunk_size = Config.LONG_TEXT_CHUNK_SIZE + + resolved_strategy = strategy or Config.LONG_TEXT_CHUNKING_STRATEGY + + chunk_strings = chunk_text(text, strategy=resolved_strategy, max_length=max_chunk_size) + if not chunk_strings: + return [] + + chunks: List[LongTextChunk] = [] + for index, chunk_body in enumerate(chunk_strings): + preview = chunk_body[:50] + ("..." if len(chunk_body) > 50 else "") + chunks.append( + LongTextChunk( + index=index, + text=chunk_body, + text_preview=preview, + character_count=len(chunk_body), + ) + ) return chunks @@ -562,13 +663,18 @@ def _split_at_words(text: str, max_length: int, overlap_chars: int) -> Tuple[str return chunk_text, remaining_text -def estimate_processing_time(text_length: int, avg_chars_per_second: float = 25.0) -> int: +def estimate_processing_time( + text_length: int, + avg_chars_per_second: float = 25.0, + chunk_size: Optional[int] = None, +) -> int: """ Estimate processing time for long text TTS generation. Args: text_length: Total characters in text avg_chars_per_second: Average processing rate (characters per second) + chunk_size: Optional chunk size override used for estimation Returns: Estimated processing time in seconds @@ -576,8 +682,12 @@ def estimate_processing_time(text_length: int, avg_chars_per_second: float = 25. # Base estimate + overhead for chunking and concatenation base_time = text_length / avg_chars_per_second + effective_chunk_size = chunk_size or Config.LONG_TEXT_CHUNK_SIZE + if effective_chunk_size <= 0: + effective_chunk_size = Config.LONG_TEXT_CHUNK_SIZE + # Add overhead: 5 seconds for setup + 2 seconds per chunk + 10 seconds for concatenation - num_chunks = max(1, (text_length + Config.LONG_TEXT_CHUNK_SIZE - 1) // Config.LONG_TEXT_CHUNK_SIZE) + num_chunks = max(1, (text_length + effective_chunk_size - 1) // effective_chunk_size) overhead = 5 + (num_chunks * 2) + 10 return int(base_time + overhead) @@ -594,12 +704,19 @@ def validate_long_text_input(text: str) -> Tuple[bool, str]: return False, "Input text cannot be empty" text_length = len(text.strip()) + min_length = Config.get_long_text_min_length() + max_length = Config.get_long_text_max_length() + + if text_length < min_length: + return False, ( + "Text must be at least {} characters for long-text processing (received {} characters)".format( + min_length, + text_length, + ) + ) - if text_length <= Config.MAX_TOTAL_LENGTH: - return False, f"Text is {text_length} characters. 
Use regular TTS for texts under {Config.MAX_TOTAL_LENGTH} characters" - - if text_length > Config.LONG_TEXT_MAX_LENGTH: - return False, f"Text is too long ({text_length} characters). Maximum allowed: {Config.LONG_TEXT_MAX_LENGTH}" + if text_length > max_length: + return False, f"Text is too long ({text_length} characters). Maximum allowed: {max_length}" # Check for excessive repetition (potential spam/abuse) words = text.split() diff --git a/app/models/long_text.py b/app/models/long_text.py index 08379e2..636c81f 100644 --- a/app/models/long_text.py +++ b/app/models/long_text.py @@ -4,10 +4,12 @@ from datetime import datetime from enum import Enum -from typing import Optional, Dict, Any, List +from typing import Optional, Dict, Any, List, Literal from pydantic import BaseModel, Field, field_validator from uuid import UUID +from app.config import Config + class LongTextJobStatus(str, Enum): """Status enum for long text jobs""" @@ -28,20 +30,116 @@ class LongTextJobActionType(str, Enum): class LongTextRequest(BaseModel): """Request model for long text TTS generation""" - input: str = Field(..., min_length=3001, description="Text to convert to speech (must be > 3000 characters)") + input: str = Field( + ..., description="Text to convert to speech (must meet the configured minimum length)" + ) voice: Optional[str] = Field(None, description="Voice name from library or OpenAI voice name") response_format: Optional[str] = Field("mp3", description="Audio format (mp3 or wav)") exaggeration: Optional[float] = Field(None, ge=0.25, le=2.0, description="Emotion intensity") cfg_weight: Optional[float] = Field(None, ge=0.0, le=1.0, description="Pace control") temperature: Optional[float] = Field(None, ge=0.05, le=5.0, description="Sampling temperature") session_id: Optional[str] = Field(None, description="Frontend session ID for tracking") + chunking_strategy: Optional[Literal["sentence", "paragraph", "word", "fixed"]] = Field( + None, description="Strategy to use when chunking the text" + ) + quality_preset: Optional[Literal["fast", "balanced", "high"]] = Field( + None, description="Quality preset balancing speed vs fidelity" + ) + chunk_size: Optional[int] = Field( + None, + gt=0, + description="Custom chunk size override (takes precedence over presets and defaults)", + ) + silence_padding_ms: Optional[int] = Field( + None, + ge=0, + description="Silence padding between chunks in milliseconds", + ) + enable_pauses: Optional[bool] = Field( + None, + description="Enable punctuation-based pauses when generating chunk audio", + ) + custom_pauses: Optional[Dict[str, int]] = Field( + None, + description="Custom pause patterns and durations in milliseconds", + ) @field_validator('input') @classmethod - def validate_input_length(cls, v): - if len(v) > 100000: # Will be validated against Config.LONG_TEXT_MAX_LENGTH at runtime - raise ValueError('Input text exceeds maximum length of 100000 characters') - return v.strip() + def validate_input_length(cls, v: str) -> str: + cleaned = v.strip() + text_length = len(cleaned) + min_length = Config.get_long_text_min_length() + max_length = Config.get_long_text_max_length() + + if text_length < min_length: + raise ValueError( + f"Input text must be at least {min_length} characters for long text processing" + ) + + if text_length > max_length: + raise ValueError( + f"Input text exceeds maximum length of {max_length} characters" + ) + + return cleaned + + def get_chunking_strategy(self) -> str: + """Return the requested chunking strategy with configuration fallback.""" + + 
return self.chunking_strategy or Config.LONG_TEXT_CHUNKING_STRATEGY + + def get_quality_preset(self) -> str: + """Return the requested quality preset with configuration fallback.""" + + return self.quality_preset or Config.LONG_TEXT_QUALITY_PRESET + + def get_chunk_size(self, preset_config: Dict[str, Any]) -> int: + """Resolve the chunk size using custom value, preset, then config.""" + + if self.chunk_size: + return self.chunk_size + return int(preset_config.get("chunk_size", Config.LONG_TEXT_CHUNK_SIZE)) + + def get_silence_padding(self) -> int: + """Resolve the silence padding with fallback to configuration.""" + + if self.silence_padding_ms is not None: + return self.silence_padding_ms + return Config.LONG_TEXT_SILENCE_PADDING_MS + + def resolve_pause_settings(self) -> Dict[str, Any]: + """Return pause handling configuration with defaults applied.""" + + enable = ( + self.enable_pauses + if self.enable_pauses is not None + else Config.ENABLE_PUNCTUATION_PAUSES + ) + return { + "enable": bool(enable), + "custom": self.custom_pauses or None, + } + + @field_validator('custom_pauses') + @classmethod + def validate_custom_pauses(cls, value: Optional[Dict[str, Any]]): + if value is None: + return value + + cleaned: Dict[str, int] = {} + for key, duration in value.items(): + if duration is None: + raise ValueError(f'Pause duration for {key!r} cannot be None') + try: + int_duration = int(duration) + except (TypeError, ValueError) as exc: + raise ValueError(f'Invalid pause duration for {key!r}: {duration!r}') from exc + if int_duration < 0: + raise ValueError(f'Pause duration for {key!r} must be non-negative') + cleaned[str(key)] = int_duration + + return cleaned class LongTextChunk(BaseModel): @@ -63,7 +161,7 @@ class LongTextJobMetadata(BaseModel): created_at: datetime = Field(default_factory=datetime.utcnow) updated_at: datetime = Field(default_factory=datetime.utcnow) status: LongTextJobStatus = Field(default=LongTextJobStatus.PENDING) - text_length: int = Field(..., ge=3001, description="Total characters in input text") + text_length: int = Field(..., ge=1, description="Total characters in input text") text_hash: str = Field(..., description="SHA256 hash of input text for deduplication") total_chunks: int = Field(..., ge=1, description="Total number of chunks") completed_chunks: int = Field(default=0, ge=0, description="Number of completed chunks") diff --git a/app/models/requests.py b/app/models/requests.py index e4cd6ad..8ef304f 100644 --- a/app/models/requests.py +++ b/app/models/requests.py @@ -1,8 +1,7 @@ -""" -Request models for API validation -""" +"""Request models for API validation""" + +from typing import Dict, Optional -from typing import Optional from pydantic import BaseModel, Field, validator @@ -25,6 +24,16 @@ class TTSRequest(BaseModel): streaming_strategy: Optional[str] = Field(None, description="Chunking strategy for streaming") streaming_buffer_size: Optional[int] = Field(None, description="Number of chunks to buffer", ge=1, le=10) streaming_quality: Optional[str] = Field(None, description="Speed vs quality trade-off") + + # Pause handling parameters + enable_pauses: Optional[bool] = Field( + None, + description="Enable punctuation-based pauses (defaults to server configuration)", + ) + custom_pauses: Optional[Dict[str, int]] = Field( + None, + description="Custom pause durations in milliseconds keyed by punctuation", + ) @validator('input') def validate_input(cls, v): @@ -54,4 +63,23 @@ def validate_streaming_quality(cls, v): allowed_qualities = ['fast', 'balanced', 
'high'] if v not in allowed_qualities: raise ValueError(f'streaming_quality must be one of: {", ".join(allowed_qualities)}') - return v \ No newline at end of file + return v + + @validator('custom_pauses') + def validate_custom_pauses(cls, value): + if value is None: + return value + + cleaned: Dict[str, int] = {} + for key, duration in value.items(): + if duration is None: + raise ValueError('custom pause duration cannot be None') + try: + int_duration = int(duration) + except (TypeError, ValueError) as exc: + raise ValueError(f'invalid pause duration for {key!r}: {duration!r}') from exc + if int_duration < 0: + raise ValueError(f'pause duration for {key!r} must be non-negative') + cleaned[str(key)] = int_duration + + return cleaned \ No newline at end of file diff --git a/docker/Dockerfile b/docker/Dockerfile index a056a48..888eb64 100755 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -63,6 +63,7 @@ ENV LONG_TEXT_CHUNK_SIZE=2500 ENV LONG_TEXT_SILENCE_PADDING_MS=200 ENV LONG_TEXT_JOB_RETENTION_DAYS=7 ENV LONG_TEXT_MAX_CONCURRENT_JOBS=3 +ENV LONG_TEXT_MIN_LENGTH=100 # Expose port EXPOSE ${PORT} @@ -72,4 +73,4 @@ HEALTHCHECK --interval=30s --timeout=30s --start-period=5m --retries=3 \ CMD curl -f http://localhost:${PORT}/health || exit 1 # Run the application with the new entry point -CMD ["python", "main.py"] \ No newline at end of file +CMD ["python", "main.py"] diff --git a/docker/Dockerfile.blackwell b/docker/Dockerfile.blackwell index 0c7d963..089f979 100644 --- a/docker/Dockerfile.blackwell +++ b/docker/Dockerfile.blackwell @@ -88,6 +88,7 @@ ENV LONG_TEXT_CHUNK_SIZE=2500 ENV LONG_TEXT_SILENCE_PADDING_MS=200 ENV LONG_TEXT_JOB_RETENTION_DAYS=7 ENV LONG_TEXT_MAX_CONCURRENT_JOBS=3 +ENV LONG_TEXT_MIN_LENGTH=100 # NVIDIA/CUDA environment variables ENV NVIDIA_VISIBLE_DEVICES=all @@ -101,4 +102,4 @@ HEALTHCHECK --interval=30s --timeout=30s --start-period=5m --retries=3 \ CMD curl -f http://localhost:${PORT}/health || exit 1 # Run the application using the new entry point -CMD ["python", "main.py"] \ No newline at end of file +CMD ["python", "main.py"] diff --git a/docker/Dockerfile.cpu b/docker/Dockerfile.cpu index 9502a22..e81d0ff 100755 --- a/docker/Dockerfile.cpu +++ b/docker/Dockerfile.cpu @@ -63,6 +63,7 @@ ENV LONG_TEXT_CHUNK_SIZE=2500 ENV LONG_TEXT_SILENCE_PADDING_MS=200 ENV LONG_TEXT_JOB_RETENTION_DAYS=7 ENV LONG_TEXT_MAX_CONCURRENT_JOBS=3 +ENV LONG_TEXT_MIN_LENGTH=100 # Expose port EXPOSE ${PORT} @@ -72,4 +73,4 @@ HEALTHCHECK --interval=30s --timeout=30s --start-period=5m --retries=3 \ CMD curl -f http://localhost:${PORT}/health || exit 1 # Run the application with the new entry point -CMD ["python", "main.py"] \ No newline at end of file +CMD ["python", "main.py"] diff --git a/docker/Dockerfile.gpu b/docker/Dockerfile.gpu index 0bea747..b3dcd94 100755 --- a/docker/Dockerfile.gpu +++ b/docker/Dockerfile.gpu @@ -71,6 +71,7 @@ ENV LONG_TEXT_CHUNK_SIZE=2500 ENV LONG_TEXT_SILENCE_PADDING_MS=200 ENV LONG_TEXT_JOB_RETENTION_DAYS=7 ENV LONG_TEXT_MAX_CONCURRENT_JOBS=3 +ENV LONG_TEXT_MIN_LENGTH=100 # NVIDIA/CUDA environment variables ENV NVIDIA_VISIBLE_DEVICES=all @@ -84,4 +85,4 @@ HEALTHCHECK --interval=30s --timeout=30s --start-period=5m --retries=3 \ CMD curl -f http://localhost:${PORT}/health || exit 1 # Run the application with the new entry point -CMD ["python", "main.py"] \ No newline at end of file +CMD ["python", "main.py"] diff --git a/docker/Dockerfile.uv b/docker/Dockerfile.uv index 80d4363..923d796 100644 --- a/docker/Dockerfile.uv +++ b/docker/Dockerfile.uv @@ -79,6 +79,7 @@ 
ENV LONG_TEXT_CHUNK_SIZE=2500
 ENV LONG_TEXT_SILENCE_PADDING_MS=200
 ENV LONG_TEXT_JOB_RETENTION_DAYS=7
 ENV LONG_TEXT_MAX_CONCURRENT_JOBS=3
+ENV LONG_TEXT_MIN_LENGTH=100

 # Expose port
 EXPOSE ${PORT}
@@ -88,4 +89,4 @@ HEALTHCHECK --interval=30s --timeout=30s --start-period=5m --retries=3 \
     CMD curl -f http://localhost:${PORT}/health || exit 1

 # Run the application using the new entry point
-CMD ["python", "main.py"]
\ No newline at end of file
+CMD ["python", "main.py"]
diff --git a/docker/Dockerfile.uv.gpu b/docker/Dockerfile.uv.gpu
index fd95717..fe6138a 100644
--- a/docker/Dockerfile.uv.gpu
+++ b/docker/Dockerfile.uv.gpu
@@ -78,6 +78,7 @@ ENV LONG_TEXT_CHUNK_SIZE=2500
 ENV LONG_TEXT_SILENCE_PADDING_MS=200
 ENV LONG_TEXT_JOB_RETENTION_DAYS=7
 ENV LONG_TEXT_MAX_CONCURRENT_JOBS=3
+ENV LONG_TEXT_MIN_LENGTH=100

 # NVIDIA/CUDA environment variables
 ENV NVIDIA_VISIBLE_DEVICES=all
@@ -91,4 +92,4 @@ HEALTHCHECK --interval=30s --timeout=30s --start-period=5m --retries=3 \
     CMD curl -f http://localhost:${PORT}/health || exit 1

 # Run the application using the virtual environment Python
-CMD ["/app/.venv/bin/python", "main.py"]
\ No newline at end of file
+CMD ["/app/.venv/bin/python", "main.py"]
diff --git a/docker/docker-compose.blackwell.yml b/docker/docker-compose.blackwell.yml
index 240bcf1..7716ca8 100644
--- a/docker/docker-compose.blackwell.yml
+++ b/docker/docker-compose.blackwell.yml
@@ -23,6 +23,35 @@ services:
       - MAX_CHUNK_LENGTH=${MAX_CHUNK_LENGTH:-280}
       - MAX_TOTAL_LENGTH=${MAX_TOTAL_LENGTH:-3000}

+      # Long Text TTS Settings
+      - LONG_TEXT_DATA_DIR=${LONG_TEXT_DATA_DIR:-/data/long_text_jobs}
+      - LONG_TEXT_MAX_LENGTH=${LONG_TEXT_MAX_LENGTH:-100000}
+      - LONG_TEXT_CHUNK_SIZE=${LONG_TEXT_CHUNK_SIZE:-2500}
+      - LONG_TEXT_SILENCE_PADDING_MS=${LONG_TEXT_SILENCE_PADDING_MS:-200}
+      - LONG_TEXT_JOB_RETENTION_DAYS=${LONG_TEXT_JOB_RETENTION_DAYS:-7}
+      - LONG_TEXT_MAX_CONCURRENT_JOBS=${LONG_TEXT_MAX_CONCURRENT_JOBS:-3}
+      - LONG_TEXT_MIN_LENGTH=${LONG_TEXT_MIN_LENGTH:-100}
+      - LONG_TEXT_CHUNKING_STRATEGY=${LONG_TEXT_CHUNKING_STRATEGY:-sentence}
+      - LONG_TEXT_QUALITY_PRESET=${LONG_TEXT_QUALITY_PRESET:-balanced}
+      - QUALITY_FAST_CHUNK_SIZE=${QUALITY_FAST_CHUNK_SIZE:-1500}
+      - QUALITY_FAST_CFG_WEIGHT=${QUALITY_FAST_CFG_WEIGHT:-0.3}
+      - QUALITY_FAST_TEMPERATURE=${QUALITY_FAST_TEMPERATURE:-0.6}
+      - QUALITY_BALANCED_CHUNK_SIZE=${QUALITY_BALANCED_CHUNK_SIZE:-2500}
+      - QUALITY_BALANCED_CFG_WEIGHT=${QUALITY_BALANCED_CFG_WEIGHT:-0.5}
+      - QUALITY_BALANCED_TEMPERATURE=${QUALITY_BALANCED_TEMPERATURE:-0.8}
+      - QUALITY_HIGH_CHUNK_SIZE=${QUALITY_HIGH_CHUNK_SIZE:-2800}
+      - QUALITY_HIGH_CFG_WEIGHT=${QUALITY_HIGH_CFG_WEIGHT:-0.7}
+      - QUALITY_HIGH_TEMPERATURE=${QUALITY_HIGH_TEMPERATURE:-1.0}
+      - ENABLE_PUNCTUATION_PAUSES=${ENABLE_PUNCTUATION_PAUSES:-true}
+      - ELLIPSIS_PAUSE_MS=${ELLIPSIS_PAUSE_MS:-800}
+      - EM_DASH_PAUSE_MS=${EM_DASH_PAUSE_MS:-550}
+      - EN_DASH_PAUSE_MS=${EN_DASH_PAUSE_MS:-375}
+      - PERIOD_PAUSE_MS=${PERIOD_PAUSE_MS:-500}
+      - PARAGRAPH_PAUSE_MS=${PARAGRAPH_PAUSE_MS:-800}
+      - LINE_BREAK_PAUSE_MS=${LINE_BREAK_PAUSE_MS:-350}
+      - MIN_PAUSE_MS=${MIN_PAUSE_MS:-200}
+      - MAX_PAUSE_MS=${MAX_PAUSE_MS:-2000}
+
       # Voice and Model Settings
       - VOICE_SAMPLE_PATH=/app/voice-sample.mp3
       - DEVICE=${DEVICE:-cuda}
diff --git a/docker/docker-compose.cpu.yml b/docker/docker-compose.cpu.yml
index 482e2a9..8dc49c7 100755
--- a/docker/docker-compose.cpu.yml
+++ b/docker/docker-compose.cpu.yml
@@ -30,6 +30,26 @@ services:
       - LONG_TEXT_SILENCE_PADDING_MS=${LONG_TEXT_SILENCE_PADDING_MS:-200}
       - LONG_TEXT_JOB_RETENTION_DAYS=${LONG_TEXT_JOB_RETENTION_DAYS:-7}
       - 
LONG_TEXT_MAX_CONCURRENT_JOBS=${LONG_TEXT_MAX_CONCURRENT_JOBS:-3} + - LONG_TEXT_MIN_LENGTH=${LONG_TEXT_MIN_LENGTH:-100} + - LONG_TEXT_CHUNKING_STRATEGY=${LONG_TEXT_CHUNKING_STRATEGY:-sentence} + - LONG_TEXT_QUALITY_PRESET=${LONG_TEXT_QUALITY_PRESET:-balanced} + - QUALITY_FAST_CHUNK_SIZE=${QUALITY_FAST_CHUNK_SIZE:-1500} + - QUALITY_FAST_CFG_WEIGHT=${QUALITY_FAST_CFG_WEIGHT:-0.3} + - QUALITY_FAST_TEMPERATURE=${QUALITY_FAST_TEMPERATURE:-0.6} + - QUALITY_BALANCED_CHUNK_SIZE=${QUALITY_BALANCED_CHUNK_SIZE:-2500} + - QUALITY_BALANCED_CFG_WEIGHT=${QUALITY_BALANCED_CFG_WEIGHT:-0.5} + - QUALITY_BALANCED_TEMPERATURE=${QUALITY_BALANCED_TEMPERATURE:-0.8} + - QUALITY_HIGH_CHUNK_SIZE=${QUALITY_HIGH_CHUNK_SIZE:-2800} + - QUALITY_HIGH_CFG_WEIGHT=${QUALITY_HIGH_CFG_WEIGHT:-0.7} + - QUALITY_HIGH_TEMPERATURE=${QUALITY_HIGH_TEMPERATURE:-1.0} + - ENABLE_PUNCTUATION_PAUSES=${ENABLE_PUNCTUATION_PAUSES:-true} + - ELLIPSIS_PAUSE_MS=${ELLIPSIS_PAUSE_MS:-600} + - EM_DASH_PAUSE_MS=${EM_DASH_PAUSE_MS:-400} + - EN_DASH_PAUSE_MS=${EN_DASH_PAUSE_MS:-350} + - PARAGRAPH_PAUSE_MS=${PARAGRAPH_PAUSE_MS:-500} + - LINE_BREAK_PAUSE_MS=${LINE_BREAK_PAUSE_MS:-250} + - MIN_PAUSE_MS=${MIN_PAUSE_MS:-100} + - MAX_PAUSE_MS=${MAX_PAUSE_MS:-2000} # Voice and Model Settings - VOICE_SAMPLE_PATH=/app/voice-sample.mp3 diff --git a/docker/docker-compose.gpu.yml b/docker/docker-compose.gpu.yml index b37782f..57835f0 100755 --- a/docker/docker-compose.gpu.yml +++ b/docker/docker-compose.gpu.yml @@ -30,6 +30,26 @@ services: - LONG_TEXT_SILENCE_PADDING_MS=${LONG_TEXT_SILENCE_PADDING_MS:-200} - LONG_TEXT_JOB_RETENTION_DAYS=${LONG_TEXT_JOB_RETENTION_DAYS:-7} - LONG_TEXT_MAX_CONCURRENT_JOBS=${LONG_TEXT_MAX_CONCURRENT_JOBS:-3} + - LONG_TEXT_MIN_LENGTH=${LONG_TEXT_MIN_LENGTH:-100} + - LONG_TEXT_CHUNKING_STRATEGY=${LONG_TEXT_CHUNKING_STRATEGY:-sentence} + - LONG_TEXT_QUALITY_PRESET=${LONG_TEXT_QUALITY_PRESET:-balanced} + - QUALITY_FAST_CHUNK_SIZE=${QUALITY_FAST_CHUNK_SIZE:-1500} + - QUALITY_FAST_CFG_WEIGHT=${QUALITY_FAST_CFG_WEIGHT:-0.3} + - QUALITY_FAST_TEMPERATURE=${QUALITY_FAST_TEMPERATURE:-0.6} + - QUALITY_BALANCED_CHUNK_SIZE=${QUALITY_BALANCED_CHUNK_SIZE:-2500} + - QUALITY_BALANCED_CFG_WEIGHT=${QUALITY_BALANCED_CFG_WEIGHT:-0.5} + - QUALITY_BALANCED_TEMPERATURE=${QUALITY_BALANCED_TEMPERATURE:-0.8} + - QUALITY_HIGH_CHUNK_SIZE=${QUALITY_HIGH_CHUNK_SIZE:-2800} + - QUALITY_HIGH_CFG_WEIGHT=${QUALITY_HIGH_CFG_WEIGHT:-0.7} + - QUALITY_HIGH_TEMPERATURE=${QUALITY_HIGH_TEMPERATURE:-1.0} + - ENABLE_PUNCTUATION_PAUSES=${ENABLE_PUNCTUATION_PAUSES:-true} + - ELLIPSIS_PAUSE_MS=${ELLIPSIS_PAUSE_MS:-600} + - EM_DASH_PAUSE_MS=${EM_DASH_PAUSE_MS:-400} + - EN_DASH_PAUSE_MS=${EN_DASH_PAUSE_MS:-350} + - PARAGRAPH_PAUSE_MS=${PARAGRAPH_PAUSE_MS:-500} + - LINE_BREAK_PAUSE_MS=${LINE_BREAK_PAUSE_MS:-250} + - MIN_PAUSE_MS=${MIN_PAUSE_MS:-100} + - MAX_PAUSE_MS=${MAX_PAUSE_MS:-2000} # Voice and Model Settings - VOICE_SAMPLE_PATH=/app/voice-sample.mp3 diff --git a/docker/docker-compose.uv.gpu.yml b/docker/docker-compose.uv.gpu.yml index a5c2586..3e66f94 100644 --- a/docker/docker-compose.uv.gpu.yml +++ b/docker/docker-compose.uv.gpu.yml @@ -30,6 +30,26 @@ services: - LONG_TEXT_SILENCE_PADDING_MS=${LONG_TEXT_SILENCE_PADDING_MS:-200} - LONG_TEXT_JOB_RETENTION_DAYS=${LONG_TEXT_JOB_RETENTION_DAYS:-7} - LONG_TEXT_MAX_CONCURRENT_JOBS=${LONG_TEXT_MAX_CONCURRENT_JOBS:-3} + - LONG_TEXT_MIN_LENGTH=${LONG_TEXT_MIN_LENGTH:-100} + - LONG_TEXT_CHUNKING_STRATEGY=${LONG_TEXT_CHUNKING_STRATEGY:-sentence} + - LONG_TEXT_QUALITY_PRESET=${LONG_TEXT_QUALITY_PRESET:-balanced} + - 
QUALITY_FAST_CHUNK_SIZE=${QUALITY_FAST_CHUNK_SIZE:-1500} + - QUALITY_FAST_CFG_WEIGHT=${QUALITY_FAST_CFG_WEIGHT:-0.3} + - QUALITY_FAST_TEMPERATURE=${QUALITY_FAST_TEMPERATURE:-0.6} + - QUALITY_BALANCED_CHUNK_SIZE=${QUALITY_BALANCED_CHUNK_SIZE:-2500} + - QUALITY_BALANCED_CFG_WEIGHT=${QUALITY_BALANCED_CFG_WEIGHT:-0.5} + - QUALITY_BALANCED_TEMPERATURE=${QUALITY_BALANCED_TEMPERATURE:-0.8} + - QUALITY_HIGH_CHUNK_SIZE=${QUALITY_HIGH_CHUNK_SIZE:-2800} + - QUALITY_HIGH_CFG_WEIGHT=${QUALITY_HIGH_CFG_WEIGHT:-0.7} + - QUALITY_HIGH_TEMPERATURE=${QUALITY_HIGH_TEMPERATURE:-1.0} + - ENABLE_PUNCTUATION_PAUSES=${ENABLE_PUNCTUATION_PAUSES:-true} + - ELLIPSIS_PAUSE_MS=${ELLIPSIS_PAUSE_MS:-600} + - EM_DASH_PAUSE_MS=${EM_DASH_PAUSE_MS:-400} + - EN_DASH_PAUSE_MS=${EN_DASH_PAUSE_MS:-350} + - PARAGRAPH_PAUSE_MS=${PARAGRAPH_PAUSE_MS:-500} + - LINE_BREAK_PAUSE_MS=${LINE_BREAK_PAUSE_MS:-250} + - MIN_PAUSE_MS=${MIN_PAUSE_MS:-100} + - MAX_PAUSE_MS=${MAX_PAUSE_MS:-2000} # Voice and Model Settings - VOICE_SAMPLE_PATH=/app/voice-sample.mp3 diff --git a/docker/docker-compose.uv.yml b/docker/docker-compose.uv.yml index 5193b5d..212f72a 100644 --- a/docker/docker-compose.uv.yml +++ b/docker/docker-compose.uv.yml @@ -30,6 +30,26 @@ services: - LONG_TEXT_SILENCE_PADDING_MS=${LONG_TEXT_SILENCE_PADDING_MS:-200} - LONG_TEXT_JOB_RETENTION_DAYS=${LONG_TEXT_JOB_RETENTION_DAYS:-7} - LONG_TEXT_MAX_CONCURRENT_JOBS=${LONG_TEXT_MAX_CONCURRENT_JOBS:-3} + - LONG_TEXT_MIN_LENGTH=${LONG_TEXT_MIN_LENGTH:-100} + - LONG_TEXT_CHUNKING_STRATEGY=${LONG_TEXT_CHUNKING_STRATEGY:-sentence} + - LONG_TEXT_QUALITY_PRESET=${LONG_TEXT_QUALITY_PRESET:-balanced} + - QUALITY_FAST_CHUNK_SIZE=${QUALITY_FAST_CHUNK_SIZE:-1500} + - QUALITY_FAST_CFG_WEIGHT=${QUALITY_FAST_CFG_WEIGHT:-0.3} + - QUALITY_FAST_TEMPERATURE=${QUALITY_FAST_TEMPERATURE:-0.6} + - QUALITY_BALANCED_CHUNK_SIZE=${QUALITY_BALANCED_CHUNK_SIZE:-2500} + - QUALITY_BALANCED_CFG_WEIGHT=${QUALITY_BALANCED_CFG_WEIGHT:-0.5} + - QUALITY_BALANCED_TEMPERATURE=${QUALITY_BALANCED_TEMPERATURE:-0.8} + - QUALITY_HIGH_CHUNK_SIZE=${QUALITY_HIGH_CHUNK_SIZE:-2800} + - QUALITY_HIGH_CFG_WEIGHT=${QUALITY_HIGH_CFG_WEIGHT:-0.7} + - QUALITY_HIGH_TEMPERATURE=${QUALITY_HIGH_TEMPERATURE:-1.0} + - ENABLE_PUNCTUATION_PAUSES=${ENABLE_PUNCTUATION_PAUSES:-true} + - ELLIPSIS_PAUSE_MS=${ELLIPSIS_PAUSE_MS:-600} + - EM_DASH_PAUSE_MS=${EM_DASH_PAUSE_MS:-400} + - EN_DASH_PAUSE_MS=${EN_DASH_PAUSE_MS:-350} + - PARAGRAPH_PAUSE_MS=${PARAGRAPH_PAUSE_MS:-500} + - LINE_BREAK_PAUSE_MS=${LINE_BREAK_PAUSE_MS:-250} + - MIN_PAUSE_MS=${MIN_PAUSE_MS:-100} + - MAX_PAUSE_MS=${MAX_PAUSE_MS:-2000} # Voice and Model Settings - VOICE_SAMPLE_PATH=/app/voice-sample.mp3 diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index cdf18ee..cf697f8 100755 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -30,6 +30,26 @@ services: - LONG_TEXT_SILENCE_PADDING_MS=${LONG_TEXT_SILENCE_PADDING_MS:-200} - LONG_TEXT_JOB_RETENTION_DAYS=${LONG_TEXT_JOB_RETENTION_DAYS:-7} - LONG_TEXT_MAX_CONCURRENT_JOBS=${LONG_TEXT_MAX_CONCURRENT_JOBS:-3} + - LONG_TEXT_MIN_LENGTH=${LONG_TEXT_MIN_LENGTH:-100} + - LONG_TEXT_CHUNKING_STRATEGY=${LONG_TEXT_CHUNKING_STRATEGY:-sentence} + - LONG_TEXT_QUALITY_PRESET=${LONG_TEXT_QUALITY_PRESET:-balanced} + - QUALITY_FAST_CHUNK_SIZE=${QUALITY_FAST_CHUNK_SIZE:-1500} + - QUALITY_FAST_CFG_WEIGHT=${QUALITY_FAST_CFG_WEIGHT:-0.3} + - QUALITY_FAST_TEMPERATURE=${QUALITY_FAST_TEMPERATURE:-0.6} + - QUALITY_BALANCED_CHUNK_SIZE=${QUALITY_BALANCED_CHUNK_SIZE:-2500} + - QUALITY_BALANCED_CFG_WEIGHT=${QUALITY_BALANCED_CFG_WEIGHT:-0.5} + 
- QUALITY_BALANCED_TEMPERATURE=${QUALITY_BALANCED_TEMPERATURE:-0.8}
+      - QUALITY_HIGH_CHUNK_SIZE=${QUALITY_HIGH_CHUNK_SIZE:-2800}
+      - QUALITY_HIGH_CFG_WEIGHT=${QUALITY_HIGH_CFG_WEIGHT:-0.7}
+      - QUALITY_HIGH_TEMPERATURE=${QUALITY_HIGH_TEMPERATURE:-1.0}
+      - ENABLE_PUNCTUATION_PAUSES=${ENABLE_PUNCTUATION_PAUSES:-true}
+      - ELLIPSIS_PAUSE_MS=${ELLIPSIS_PAUSE_MS:-600}
+      - EM_DASH_PAUSE_MS=${EM_DASH_PAUSE_MS:-400}
+      - EN_DASH_PAUSE_MS=${EN_DASH_PAUSE_MS:-350}
+      - PARAGRAPH_PAUSE_MS=${PARAGRAPH_PAUSE_MS:-500}
+      - LINE_BREAK_PAUSE_MS=${LINE_BREAK_PAUSE_MS:-250}
+      - MIN_PAUSE_MS=${MIN_PAUSE_MS:-100}
+      - MAX_PAUSE_MS=${MAX_PAUSE_MS:-2000}

       # Voice and Model Settings
       - VOICE_SAMPLE_PATH=/app/voice-sample.mp3
diff --git a/frontend/src/services/longTextTTS.ts b/frontend/src/services/longTextTTS.ts
index 688dd68..aa81ab7 100644
--- a/frontend/src/services/longTextTTS.ts
+++ b/frontend/src/services/longTextTTS.ts
@@ -47,6 +47,22 @@ export const createLongTextTTSService = (baseUrl: string, sessionId?: string) =>
       payload.response_format = request.output_format; // Backend expects 'response_format'
     }

+    if (request.silence_padding_ms !== undefined) {
+      payload.silence_padding_ms = request.silence_padding_ms;
+    }
+
+    if (request.chunking_strategy) {
+      payload.chunking_strategy = request.chunking_strategy;
+    }
+
+    if (request.quality_preset) {
+      payload.quality_preset = request.quality_preset;
+    }
+
+    if (request.chunk_size !== undefined) {
+      payload.chunk_size = request.chunk_size;
+    }
+
     // Add session ID for tracking
     if (sessionId) {
       payload.session_id = sessionId;
diff --git a/frontend/src/types/index.ts b/frontend/src/types/index.ts
index 4d6fae9..f95bdfd 100644
--- a/frontend/src/types/index.ts
+++ b/frontend/src/types/index.ts
@@ -10,6 +10,8 @@ export interface TTSRequest {
   streaming_chunk_size?: number;
   streaming_strategy?: 'sentence' | 'paragraph' | 'fixed' | 'word';
   streaming_quality?: 'fast' | 'balanced' | 'high';
+  enable_pauses?: boolean;
+  custom_pauses?: Record<string, number>;
 }

 export interface HealthResponse {
@@ -262,10 +264,16 @@ export interface LongTextJobMetadata {
   };
   voice: string;
   parameters: {
-    exaggeration: number;
-    cfg_weight: number;
-    temperature: number;
-    language: string;
+    exaggeration?: number;
+    cfg_weight?: number;
+    temperature?: number;
+    output_format?: string;
+    chunking_strategy?: 'sentence' | 'paragraph' | 'word' | 'fixed';
+    chunk_size?: number;
+    quality_preset?: 'fast' | 'balanced' | 'high';
+    silence_padding_ms?: number;
+    language?: string;
+    [key: string]: unknown;
   };
   processing: {
     started_at?: string;
@@ -328,7 +336,12 @@ export interface LongTextRequest {
   voice_file?: File;
   output_format?: 'mp3' | 'wav';
   silence_padding_ms?: number;
+  chunking_strategy?: 'sentence' | 'paragraph' | 'word' | 'fixed';
+  quality_preset?: 'fast' | 'balanced' | 'high';
+  chunk_size?: number;
   session_id?: string;
+  enable_pauses?: boolean;
+  custom_pauses?: Record<string, number>;
 }

 export interface LongTextSSEEvent {
diff --git a/tests/test_pause_handler.py b/tests/test_pause_handler.py
new file mode 100644
index 0000000..feb72de
--- /dev/null
+++ b/tests/test_pause_handler.py
@@ -0,0 +1,129 @@
+import pytest
+
+from app.core.pause_handler import PauseHandler, split_text_with_pauses, TextChunk
+
+
+class TestPauseHandler:
+    def test_basic_ellipsis_pause(self):
+        handler = PauseHandler()
+        chunks = handler.process("Hello... 
world") + + assert len(chunks) == 2 + assert chunks[0].text == "Hello" + assert chunks[0].pause_after_ms == 600 + assert chunks[1].text == "world" + assert chunks[1].pause_after_ms == 0 + + def test_em_dash_pause(self): + handler = PauseHandler() + chunks = handler.process("Hello—world") + + assert len(chunks) == 2 + assert chunks[0].text == "Hello" + assert chunks[0].pause_after_ms == 400 + assert chunks[1].text == "world" + + def test_en_dash_pause(self): + handler = PauseHandler() + chunks = handler.process("Numbers 1–2") + + assert len(chunks) == 2 + assert chunks[0].text == "Numbers 1" + assert chunks[0].pause_after_ms == 350 + assert chunks[1].text == "2" + + def test_multiple_pauses(self): + handler = PauseHandler() + text = "Hello... I was thinking—maybe tomorrow?" + chunks = handler.process(text) + + assert len(chunks) == 3 + assert chunks[0].text == "Hello" + assert chunks[0].pause_after_ms == 600 + assert chunks[1].text == "I was thinking" + assert chunks[1].pause_after_ms == 400 + assert chunks[2].text == "maybe tomorrow?" + assert chunks[2].pause_after_ms == 0 + + def test_no_pauses_when_disabled(self): + handler = PauseHandler(enable_pauses=False) + chunks = handler.process("Hello... world—test") + + assert len(chunks) == 1 + assert chunks[0].text == "Hello... world—test" + assert chunks[0].pause_after_ms == 0 + + def test_line_break_pause(self): + handler = PauseHandler() + chunks = handler.process("Line one\nLine two") + + assert len(chunks) == 2 + assert chunks[0].text == "Line one" + assert chunks[0].pause_after_ms == 250 + assert chunks[1].text == "Line two" + + def test_paragraph_break_pause(self): + handler = PauseHandler() + chunks = handler.process("Paragraph one\n\nParagraph two") + + assert len(chunks) == 2 + assert chunks[0].text == "Paragraph one" + assert chunks[0].pause_after_ms == 500 + assert chunks[1].text == "Paragraph two" + + def test_custom_pause_durations(self): + custom = {r"\.\.\.": 1000} + handler = PauseHandler(custom_pauses=custom) + chunks = handler.process("Hello... world") + + assert chunks[0].pause_after_ms == 1000 + + def test_pause_clamping(self): + handler = PauseHandler(min_pause_ms=200, max_pause_ms=500) + custom = {r"\.\.\.": 2000} + handler.pause_patterns.update(custom) + + chunks = handler.process("Hello... world") + assert chunks[0].pause_after_ms == 500 + + def test_empty_text(self): + handler = PauseHandler() + chunks = handler.process("") + + assert len(chunks) == 0 + + def test_no_pause_punctuation(self): + handler = PauseHandler() + chunks = handler.process("Hello world, how are you?") + + assert len(chunks) == 1 + assert chunks[0].text == "Hello world, how are you?" + assert chunks[0].pause_after_ms == 0 + + def test_estimate_total_pause_time(self): + handler = PauseHandler() + text = "Hello... world—test" + total_pause = handler.estimate_total_pause_time(text) + + assert total_pause == 1000 + + def test_pause_summary(self): + handler = PauseHandler() + text = "Hello... world—test... again" + summary = handler.get_pause_summary(text) + + assert summary['total_chunks'] == 4 + assert summary['chunks_with_pauses'] == 3 + assert summary['pause_types']['...'] == 2 + assert summary['pause_types']['—'] == 1 + + def test_convenience_function(self): + chunks = split_text_with_pauses("Hello... 
world") + + assert len(chunks) == 2 + assert isinstance(chunks[0], TextChunk) + assert chunks[0].pause_after_ms == 600 + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/unit_tests/test_long_text_limits.py b/unit_tests/test_long_text_limits.py new file mode 100644 index 0000000..001da5a --- /dev/null +++ b/unit_tests/test_long_text_limits.py @@ -0,0 +1,65 @@ +"""Unit tests for configurable long text limits""" + +from pathlib import Path +from typing import Iterator +import sys + +import pytest +from pydantic import ValidationError + +PROJECT_ROOT = Path(__file__).resolve().parent.parent +if str(PROJECT_ROOT) not in sys.path: + sys.path.insert(0, str(PROJECT_ROOT)) + +from app.config import Config +from app.core.text_processing import validate_long_text_input +from app.models.long_text import LongTextRequest + + +@pytest.fixture() +def reset_long_text_limits() -> Iterator[None]: + """Restore Config long text limits after each test""" + original_min = Config.LONG_TEXT_MIN_LENGTH + original_max = Config.LONG_TEXT_MAX_LENGTH + yield + Config.LONG_TEXT_MIN_LENGTH = original_min + Config.LONG_TEXT_MAX_LENGTH = original_max + + +def test_long_text_request_accepts_env_configured_min(monkeypatch: pytest.MonkeyPatch, reset_long_text_limits: None) -> None: + """Long text requests should accept inputs that meet the configured minimum""" + monkeypatch.setenv("LONG_TEXT_MIN_LENGTH", "100") + monkeypatch.setenv("LONG_TEXT_MAX_LENGTH", "1000") + + sample_text = "x" * 150 + request = LongTextRequest(input=sample_text) + + assert request.input == sample_text + + +def test_long_text_request_rejects_below_min(monkeypatch: pytest.MonkeyPatch, reset_long_text_limits: None) -> None: + """Validation should fail when text length is below the configured minimum""" + monkeypatch.setenv("LONG_TEXT_MIN_LENGTH", "200") + monkeypatch.setenv("LONG_TEXT_MAX_LENGTH", "1000") + + with pytest.raises(ValidationError) as exc_info: + LongTextRequest(input="y" * 150) + + assert "200" in str(exc_info.value) + + +def test_validate_long_text_input_uses_configured_limits(monkeypatch: pytest.MonkeyPatch, reset_long_text_limits: None) -> None: + """Core validation should leverage runtime configuration for min and max lengths""" + monkeypatch.setenv("LONG_TEXT_MIN_LENGTH", "120") + monkeypatch.setenv("LONG_TEXT_MAX_LENGTH", "400") + + is_valid, _ = validate_long_text_input("z" * 200) + assert is_valid + + too_short_valid, too_short_message = validate_long_text_input("z" * 100) + assert not too_short_valid + assert "120" in too_short_message + + too_long_valid, too_long_message = validate_long_text_input("z" * 450) + assert not too_long_valid + assert "400" in too_long_message