remsky
diff --git a/‎api/src/services/audio.py‎
Lines changed: 2 additions & 2 deletions b/‎api/src/services/audio.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎api/src/services/streaming_audio_writer.py‎
Lines changed: 20 additions & 4 deletions b/‎api/src/services/streaming_audio_writer.py‎
Lines changed: 20 additions & 4 deletions
diff --git a/‎api/src/services/text_processing/normalizer.py‎
Lines changed: 24 additions & 2 deletions b/‎api/src/services/text_processing/normalizer.py‎
Lines changed: 24 additions & 2 deletions
diff --git a/‎api/src/services/text_processing/phonemizer.py‎
Lines changed: 15 additions & 2 deletions b/‎api/src/services/text_processing/phonemizer.py‎
Lines changed: 15 additions & 2 deletions
@@ -80,12 +80,12 @@ def find_first_last_non_silent(
         non_silent_index_start, non_silent_index_end = None, None
 
         for X in range(0, len(audio_data)):
-            if audio_data[X] > amplitude_threshold:
+            if abs(audio_data[X]) > amplitude_threshold:
                 non_silent_index_start = X
                 break
 
         for X in range(len(audio_data) - 1, -1, -1):
-            if audio_data[X] > amplitude_threshold:
+            if abs(audio_data[X]) > amplitude_threshold:
                 non_silent_index_end = X
                 break
 
 
@@ -32,19 +32,29 @@ def __init__(self, format: str, sample_rate: int, channels: int = 1):
         if self.format in ["wav", "flac", "mp3", "pcm", "aac", "opus"]:
             if self.format != "pcm":
                 self.output_buffer = BytesIO()
+                container_options = {}
+                # Try disabling Xing VBR header for MP3 to fix iOS timeline reading issues
+                if self.format == 'mp3':
+                    # Disable Xing VBR header
+                    container_options = {'write_xing': '0'}
+                    logger.debug("Disabling Xing VBR header for MP3 encoding.")
+
                 self.container = av.open(
                     self.output_buffer,
                     mode="w",
                     format=self.format if self.format != "aac" else "adts",
+                    options=container_options # Pass options here
                 )
                 self.stream = self.container.add_stream(
                     codec_map[self.format],
-                    sample_rate=self.sample_rate,
+                    rate=self.sample_rate, # Correct parameter name is 'rate'
                     layout="mono" if self.channels == 1 else "stereo",
                 )
-                self.stream.bit_rate = 128000
+                # Set bit_rate only for codecs where it's applicable and useful
+                if self.format in ['mp3', 'aac', 'opus']:
+                    self.stream.bit_rate = 128000 # Hard-coded value; this constructor takes no bitrate argument
         else:
-            raise ValueError(f"Unsupported format: {format}")
+            raise ValueError(f"Unsupported format: {self.format}") # Use self.format here
 
     def close(self):
         if hasattr(self, "container"):
@@ -65,12 +75,18 @@ def write_chunk(
 
         if finalize:
             if self.format != "pcm":
+                # Flush stream encoder
                 packets = self.stream.encode(None)
                 for packet in packets:
                     self.container.mux(packet)
 
+                # Closing the container handles writing the trailer and finalizing the file.
+                # No explicit flush method is available or needed here.
+                logger.debug("Muxed final packets.")
+
+                # Get the final bytes *before* closing. NOTE(review): anything close() writes (e.g. a trailer) is not in `data` — confirm
                 data = self.output_buffer.getvalue()
-                self.close()
+                self.close() # Close container and buffer
                 return data
 
         if audio_data is None or len(audio_data) == 0:
 
@@ -391,6 +391,7 @@ def handle_time(t: re.Match[str]) -> str:
 
 def normalize_text(text: str, normalization_options: NormalizationOptions) -> str:
     """Normalize text for TTS processing"""
+    
     # Handle email addresses first if enabled
     if normalization_options.email_normalization:
         text = EMAIL_PATTERN.sub(handle_email, text)
@@ -415,7 +416,7 @@ def normalize_text(text: str, normalization_options: NormalizationOptions) -> st
             text,
         )
 
-    # Replace quotes and brackets
+    # Replace quotes and brackets (additional cleanup)
     text = text.replace(chr(8216), "'").replace(chr(8217), "'")
     text = text.replace("«", chr(8220)).replace("»", chr(8221))
     text = text.replace(chr(8220), '"').replace(chr(8221), '"')
@@ -435,6 +436,11 @@ def normalize_text(text: str, normalization_options: NormalizationOptions) -> st
     text = re.sub(r"  +", " ", text)
     text = re.sub(r"(?<=\n) +(?=\n)", "", text)
 
+    # Line breaks might cause audio artifacts, so flatten them here:
+    # replace each newline and carriage return with a space
+    text = text.replace('\n', ' ')
+    text = text.replace('\r', ' ')
+    
     # Handle titles and abbreviations
     text = re.sub(r"\bD[Rr]\.(?= [A-Z])", "Doctor", text)
     text = re.sub(r"\b(?:Mr\.|MR\.(?= [A-Z]))", "Mister", text)
@@ -445,7 +451,7 @@ def normalize_text(text: str, normalization_options: NormalizationOptions) -> st
     # Handle common words
     text = re.sub(r"(?i)\b(y)eah?\b", r"\1e'a", text)
 
-    # Handle numbers and money
+    # Handle numbers and money BEFORE replacing special characters
     text = re.sub(r"(?<=\d),(?=\d)", "", text)
 
     text = MONEY_PATTERN.sub(
@@ -457,6 +463,22 @@ def normalize_text(text: str, normalization_options: NormalizationOptions) -> st
 
     text = re.sub(r"\d*\.\d+", handle_decimal, text)
 
+    # Handle other problematic symbols AFTER money/number processing
+    text = text.replace('~', '')    # Remove tilde
+    text = text.replace('@', ' at ')  # At symbol
+    text = text.replace('#', ' number ')  # Hash/pound
+    text = text.replace('$', ' dollar ')  # Dollar sign (if not handled by money pattern)
+    text = text.replace('%', ' percent ')  # Percent sign
+    text = text.replace('^', '')    # Caret
+    text = text.replace('&', ' and ')  # Ampersand
+    text = text.replace('*', '')    # Asterisk
+    text = text.replace('_', ' ')   # Underscore to space
+    text = text.replace('|', ' ')   # Pipe to space
+    text = text.replace('\\', ' ')  # Backslash to space
+    text = text.replace('/', ' slash ')   # Forward slash is spoken as the word "slash"
+    text = text.replace('=', ' equals ')  # Equals sign
+    text = text.replace('+', ' plus ')    # Plus sign
+
     # Handle various formatting
     text = re.sub(r"(?<=\d)-(?=\d)", " to ", text)
     text = re.sub(r"(?<=\d)S", " S", text)
 
@@ -4,6 +4,7 @@
 import phonemizer
 
 from .normalizer import normalize_text
+from ...structures.schemas import NormalizationOptions
 
 phonemizers = {}
 
@@ -95,8 +96,20 @@ def phonemize(text: str, language: str = "a", normalize: bool = True) -> str:
         Phonemized text
     """
     global phonemizers
+    
+    # Strip input text first to remove problematic leading/trailing spaces
+    text = text.strip()
+    
     if normalize:
-        text = normalize_text(text)
+        # Create default normalization options and normalize text
+        normalization_options = NormalizationOptions()
+        text = normalize_text(text, normalization_options)
+        # Strip again after normalization
+        text = text.strip()
+    
     if language not in phonemizers:
         phonemizers[language] = create_phonemizer(language)
-    return phonemizers[language].phonemize(text)
+    
+    result = phonemizers[language].phonemize(text)
+    # Final strip to ensure no leading/trailing spaces in phonemes
+    return result.strip()