@@ -391,6 +391,7 @@ def handle_time(t: re.Match[str]) -> str:
391391
392392def normalize_text (text : str , normalization_options : NormalizationOptions ) -> str :
393393 """Normalize text for TTS processing"""
394+
394395 # Handle email addresses first if enabled
395396 if normalization_options .email_normalization :
396397 text = EMAIL_PATTERN .sub (handle_email , text )
@@ -415,7 +416,7 @@ def normalize_text(text: str, normalization_options: NormalizationOptions) -> st
415416 text ,
416417 )
417418
418- # Replace quotes and brackets
419+ # Replace quotes and brackets (additional cleanup)
419420 text = text .replace (chr (8216 ), "'" ).replace (chr (8217 ), "'" )
420421 text = text .replace ("«" , chr (8220 )).replace ("»" , chr (8221 ))
421422 text = text .replace (chr (8220 ), '"' ).replace (chr (8221 ), '"' )
@@ -435,6 +436,11 @@ def normalize_text(text: str, normalization_options: NormalizationOptions) -> st
435436 text = re .sub (r" +" , " " , text )
436437 text = re .sub (r"(?<=\n) +(?=\n)" , "" , text )
437438
 439+ # Flatten line-break characters that might cause audio artifacts
 440+ # Newline and carriage-return sequences become a single space (no pause is inserted)
441+ text = text .replace ('\n ' , ' ' )
442+ text = text .replace ('\r ' , ' ' )
443+
438444 # Handle titles and abbreviations
439445 text = re .sub (r"\bD[Rr]\.(?= [A-Z])" , "Doctor" , text )
440446 text = re .sub (r"\b(?:Mr\.|MR\.(?= [A-Z]))" , "Mister" , text )
@@ -445,7 +451,7 @@ def normalize_text(text: str, normalization_options: NormalizationOptions) -> st
445451 # Handle common words
446452 text = re .sub (r"(?i)\b(y)eah?\b" , r"\1e'a" , text )
447453
448- # Handle numbers and money
454+ # Handle numbers and money BEFORE replacing special characters
449455 text = re .sub (r"(?<=\d),(?=\d)" , "" , text )
450456
451457 text = MONEY_PATTERN .sub (
@@ -457,6 +463,22 @@ def normalize_text(text: str, normalization_options: NormalizationOptions) -> st
457463
458464 text = re .sub (r"\d*\.\d+" , handle_decimal , text )
459465
466+ # Handle other problematic symbols AFTER money/number processing
467+ text = text .replace ('~' , '' ) # Remove tilde
468+ text = text .replace ('@' , ' at ' ) # At symbol
469+ text = text .replace ('#' , ' number ' ) # Hash/pound
470+ text = text .replace ('$' , ' dollar ' ) # Dollar sign (if not handled by money pattern)
471+ text = text .replace ('%' , ' percent ' ) # Percent sign
472+ text = text .replace ('^' , '' ) # Caret
473+ text = text .replace ('&' , ' and ' ) # Ampersand
474+ text = text .replace ('*' , '' ) # Asterisk
475+ text = text .replace ('_' , ' ' ) # Underscore to space
476+ text = text .replace ('|' , ' ' ) # Pipe to space
477+ text = text .replace ('\\ ' , ' ' ) # Backslash to space
 478+ text = text .replace ('/' , ' slash ' ) # Forward slash to the word "slash" (unconditional; no URL exception on this line)
479+ text = text .replace ('=' , ' equals ' ) # Equals sign
480+ text = text .replace ('+' , ' plus ' ) # Plus sign
481+
460482 # Handle various formatting
461483 text = re .sub (r"(?<=\d)-(?=\d)" , " to " , text )
462484 text = re .sub (r"(?<=\d)S" , " S" , text )
0 commit comments