Skip to content

Commit 7afe08b

Browse files
committed
Sanitizes script text to improve speaker detection and prevent voice loading issues.
1 parent 9deb70a commit 7afe08b

File tree

3 files changed

+52
-12
lines changed

3 files changed

+52
-12
lines changed

generate_podcast.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
import re
2020
import requests
21-
from utils import get_app_data_dir, find_ffmpeg_path, sanitize_app_settings_for_backend
21+
from utils import get_app_data_dir, find_ffmpeg_path, sanitize_app_settings_for_backend, sanitize_text
2222

2323
# Global logger instance - initialized once when module is imported
2424
logger = logging.getLogger(__name__)
@@ -566,6 +566,9 @@ def generate(script_text: str, app_settings: dict, output_filepath: str, status_
566566
logger.info("Starting generation function.")
567567
status_callback("Starting podcast generation...")
568568

569+
# Sanitize the script text at the entry point of the generation logic
570+
sanitized_script_text = sanitize_text(script_text)
571+
569572
ffmpeg_path = find_ffmpeg_path()
570573
if not ffmpeg_path:
571574
status_callback("--- CRITICAL ERROR ---")
@@ -592,12 +595,12 @@ def generate(script_text: str, app_settings: dict, output_filepath: str, status_
592595
if provider_name == "gemini":
593596
speaker_mapping = (app_settings or {}).get("speaker_voices", {})
594597
provider = GeminiTTS(api_key=api_key)
595-
return provider.synthesize(script_text=script_text, speaker_mapping=speaker_mapping,
598+
return provider.synthesize(script_text=sanitized_script_text, speaker_mapping=speaker_mapping,
596599
output_filepath=output_filepath, status_callback=status_callback)
597600
else:
598601
speaker_mapping = (app_settings or {}).get("speaker_voices_elevenlabs", {})
599602
provider = ElevenLabsTTS(api_key=api_key)
600-
return provider.synthesize(script_text=script_text, speaker_mapping=speaker_mapping,
603+
return provider.synthesize(script_text=sanitized_script_text, speaker_mapping=speaker_mapping,
601604
output_filepath=output_filepath, status_callback=status_callback)
602605

603606

@@ -735,7 +738,7 @@ def sanitize_app_settings_for_backend(app_settings: Dict[str, Any]) -> Dict[str,
735738

736739
temp_script_file_path = None
737740
if args.script_text:
738-
script_text = args.script_text
741+
script_text = sanitize_text(args.script_text)
739742
script_source_description = "the provided text"
740743
if not args.output_filepath:
741744
parser.error("argument --output is required when using --script-text.")
@@ -751,7 +754,7 @@ def sanitize_app_settings_for_backend(app_settings: Dict[str, Any]) -> Dict[str,
751754
else: # script_filepath is guaranteed to be not None here
752755
try:
753756
with open(args.script_filepath, 'r', encoding='utf-8') as f:
754-
script_text = f.read()
757+
script_text = sanitize_text(f.read())
755758
script_filepath_for_demo = args.script_filepath
756759
script_source_description = f"'{os.path.basename(args.script_filepath)}'"
757760
except FileNotFoundError:

gui.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
from about_window import AboutWindow
3939
from api_keys_window import APIKeysWindow
4040
from generate_podcast import validate_speakers, update_elevenlabs_quota
41-
from utils import get_asset_path, sanitize_app_settings_for_backend, find_ffplay_path, get_app_data_dir
41+
from utils import get_asset_path, sanitize_app_settings_for_backend, find_ffplay_path, get_app_data_dir, sanitize_text
4242
from create_demo import create_html_demo_whisperx
4343

4444
# --- Versioning ---
@@ -1010,9 +1010,11 @@ def load_script_from_file(self):
10101010

10111011
try:
10121012
with open(filepath, 'r', encoding='utf-8') as f:
1013+
content = f.read()
1014+
sanitized_content = sanitize_text(content)
10131015
self.script_text.delete('1.0', tk.END)
1014-
self.script_text.insert('1.0', f.read())
1015-
self.log_status(f"Script loaded from: {os.path.basename(filepath)}")
1016+
self.script_text.insert('1.0', sanitized_content)
1017+
self.log_status(f"Script loaded and sanitized from: {os.path.basename(filepath)}")
10161018
except Exception as e:
10171019
messagebox.showerror("Reading error", f"Cannot read the file:\n{e}", parent=self.root)
10181020
self.logger.error(f"Error reading the script: {e}")
@@ -1025,11 +1027,13 @@ def start_generation_thread(self):
10251027
parent=self.root)
10261028
return
10271029

1028-
self.last_generated_script = script_content # Store script for demo
1030+
# Sanitize the script content before using it
1031+
sanitized_script = sanitize_text(script_content)
1032+
self.last_generated_script = sanitized_script # Store sanitized script for demo
10291033

10301034
# --- Validate Speaker Voices ---
10311035
try:
1032-
missing_speakers, configured_speakers = validate_speakers(script_content, self.app_settings)
1036+
missing_speakers, configured_speakers = validate_speakers(sanitized_script, self.app_settings)
10331037
except ValueError as e:
10341038
# Règle Gemini: plus de 2 speakers -> erreur bloquante
10351039
messagebox.showerror("Configuration Error", str(e), parent=self.root)
@@ -1091,7 +1095,7 @@ def start_generation_thread(self):
10911095

10921096
thread = threading.Thread(
10931097
target=self.run_generation,
1094-
args=(script_content, output_filepath, self.app_settings, self.api_key)
1098+
args=(sanitized_script, output_filepath, self.app_settings, self.api_key)
10951099
)
10961100
thread.daemon = True
10971101
thread.start()

utils.py

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
import sys
33
import shutil
44
from typing import Optional, Dict, Any
5-
5+
import re
6+
import unicodedata
7+
from html import unescape
68

79
def get_asset_path(filename: str) -> Optional[str]:
810
"""
@@ -57,6 +59,37 @@ def find_ffplay_path() -> Optional[str]:
5759
return _find_command_path("ffplay")
5860

5961

62+
def sanitize_text(text: str) -> str:
    """
    Clean up script text pasted or loaded from external sources.

    The cleanup removes residual HTML/XML markup, decodes HTML entities,
    normalizes Unicode (NFKC), drops invisible/control characters, replaces
    typographic quotes, dashes and bullets with ASCII equivalents, and
    collapses redundant whitespace.

    Line breaks are preserved (normalized to ``\\n``): removing them would
    glue the last word of a line to the first word of the next one.
    NOTE(review): callers appear to rely on one "Speaker: text" entry per
    line for speaker detection - confirm against validate_speakers().

    Args:
        text: Raw script text. ``None`` or an empty string is accepted.

    Returns:
        The sanitized text, or an empty string when ``text`` is falsy.
    """
    if not text:
        return ""

    # 1. Strip residual HTML/XML tags and comments (e.g. <p>, <o:p> from Word).
    #    Only tag-shaped spans are removed, so plain comparisons such as
    #    "1 < 2 and 3 > 2" are left untouched.
    text = re.sub(r"<!--.*?-->|</?[A-Za-z][^<>]*>", " ", text, flags=re.DOTALL)

    # 2. Decode HTML entities (e.g. &nbsp;, &amp;).
    text = unescape(text)

    # 3. Normalize Unicode compatibility forms (full-width chars, ligatures...).
    text = unicodedata.normalize("NFKC", text)

    # 4. Unify every line-break flavour to "\n" BEFORE control characters are
    #    dropped, so that line structure survives the cleanup.
    text = re.sub(r"\r\n?|[\x85\u2028\u2029]", "\n", text)

    # 5. Zero-width characters (zero-width space, BOM) carry no visible gap:
    #    delete them instead of turning them into a space that splits words.
    text = re.sub(r"[\u200B\uFEFF]", "", text)

    # 6. Replace non-breaking and other exotic spaces with a regular space.
    text = re.sub(r"[\u00A0\u2000-\u200A\u202F\u205F\u3000]", " ", text)

    # 7. Remove invisible control characters, keeping tab and newline, which
    #    are handled by the whitespace pass below.
    text = re.sub(r"[\x00-\x08\x0b-\x1f\x7f-\x9f]", "", text)

    # 8. Replace Word "smart" quotes, dashes and bullets with ASCII characters.
    text = text.translate(str.maketrans({
        "\u201c": '"', "\u201d": '"',
        "\u2018": "'", "\u2019": "'",
        "\u2013": "-", "\u2014": "-", "\u2022": "-",
    }))

    # 9. Collapse runs of horizontal whitespace, then trim the spaces that
    #    remain around each line break.
    text = re.sub(r"[^\S\n]+", " ", text)
    text = re.sub(r" ?\n ?", "\n", text)

    return text.strip()
92+
6093
def sanitize_app_settings_for_backend(app_settings: Dict[str, Any]) -> Dict[str, Any]:
6194
"""
6295
Creates a "clean" version of app_settings suitable for the backend.

0 commit comments

Comments
 (0)