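Adds consistent speaker-line formatting for audio feedback: the ElevenLabs script parser in generate_podcast.py now accepts optional whitespace before the colon in a speaker label (e.g. "Name : text"), merges continuation lines into the current speaker's segment, and strips newlines after sanitizing; sanitize_app_settings_for_backend now writes the voice mapping into the `clean_elevenlabs` dict that it actually stores in the returned settings; and the speaker-detection regexes in templates/index.html are updated to accept the same "Name :" format.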

laurentftech · laurentftech · commit 6f9c9d028d15 · 2025-12-07T16:22:12.000+01:00
diff --git a/generate_podcast.py b/generate_podcast.py
@@ -265,19 +265,40 @@ def synthesize(self, script_text: str, speaker_mapping: Dict[str, str], output_f
 
     def _parse_script_segments(self, script_text: str) -> List[Tuple[str, str]]:
         segments = []
+        current_speaker = None
+        current_text_lines = []
+
         for raw_line in script_text.splitlines():
             line = raw_line.strip()
             if not line:
                 continue
-            m = re.match(r"^(\w+):\s*(.+)$", line)
-            if not m:
-                self.logger.info(f"Skipping non-dialogue line for ElevenLabs: '{line}'")
-                continue
-            speaker, text = m.group(1).strip(), m.group(2).strip()
-            # Apply sanitize_text here, after speaker and text are separated
-            text = sanitize_text(text)
-            if text:
-                segments.append((speaker, text))
+
+            match = re.match(r"^(\w+)\s*:\s*(.+)$", line)
+            
+            if match:
+                # This is a new speaker line.
+                # First, save the previous speaker's collected text if it exists.
+                if current_speaker and current_text_lines:
+                    full_text = " ".join(current_text_lines)
+                    # Sanitize the joined text, then remove any newlines for ElevenLabs.
+                    sanitized_text = sanitize_text(full_text).replace('\n', ' ').replace('\r', '')
+                    if sanitized_text:
+                        segments.append((current_speaker, sanitized_text))
+
+                # Start the new speaker's block.
+                current_speaker = match.group(1).strip()
+                current_text_lines = [match.group(2).strip()]
+            elif current_speaker:
+                # This is a continuation of the current speaker's dialogue.
+                current_text_lines.append(line)
+
+        # After the loop, add the last speaker's segment if it exists.
+        if current_speaker and current_text_lines:
+            full_text = " ".join(current_text_lines)
+            sanitized_text = sanitize_text(full_text).replace('\n', ' ').replace('\r', '')
+            if sanitized_text:
+                segments.append((current_speaker, sanitized_text))
+        
         return segments
 
 
@@ -396,10 +417,10 @@ def sanitize_app_settings_for_backend(app_settings: Dict[str, Any]) -> Dict[str,
     clean_elevenlabs = {}
     for speaker, data in elevenlabs_voices.items():
         if isinstance(data, dict):
-            elevenlabs_mapping_clean[speaker] = data.get('id', '')
+            clean_elevenlabs[speaker] = data.get('id', '')
         else:
             # Legacy format: use the string as-is
-            elevenlabs_mapping_clean[speaker] = data
+            clean_elevenlabs[speaker] = data
     clean_settings["speaker_voices_elevenlabs"] = clean_elevenlabs
     
     return clean_settings
diff --git a/templates/index.html b/templates/index.html
@@ -552,7 +552,7 @@ <h3>Asset Credits</h3>
             }
 
             function cleanScript(text) {
-                return text.split('\n').filter(line => /^\s*\w+:\s*.+/.test(line)).join('\n');
+                return text.split('\n').filter(line => /^\s*\w+\s*:\s*.+/.test(line)).join('\n');
             }
 
             function validateSpeakersInUI() {
@@ -568,7 +568,7 @@ <h3>Asset Credits</h3>
                 const scriptText = scriptTextarea.value;
                 const combinedText = (instructionText.trim() ? instructionText + '\n' : '') + scriptText;
 
-                const speakersInScript = (combinedText.match(/^\s*(\w+):/gm) || []).map(s => s.match(/^\s*(\w+):/)[1]);
+                const speakersInScript = (combinedText.match(/^\s*(\w+)\s*:/gm) || []).map(s => s.match(/^\s*(\w+)\s*:/)[1]);
                 const uniqueSpeakers = [...new Set(speakersInScript)];
 
                 if (uniqueSpeakers.length > 2) {
@@ -596,7 +596,7 @@ <h3>Asset Credits</h3>
                     let processedLine = line;
                     
                     // Check if this line contains a speaker (marks the end of the instruction block)
-                    const isSpeakerLine = /^\s*\w+:/.test(line);
+                    const isSpeakerLine = /^\s*\w+\s*:/.test(line);
                     if (isSpeakerLine) {
                         instructionBlockEnded = true;
                     }
@@ -606,7 +606,7 @@ <h3>Asset Credits</h3>
 
                     if (isSpeakerLine) {
                         // Bold speaker names
-                        processedLine = processedLine.replace(/^(\s*)(\w+):/m, '$1<strong>$2:</strong>');
+                        processedLine = processedLine.replace(/^(\s*)(\w+\s*):/m, '$1<strong>$2:</strong>');
                     } else if (!instructionBlockEnded && line.trim() !== '') {
                         // If it's an instruction line (before any speaker) and not empty, wrap the whole line in italics.
                         processedLine = `<em>${processedLine}</em>`;
@@ -747,7 +747,7 @@ <h3>Asset Credits</h3>
                 const scriptText = scriptTextarea.value; // Use the actual textarea value
                 const lines = scriptText.split('\n');
                 const speakersInScript = lines.map(line => {
-                    const match = line.match(/^\s*(\w+):/);
+                    const match = line.match(/^\s*(\w+)\s*:/);
                     return match ? match[1] : null;
                 }).filter(Boolean);
                 const uniqueSpeakers = [...new Set(speakersInScript)];