Enables async podcast generation with task tracking and UI stop feature.

laurentftech · laurentftech · commit d242e6a3dab9 · 2025-11-13T11:52:11.000+01:00
diff --git a/app.py b/app.py
@@ -1,5 +1,5 @@
 from flask import Flask, render_template, request, jsonify, send_from_directory
-from generate_podcast import generate, PODCAST_SCRIPT, setup_logging, validate_speakers, update_elevenlabs_quota
+from generate_podcast import generate, DEFAULT_INSTRUCTION, DEFAULT_SCRIPT, setup_logging, validate_speakers, update_elevenlabs_quota
 from utils import sanitize_text, get_asset_path, get_app_data_dir
 from config import AVAILABLE_VOICES, DEFAULT_APP_SETTINGS, DEMO_AVAILABLE
 from create_demo import create_html_demo_whisperx
@@ -11,11 +11,16 @@
 import shutil
 from elevenlabs.core import ApiError
 import re
+import uuid
+import threading
 
 # --- App Initialization ---
 app = Flask(__name__)
 logger = setup_logging()
 
+# --- In-Memory Task Manager ---
+tasks = {}
+
 # --- Version & License ---
 try:
     from _version import __version__
@@ -55,7 +60,10 @@ def save_settings(settings):
 # --- Routes ---
 @app.route('/')
 def index():
-    return render_template('index.html', default_script=PODCAST_SCRIPT, demo_available=DEMO_AVAILABLE)
+    return render_template('index.html', 
+                           default_instruction=DEFAULT_INSTRUCTION, 
+                           default_script=DEFAULT_SCRIPT, 
+                           demo_available=DEMO_AVAILABLE)
 
 @app.route('/assets/<path:filename>')
 def get_asset(filename):
@@ -130,6 +138,38 @@ def get_gemini_sample(voice_name):
         return "Sample directory not found", 404
     return send_from_directory(sample_path, f"{voice_name}.mp3")
 
+def run_generation_task(task_id, script_text, app_settings, output_filepath, api_key):
+    """Thread target: run generate() and record the outcome in tasks[task_id]."""
+    task = tasks[task_id]
+    try:
+        generated_file = generate(
+            script_text=script_text,
+            app_settings=app_settings,
+            output_filepath=output_filepath,
+            api_key=api_key,
+            status_callback=logger.info,
+            stop_event=task['stop_event']
+        )
+        if not generated_file:  # previously a falsy return left the task 'running' forever
+            raise RuntimeError('Generation failed for an unknown reason. Check server logs.')
+        filename = os.path.basename(generated_file)
+        task['result'] = {'download_url': f'/temp/{filename}', 'filename': filename}
+        task['status'] = 'completed'  # set last so pollers never see 'completed' without 'result'
+    except Exception as e:
+        if "stopped by user" in str(e):  # the stop checks raise plain Exceptions with this text
+            task['error'] = 'Generation cancelled by user.'
+            task['status'] = 'cancelled'
+            try:  # clean up the partially created file
+                if os.path.exists(output_filepath):
+                    os.remove(output_filepath)
+                    logger.info(f"Removed partial file for stopped task: {output_filepath}")
+            except OSError as err:
+                logger.error(f"Error removing partial file for stopped task: {err}")
+        else:
+            logger.error(f"Error during generation for task {task_id}: {e}", exc_info=True)
+            task['error'] = str(e)
+            task['status'] = 'failed'
+
 @app.route('/generate', methods=['POST'])
 def handle_generate():
     script_text = request.form.get('script', '')
@@ -146,46 +186,53 @@ def handle_generate():
     except ValueError as e:
         return jsonify({'error': str(e)}), 400
 
-    first_words = re.sub(r'<[^>]+>', '', sanitized_script).strip().split()[:2]
-    base_name = "_".join(first_words).lower()
-    safe_base_name = re.sub(r'[^a-z0-9_]+', '', base_name)
-    if not safe_base_name:
-        safe_base_name = "podcast"
-    random_suffix = os.urandom(4).hex()
-    output_filename = f"{safe_base_name}_{random_suffix}.mp3"
-    
-    output_filepath = os.path.join(app.config['TEMP_DIR'], output_filename)
-
     provider = app_settings.get("tts_provider", "elevenlabs")
     api_key_env_var = "ELEVENLABS_API_KEY" if provider == "elevenlabs" else "GEMINI_API_KEY"
     api_key = os.environ.get(api_key_env_var)
-
     if not api_key:
         return jsonify({'error': f'API key ({api_key_env_var}) not found in environment variables.'}), 500
 
     from utils import sanitize_app_settings_for_backend
     app_settings_clean = sanitize_app_settings_for_backend(app_settings)
 
-    try:
-        generated_file = generate(
-            script_text=sanitized_script,
-            app_settings=app_settings_clean,
-            output_filepath=output_filepath,
-            api_key=api_key,
-            status_callback=logger.info
-        )
-        if generated_file:
-            return jsonify({'download_url': f'/temp/{output_filename}', 'filename': output_filename})
-        else:
-            return jsonify({'error': 'Generation failed for an unknown reason. Check server logs.'}), 500
-    except ApiError as e:
-        error_detail = e.body.get('detail', {})
-        message = error_detail.get('message', 'An unknown ElevenLabs API error occurred.')
-        logger.error(f"ElevenLabs API Error: {message}")
-        return jsonify({'error': f"ElevenLabs Error: {message}"}), 500
-    except Exception as e:
-        logger.error(f"Error during generation: {e}", exc_info=True)
-        return jsonify({'error': f'An unexpected error occurred: {str(e)}'}), 500
+    task_id = str(uuid.uuid4())
+    output_filename = f"{task_id}.mp3"
+    output_filepath = os.path.join(app.config['TEMP_DIR'], output_filename)
+    
+    stop_event = threading.Event()
+    thread = threading.Thread(target=run_generation_task, args=(task_id, sanitized_script, app_settings_clean, output_filepath, api_key))
+    
+    tasks[task_id] = {'thread': thread, 'stop_event': stop_event, 'status': 'running'}
+    thread.start()
+    
+    return jsonify({'task_id': task_id})
+
+@app.route('/api/generation_status/<task_id>', methods=['GET'])
+def get_generation_status(task_id):
+    """Return the task's status as JSON, plus its result or error once it has finished."""
+    task = tasks.get(task_id)
+    if not task:
+        return jsonify({'error': 'Task not found'}), 404
+    status = task['status']  # read once: the worker thread may change it mid-request
+    response = {'status': status}
+    if status == 'completed':
+        response['result'] = task['result']
+    elif status in ('failed', 'cancelled'):
+        response['error'] = task.get('error', 'An unknown error occurred.')
+    return jsonify(response)
+
+@app.route('/api/stop_generation/<task_id>', methods=['POST'])
+def stop_generation(task_id):
+    """Ask a running generation task to stop; the worker thread records the final status."""
+    task = tasks.get(task_id)
+    if not task:
+        return jsonify({'error': 'Task not found'}), 404
+    if task['status'] != 'running':
+        return jsonify({'status': 'Task was not running.'})
+    # Mark 'stopping' before signalling so it cannot overwrite the worker's final 'cancelled'.
+    task['status'] = 'stopping'
+    task['stop_event'].set()
+    return jsonify({'status': 'Stop signal sent.'})
 
 @app.route('/api/generate_demo', methods=['POST'])
 def handle_generate_demo():
diff --git a/generate_podcast.py b/generate_podcast.py
@@ -12,6 +12,7 @@
 import getpass
 from typing import Optional, Any, Dict, List, Tuple
 import tempfile
+import threading
 
 import json
 import keyring  # For secure credential storage
@@ -23,11 +24,12 @@
 # Global logger instance - initialized once when module is imported
 logger = logging.getLogger(__name__)
 
-# The podcast script is now a constant to be used by the console mode.
-PODCAST_SCRIPT = """Read aloud in a warm, welcoming tone
-John: [playful] Who am I? I am a little old lady. My hair is white. I have got a small crown and a black handbag. My dress is blue. My country's flag is red, white and blue. I am on many coins and stamps. I love dogs, my dogs' names are corgis! Who am I??
+# The podcast script is now split into instruction and main script
+DEFAULT_INSTRUCTION = "Read aloud in a warm, welcoming tone"
+DEFAULT_SCRIPT = """John: [playful] Who am I? I am a little old lady. My hair is white. I have got a small crown and a black handbag. My dress is blue. My country's flag is red, white and blue. I am on many coins and stamps. I love dogs, my dogs' names are corgis! Who am I??
 Samantha: [laughing] You're queen Elizabeth II!!
 """
+PODCAST_SCRIPT = f"{DEFAULT_INSTRUCTION}\n{DEFAULT_SCRIPT}"
 
 
 def setup_logging() -> logging.Logger:
@@ -127,15 +129,15 @@ def get_api_key(status_callback, logger: logging.Logger, parent_window=None, ser
 
 
 class TTSProvider:
-    def synthesize(self, script_text: str, speaker_mapping: dict, output_filepath: str, status_callback=print) -> str:
+    def synthesize(self, script_text: str, speaker_mapping: dict, output_filepath: str, status_callback=print, stop_event: Optional[threading.Event] = None) -> str:
         raise NotImplementedError
 
 
 class GeminiTTS(TTSProvider):
     def __init__(self, api_key: str):
         self.api_key = api_key
 
-    def synthesize(self, script_text: str, speaker_mapping: dict, output_filepath: str, status_callback=print) -> str:
+    def synthesize(self, script_text: str, speaker_mapping: dict, output_filepath: str, status_callback=print, stop_event: Optional[threading.Event] = None) -> str:
         logger = logging.getLogger("PodcastGenerator")
         client = genai.Client(api_key=self.api_key)
 
@@ -161,11 +163,15 @@ def synthesize(self, script_text: str, speaker_mapping: dict, output_filepath: s
         generate_content_config = types.GenerateContentConfig(temperature=1, response_modalities=["audio"], speech_config=speech_config)
 
         for i, model_name in enumerate(models_to_try):
+            if stop_event and stop_event.is_set():
+                raise Exception("Generation stopped by user.")
             status_callback(f"\nAttempting generation with model: {model_name}...")
             try:
                 audio_chunks = []
                 final_mime_type = ""
                 for chunk in client.models.generate_content_stream(model=model_name, contents=contents, config=generate_content_config):
+                    if stop_event and stop_event.is_set():
+                        raise Exception("Generation stopped by user during streaming.")
                     if not (chunk.candidates and chunk.candidates[0].content and chunk.candidates[0].content.parts):
                         continue
                     part = chunk.candidates[0].content.parts[0]
@@ -194,7 +200,7 @@ def __init__(self, api_key: str):
         self.client = ElevenLabs(api_key=api_key)
         self.logger = logging.getLogger("PodcastGenerator")
 
-    def synthesize(self, script_text: str, speaker_mapping: Dict[str, str], output_filepath: str, status_callback=print) -> str:
+    def synthesize(self, script_text: str, speaker_mapping: Dict[str, str], output_filepath: str, status_callback=print, stop_event: Optional[threading.Event] = None) -> str:
         segments = self._parse_script_segments(script_text)
         if not segments:
             raise ValueError("No valid dialogue segments found in the script. Ensure lines are in 'Speaker: Text' format.")
@@ -220,6 +226,8 @@ def synthesize(self, script_text: str, speaker_mapping: Dict[str, str], output_f
             
             with open(output_filepath, "wb") as f:
                 for chunk in audio_generator:
+                    if stop_event and stop_event.is_set():
+                        raise Exception("Generation stopped by user during streaming.")
                     f.write(chunk)
             
             status_callback(f"File saved successfully: {output_filepath}")
@@ -228,8 +236,12 @@ def synthesize(self, script_text: str, speaker_mapping: Dict[str, str], output_f
             self.logger.error(f"ElevenLabs API error: {e}")
             raise e
         except Exception as e:
-            self.logger.error(f"ElevenLabs critical error: {e}", exc_info=True)
-            raise Exception(f"An unexpected critical error occurred in ElevenLabs TTS: {e}")
+            # Wrap only genuine failures; a "stopped by user" exception is re-raised unchanged below
+            if "stopped by user" not in str(e):
+                self.logger.error(f"ElevenLabs critical error: {e}", exc_info=True)
+                raise Exception(f"An unexpected critical error occurred in ElevenLabs TTS: {e}")
+            # Re-raise the stop exception to be caught by the task runner
+            raise e
 
     def _parse_script_segments(self, script_text: str) -> List[Tuple[str, str]]:
         segments = []
@@ -306,11 +318,14 @@ def validate_speakers(script_text: str, app_settings: Dict[str, Any]) -> Tuple[L
     return missing_speakers, configured_speakers
 
 
-def generate(script_text: str, app_settings: dict, output_filepath: str, status_callback=print, api_key: Optional[str] = None, parent_window=None) -> str:
+def generate(script_text: str, app_settings: dict, output_filepath: str, status_callback=print, api_key: Optional[str] = None, parent_window=None, stop_event: Optional[threading.Event] = None) -> str:
     logger = logging.getLogger("PodcastGenerator")
     logger.info("Starting generation function.")
     status_callback("Starting podcast generation...")
 
+    if stop_event and stop_event.is_set():
+        raise Exception("Generation stopped by user before starting.")
+
     sanitized_script_text = sanitize_text(script_text)
     if not find_ffmpeg_path():
         raise FileNotFoundError("FFmpeg executable not found.")
@@ -331,7 +346,7 @@ def generate(script_text: str, app_settings: dict, output_filepath: str, status_
     ProviderClass = ElevenLabsTTS if provider_name == "elevenlabs" else GeminiTTS
     provider = ProviderClass(api_key=api_key)
     
-    return provider.synthesize(script_text=sanitized_script_text, speaker_mapping=speaker_mapping, output_filepath=output_filepath, status_callback=status_callback)
+    return provider.synthesize(script_text=sanitized_script_text, speaker_mapping=speaker_mapping, output_filepath=output_filepath, status_callback=status_callback, stop_event=stop_event)
 
 
 def parse_audio_mime_type(mime_type: str) -> Dict[str, int]:
diff --git a/templates/index.html b/templates/index.html