Skip to content

Commit 3b59e55

Browse files
committed
Added WhisperX support, demo feature, and updated Docker image tags
1 parent f69081c commit 3b59e55

File tree

8 files changed

+252
-109
lines changed

8 files changed

+252
-109
lines changed

Dockerfile

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,51 @@
1-
# Use an official Python runtime as a parent image
2-
FROM python:3.11-slim-bookworm
1+
# --- Stage 1: Base dependencies ---
2+
FROM python:3.11-slim-bookworm AS base
33

44
# Set the working directory in the container
55
WORKDIR /app
66

77
# Install FFmpeg (required for audio processing)
8+
# Also install git for whisperx dependencies
89
RUN apt-get update && apt-get install -y --no-install-recommends \
910
ffmpeg \
11+
git \
1012
&& rm -rf /var/lib/apt/lists/*
1113

12-
# Copy the requirements file into the container at /app
14+
# Copy the core requirements file
1315
COPY requirements.txt .
1416

15-
# Install any needed packages specified in requirements.txt
17+
# Install core dependencies
1618
RUN pip install --no-cache-dir -r requirements.txt
1719

18-
# Copy the rest of the application code into the container
20+
# Copy the rest of the application code
1921
COPY . .
2022

23+
# --- Stage 2: Build with WhisperX support ---
24+
FROM base AS with_whisperx
25+
26+
# Install PyTorch (CPU-only for broader compatibility in Docker) and torchaudio
27+
# Then install whisperx
28+
RUN pip install --no-cache-dir \
29+
torch==2.2.2+cpu \
30+
torchaudio==2.2.2+cpu \
31+
-f https://download.pytorch.org/whl/torch_stable.html \
32+
&& pip install --no-cache-dir whisperx==3.1.1
33+
34+
# Set environment variable to enable demo features
35+
ENV DEMO_AVAILABLE=1
36+
37+
# Expose the port that Gunicorn will listen on
38+
EXPOSE 8000
39+
40+
# Run gunicorn to serve the Flask application
41+
CMD ["gunicorn", "--bind", "0.0.0.0:8000", "app:app"]
42+
43+
# --- Stage 3: Build without WhisperX support ---
44+
FROM base AS without_whisperx
45+
46+
# Set environment variable to disable demo features
47+
ENV DEMO_AVAILABLE=0
48+
2149
# Expose the port that Gunicorn will listen on
2250
EXPOSE 8000
2351

app.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from flask import Flask, render_template, request, jsonify, send_from_directory
22
from generate_podcast import generate, PODCAST_SCRIPT, setup_logging, validate_speakers, update_elevenlabs_quota
33
from utils import sanitize_text, get_asset_path, get_app_data_dir
4-
from config import AVAILABLE_VOICES, DEFAULT_APP_SETTINGS
4+
from config import AVAILABLE_VOICES, DEFAULT_APP_SETTINGS, DEMO_AVAILABLE
55
from create_demo import create_html_demo_whisperx
66
import os
77
import tempfile
@@ -55,7 +55,7 @@ def save_settings(settings):
5555
# --- Routes ---
5656
@app.route('/')
5757
def index():
58-
return render_template('index.html', default_script=PODCAST_SCRIPT)
58+
return render_template('index.html', default_script=PODCAST_SCRIPT, demo_available=DEMO_AVAILABLE)
5959

6060
@app.route('/assets/<path:filename>')
6161
def get_asset(filename):
@@ -189,6 +189,10 @@ def handle_generate():
189189

190190
@app.route('/api/generate_demo', methods=['POST'])
191191
def handle_generate_demo():
192+
# Demo generation is gated by the DEMO_AVAILABLE flag (True only when the env var is "1"); otherwise reject with 403
193+
if not DEMO_AVAILABLE:
194+
return jsonify({'error': 'Demo generation is not available.'}), 403
195+
192196
data = request.json
193197
script_text, audio_filename = data.get('script'), data.get('audio_filename')
194198
title, subtitle = data.get('title', 'Podcast Demo'), data.get('subtitle', '')

config.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import os
2+
13
SERVICE_CONFIG = {
24
"elevenlabs": {
35
"title": "ElevenLabs API Key",
@@ -54,4 +56,7 @@
5456
"Samantha": {"id": "cgSgspJ2msm6clMCkdW9", "display_name": "Jessica - Female, Young, american"}
5557
},
5658
"elevenlabs_quota_cache": None
57-
}
59+
}
60+
61+
# Feature flag for demo generation (UI and API): True only when the DEMO_AVAILABLE environment variable is exactly "1"
62+
DEMO_AVAILABLE = os.getenv("DEMO_AVAILABLE") == "1"

docker-compose.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,5 @@ services:
99
environment:
1010
- ELEVENLABS_API_KEY=${ELEVENLABS_API_KEY}
1111
- GEMINI_API_KEY=${GEMINI_API_KEY}
12+
- DEMO_AVAILABLE=${DEMO_AVAILABLE}
1213
restart: always

docker_push.sh

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
#!/bin/bash
2+
3+
# This script automates the process of building and pushing two versions of a Docker image
4+
# to Docker Hub: a full version ('latest') and a lightweight version ('light').
5+
# Each image is pushed under three tags: the exact version, the major version, and a fixed tag ('latest' or 'light'); light tags carry a '-light' suffix.
6+
#
7+
# Usage:
8+
# 1. Make the script executable: chmod +x docker_push.sh
9+
# 2. Run the script with a version number: ./docker_push.sh <version>
10+
#
11+
# Example:
12+
# ./docker_push.sh 2.0.0b8
13+
14+
# Exit immediately if a command exits with a non-zero status.
15+
set -e
16+
17+
# --- Configuration ---
18+
# Your Docker Hub username
19+
USERNAME="gandulf78"
20+
# The name of the image
21+
IMAGE_NAME="podcast_generator"
22+
23+
# --- Script Logic ---
24+
25+
# Check if a version tag was provided as an argument
26+
if [ -z "$1" ]; then
27+
echo "Error: No version tag provided." >&2
28+
echo "Usage: ./docker_push.sh <version>" >&2
29+
echo "Example: ./docker_push.sh 2.0.0b8" >&2
30+
exit 1
31+
fi
32+
33+
VERSION=$1
34+
# Extract the major version number (e.g., "2" from "2.0.0b8")
35+
MAJOR_VERSION=$(echo "$VERSION" | cut -d. -f1)
36+
37+
# --- Full Version (with WhisperX) ---
38+
TAG_SPECIFIC_FULL="$USERNAME/$IMAGE_NAME:$VERSION"
39+
TAG_MAJOR_FULL="$USERNAME/$IMAGE_NAME:$MAJOR_VERSION"
40+
TAG_LATEST_FULL="$USERNAME/$IMAGE_NAME:latest"
41+
42+
echo "--- Building and tagging FULL Docker image (with WhisperX) ---"
43+
echo " > Specific tag: $TAG_SPECIFIC_FULL"
44+
echo " > Major tag: $TAG_MAJOR_FULL"
45+
echo " > Latest tag: $TAG_LATEST_FULL"
46+
47+
# 1. Build the full image using the 'with_whisperx' target
48+
docker build --target with_whisperx -t "$TAG_SPECIFIC_FULL" .
49+
50+
# 2. Add the other tags to the same image
51+
docker tag "$TAG_SPECIFIC_FULL" "$TAG_MAJOR_FULL"
52+
docker tag "$TAG_SPECIFIC_FULL" "$TAG_LATEST_FULL"
53+
54+
echo "--- Pushing all FULL tags to Docker Hub ---"
55+
# 3. Push all full tags to Docker Hub
56+
docker push "$TAG_SPECIFIC_FULL"
57+
docker push "$TAG_MAJOR_FULL"
58+
docker push "$TAG_LATEST_FULL"
59+
60+
echo "✅ Successfully pushed all FULL tags to Docker Hub!"
61+
62+
# --- Light Version (without WhisperX) ---
63+
TAG_SPECIFIC_LIGHT="$USERNAME/$IMAGE_NAME:${VERSION}-light"
64+
TAG_MAJOR_LIGHT="$USERNAME/$IMAGE_NAME:${MAJOR_VERSION}-light"
65+
TAG_FIXED_LIGHT="$USERNAME/$IMAGE_NAME:light"
66+
67+
echo ""
68+
echo "--- Building and tagging LIGHT Docker image (without WhisperX) ---"
69+
echo " > Specific tag: $TAG_SPECIFIC_LIGHT"
70+
echo " > Major tag: $TAG_MAJOR_LIGHT"
71+
echo " > Fixed tag: $TAG_FIXED_LIGHT"
72+
73+
# 1. Build the light image using the 'without_whisperx' target
74+
docker build --target without_whisperx -t "$TAG_SPECIFIC_LIGHT" .
75+
76+
# 2. Add the other tags to the same image
77+
docker tag "$TAG_SPECIFIC_LIGHT" "$TAG_MAJOR_LIGHT"
78+
docker tag "$TAG_SPECIFIC_LIGHT" "$TAG_FIXED_LIGHT"
79+
80+
echo "--- Pushing all LIGHT tags to Docker Hub ---"
81+
# 3. Push all light tags to Docker Hub
82+
docker push "$TAG_SPECIFIC_LIGHT"
83+
docker push "$TAG_MAJOR_LIGHT"
84+
docker push "$TAG_FIXED_LIGHT"
85+
86+
echo "✅ Successfully pushed all LIGHT tags to Docker Hub!"
87+
echo ""
88+
echo "🚀 All versions have been successfully built and pushed."

gui.py

Lines changed: 35 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
from generate_podcast import validate_speakers, update_elevenlabs_quota
4141
from utils import get_asset_path, sanitize_app_settings_for_backend, find_ffplay_path, get_app_data_dir, sanitize_text
4242
from create_demo import create_html_demo_whisperx
43-
from config import AVAILABLE_VOICES, DEFAULT_APP_SETTINGS
43+
from config import AVAILABLE_VOICES, DEFAULT_APP_SETTINGS, DEMO_AVAILABLE
4444

4545
# --- Versioning ---
4646
# This file is automatically generated by setuptools-scm
@@ -92,6 +92,7 @@ def __init__(self, root: tk.Tk, generate_func, logger, api_key: str, default_scr
9292
self.ffplay_path = find_ffplay_path()
9393
self.is_whisperx_available = self.check_whisperx_availability()
9494
self.elevenlabs_quota_text = None # New state variable
95+
self.demo_available = DEMO_AVAILABLE # Read environment variable
9596

9697
self.app_settings = self.load_settings()
9798
self.provider_var = tk.StringVar(value=self.app_settings.get("tts_provider", "elevenlabs").lower())
@@ -233,16 +234,17 @@ def _setup_ctk_menu(self):
233234
self.settings_menu.add_option("Quit", command=self.root.quit)
234235

235236
# Menu Actions
236-
actions_button = self.menu_bar.add_cascade("Actions")
237-
self.actions_menu = CTkMenuBar.CustomDropdownMenu(
238-
widget=actions_button,
239-
corner_radius=3,
240-
border_width=1,
241-
border_color=("gray70", "gray25")
242-
)
243-
self.demo_menu_item = self.actions_menu.add_option("Generate HTML Demo...",
244-
command=self.start_demo_generation_thread,
245-
state='disabled')
237+
if self.demo_available:
238+
actions_button = self.menu_bar.add_cascade("Actions")
239+
self.actions_menu = CTkMenuBar.CustomDropdownMenu(
240+
widget=actions_button,
241+
corner_radius=3,
242+
border_width=1,
243+
border_color=("gray70", "gray25")
244+
)
245+
self.demo_menu_item = self.actions_menu.add_option("Generate HTML Demo...",
246+
command=self.start_demo_generation_thread,
247+
state='disabled')
246248

247249
# Menu Help
248250
help_button = self.menu_bar.add_cascade("Help")
@@ -302,12 +304,13 @@ def _setup_tkinter_menu(self):
302304
self.settings_menu.add_command(label="Quit", command=self.root.quit)
303305

304306
# --- Actions Menu ---
305-
self.actions_menu = tk.Menu(self.menubar, tearoff=0)
306-
self._apply_menu_theme(self.actions_menu)
307-
self.menubar.add_cascade(label="Actions", menu=self.actions_menu)
308-
self.actions_menu.add_command(label="Generate HTML Demo...",
309-
command=self.start_demo_generation_thread,
310-
state='disabled')
307+
if self.demo_available:
308+
self.actions_menu = tk.Menu(self.menubar, tearoff=0)
309+
self._apply_menu_theme(self.actions_menu)
310+
self.menubar.add_cascade(label="Actions", menu=self.actions_menu)
311+
self.actions_menu.add_command(label="Generate HTML Demo...",
312+
command=self.start_demo_generation_thread,
313+
state='disabled')
311314

312315
# Help Menu (common to all platforms)
313316
self.help_menu = tk.Menu(self.menubar, tearoff=0)
@@ -373,6 +376,8 @@ def update_voice_settings_enabled_ctk(self):
373376

374377
def update_demo_menu_state_ctk(self, enabled):
375378
"""Met à jour l'état du menu demo avec CTkMenuBarPlus."""
379+
if not self.demo_available:
380+
return
376381
try:
377382
if hasattr(self.actions_menu, 'configure_option') and self.demo_menu_item:
378383
state = 'normal' if enabled else 'disabled'
@@ -885,20 +890,6 @@ def _wait_for_cache():
885890
# If not using ElevenLabs or cache is ready, open immediately.
886891
self._show_settings_window()
887892

888-
def _show_settings_window(self):
889-
"""Creates and displays the actual settings window."""
890-
from settings_window import VoiceSettingsWindow
891-
VoiceSettingsWindow(
892-
self.root,
893-
current_settings=self.app_settings,
894-
save_callback=self.save_settings,
895-
close_callback=self.on_settings_window_close,
896-
default_settings=DEFAULT_APP_SETTINGS,
897-
preloaded_elevenlabs_voices=self.elevenlabs_voices_cache,
898-
play_gemini_sample_callback=self.play_gemini_voice_sample,
899-
play_elevenlabs_sample_callback=self.play_elevenlabs_voice_sample
900-
)
901-
902893
def show_about_window(self):
903894
"""Displays the 'About' window."""
904895
AboutWindow(self.root, version=get_app_version())
@@ -1043,7 +1034,8 @@ def start_generation_thread(self):
10431034
except:
10441035
pass
10451036
else:
1046-
self.actions_menu.entryconfig("Generate HTML Demo...", state='disabled')
1037+
if self.demo_available:
1038+
self.actions_menu.entryconfig("Generate HTML Demo...", state='disabled')
10471039
self.menubar.entryconfig("Settings", state="disabled")
10481040

10491041
# Show and start the progress bar
@@ -1125,14 +1117,19 @@ def on_generation_complete(self, success: bool):
11251117
if HAS_CTK_MENUBAR:
11261118
self.update_demo_menu_state_ctk(can_generate_demo)
11271119
else:
1128-
self.actions_menu.entryconfig("Generate HTML Demo...", state='normal' if can_generate_demo else 'disabled')
1120+
if self.demo_available:
1121+
self.actions_menu.entryconfig("Generate HTML Demo...", state='normal' if can_generate_demo else 'disabled')
1122+
11291123

11301124
if self.progress_bar.winfo_ismapped():
11311125
self.progress_bar.pack_forget()
11321126
self.log_text.configure(state='disabled') # Disable the log area at the very end
11331127

11341128
def start_demo_generation_thread(self):
11351129
"""Opens a dialog to get demo settings, then starts the generation."""
1130+
if not self.demo_available:
1131+
return
1132+
11361133
if not self.last_generated_filepath or not self.last_generated_script:
11371134
messagebox.showwarning("No Data", "Please generate a podcast first before creating a demo.",
11381135
parent=self.root)
@@ -1156,7 +1153,8 @@ def _on_demo_settings_confirmed(self, title: str, subtitle: str, output_dir: str
11561153
if HAS_CTK_MENUBAR:
11571154
self.update_demo_menu_state_ctk(False)
11581155
else:
1159-
self.actions_menu.entryconfig("Generate HTML Demo...", state='disabled')
1156+
if self.demo_available:
1157+
self.actions_menu.entryconfig("Generate HTML Demo...", state='disabled')
11601158

11611159
thread = threading.Thread(
11621160
target=self.run_demo_generation,
@@ -1196,8 +1194,9 @@ def run_demo_generation(self, script_content: str, audio_filepath: str, title: s
11961194
if HAS_CTK_MENUBAR:
11971195
self.root.after(0, lambda: self.update_demo_menu_state_ctk(can_generate_demo))
11981196
else:
1199-
self.root.after(0, lambda: self.actions_menu.entryconfig("Generate HTML Demo...",
1200-
state='normal' if can_generate_demo else 'disabled'))
1197+
if self.demo_available:
1198+
self.root.after(0, lambda: self.actions_menu.entryconfig("Generate HTML Demo...",
1199+
state='normal' if can_generate_demo else 'disabled'))
12011200
# Clean up the temporary file
12021201
if temp_script_file and os.path.exists(temp_script_file):
12031202
os.remove(temp_script_file)
@@ -1526,8 +1525,6 @@ def _run_fetch():
15261525
self.logger.warning(f"Failed to pre-fetch ElevenLabs voices: {e}")
15271526
self.elevenlabs_voices_cache = []
15281527

1529-
threading.Thread(target=_run_fetch, daemon=True).start()
1530-
15311528

15321529
def show_error_and_log(*args):
15331530
"""Global exception handler with improved debugging."""

requirements.txt

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,13 @@
1+
# Core dependencies for the Podcast Generator
2+
3+
# Web server
14
Flask
25
gunicorn
6+
python-dotenv
7+
8+
# Google and ElevenLabs APIs
39
google-genai
410
elevenlabs
11+
12+
# Utility
513
requests
6-
python-dotenv
7-
keyring

0 commit comments

Comments
 (0)