chore: add nicer progress display to voice-gen scripts (#163)

pfeerick · web-flow · commit baf4778673ec · 2026-01-20T09:46:59.000+10:00
diff --git a/pyproject.toml b/pyproject.toml
@@ -6,4 +6,7 @@ requires-python = ">=3.8"
 dependencies = [
     "azure-cognitiveservices-speech==1.44.0",
     "ffmpeg-normalize",
+    "rich",
+    "elevenlabs",
+    "python-dotenv",
 ]
diff --git a/voice-gen-elevenlabs.py b/voice-gen-elevenlabs.py
@@ -1,10 +1,24 @@
 import csv
 import os
 import subprocess
+import sys
+from pathlib import Path
 from dotenv import load_dotenv
 from elevenlabs.client import ElevenLabs
 from elevenlabs import play
 
+from rich.console import Console, Group
+from rich.live import Live
+from rich.progress import (
+    BarColumn,
+    Progress,
+    TaskProgressColumn,
+    TextColumn,
+    TimeElapsedColumn,
+    TimeRemainingColumn,
+)
+from rich.text import Text
+
 load_dotenv()
 client = ElevenLabs()
 
@@ -25,66 +39,131 @@
     # Add other languages here
 ]
 
-for csv_file, voice_name, output_dir in languages:
-    print(f"\nProcesing file {csv_file}")
+in_ci = os.environ.get("GITHUB_ACTIONS", "").lower() == "true"
+total_files = len(languages)
 
-    with open(csv_file, newline="", encoding="utf-8") as f:
-        reader = csv.DictReader(f)
+def process_csv_file(csv_file: str, voice_name: str, output_dir: str, processed_files: int, total_files: int) -> None:
+    """Process a single CSV file."""
+    console = Console(force_terminal=not in_ci, no_color=in_ci)
+    progress = Progress(
+        TextColumn("[bold blue]{task.description}"),
+        TextColumn("{task.fields[status]}", justify="left"),
+        BarColumn(bar_width=None),
+        TaskProgressColumn(),
+        TimeElapsedColumn(),
+        TimeRemainingColumn(),
+        console=console,
+        transient=False,
+        expand=True,
+    )
 
-        for row in reader:
-            if not row.get("Filename") or row.get("String ID", "").startswith("#"):
-                continue
+    class StatusLine:
+        def __init__(self) -> None:
+            self.message = ""
 
-            name = row["Filename"].split('.')[0]
-            tr = row.get("Translation", "")
-            subd = row.get("Path", "")  # Subdirectory
+        def update(self, message: str) -> None:
+            self.message = message
 
-            full_dir = os.path.join("SOUNDS", output_dir, subd)
-            os.makedirs(full_dir, exist_ok=True)
-            output_mp3 = os.path.join(full_dir, f"{name}.mp3")
-            output_wav = os.path.join(full_dir, f"{name}.wav")
+        def __rich_console__(self, console, options):
+            yield Text(self.message)
 
-            # To save free tokens available on Elevenlabs - skip existing files to avoid double generating
-            if os.path.exists(output_wav):
-                # print(f"\n\nWAV file exist, skipping: {output_wav}")
-                continue
+    status_line = StatusLine()
+    layout = Group(status_line, progress)
 
-            print(f"\n\nGenerating MP3 file: {output_mp3} ...")
+    print(f"\nProcessing file {csv_file}")
 
-            audio_generator = client.text_to_speech.convert(
-                text=tr,
-                voice_id=voice_name,          
-                model_id="eleven_multilingual_v2",
-                output_format="mp3_44100_128"
+    with open(csv_file, newline="", encoding="utf-8") as f, Live(layout, console=console, refresh_per_second=10, transient=False):
+        reader = csv.DictReader(f)
+        rows = list(reader)
+        total_rows = len(rows)
+        task_id = progress.add_task("Synthesizing", total=total_rows or None, status="")
+
+        def report(msg: str) -> None:
+            if in_ci:
+                progress.console.print(msg)
+            else:
+                status_line.update(msg)
+                progress.refresh()
+
+        processed_count = 0
+        line_count = 0
+
+        try:
+            for row in rows:
+                line_count += 1
+                if not row.get("Filename") or row.get("String ID", "").startswith("#"):
+                    progress.update(task_id, advance=1)
+                    processed_count += 1
+                    continue
+
+                name = row["Filename"].split('.')[0]
+                tr = row.get("Translation", "")
+                subd = row.get("Path", "")  # Subdirectory
+
+                full_dir = os.path.join("SOUNDS", output_dir, subd)
+                os.makedirs(full_dir, exist_ok=True)
+                output_mp3 = os.path.join(full_dir, f"{name}.mp3")
+                output_wav = os.path.join(full_dir, f"{name}.wav")
+
+                # To save free tokens available on Elevenlabs - skip existing files to avoid double generating
+                if os.path.exists(output_wav):
+                    report(f"[{line_count}/{total_rows}] Skipping \"{name}.wav\" as already exists.")
+                    progress.update(task_id, advance=1)
+                    processed_count += 1
+                    continue
+
+                report(f"[{line_count}/{total_rows}] Generating MP3 file: {output_mp3} ...")
+
+                audio_generator = client.text_to_speech.convert(
+                    text=tr,
+                    voice_id=voice_name,          
+                    model_id="eleven_multilingual_v2",
+                    output_format="mp3_44100_128"
+                )
+
+                audio_bytes = b''.join(audio_generator)
+
+                with open(output_mp3, "wb") as out_file:
+                    out_file.write(audio_bytes)
+
+                # Conversion MP3 -> WAV using ffmpeg command
+                skip = row.get("Skip") or "0.0"
+                ffmpeg_cmd = [
+                    "ffmpeg",
+                    "-ss", skip, # skip beginning in words that can be interpreted in a wrong language
+                    "-y",  # overwrite existing file
+                    "-i", output_mp3,
+                    "-ar", "32000",   # sample rate 32 kHz
+                    "-ac", "1",       # mono
+                    "-sample_fmt", "s16",  # 16-bit PCM
+                    output_wav
+                ]
+
+                subprocess.run(ffmpeg_cmd, check=True)
+
+                # Remove temporary mp3 file
+                if os.path.exists(output_mp3):
+                    os.remove(output_mp3)
+
+                progress.update(task_id, advance=1)
+                processed_count += 1
+        except KeyboardInterrupt:
+            report(
+                f"Interrupted. Processed {processed_files}/{total_files} files; {processed_count}/{total_rows} entries in current file."
             )
-
-            audio_bytes = b''.join(audio_generator)
-
-            with open(output_mp3, "wb") as out_file:
-                out_file.write(audio_bytes)
-
-            print(f"MP3 saved: {output_mp3}")
-
-
-
-            # Conversion MP# -> WAV using ffmpeg command
-            skip = row.get("Skip") or "0.0"
-            ffmpeg_cmd = [
-                "ffmpeg",
-                "-ss", skip, # skip beginning in words that can be interpreted in a wrong lanuage
-                "-y",  # overwrite existing file
-                "-i", output_mp3,
-                "-ar", "32000",   # sample rate 32 kHz
-                "-ac", "1",       # mono
-                "-sample_fmt", "s16",  # 16-bit PCM
-                output_wav
-            ]
-
-            subprocess.run(ffmpeg_cmd, check=True)
-            print(f"WAV for EdgeTX saved: {output_wav}")
-
-            # Remove temporary mp3 file
-            if os.path.exists(output_mp3):
-                os.remove(output_mp3)
+            progress.update(task_id, completed=processed_count)
+            raise SystemExit(1)
+
+        report(
+            f'Finished processing {processed_files}/{total_files} files ({processed_count}/{total_rows} entries) in "{csv_file}".')
+
+for idx, (csv_file, voice_name, output_dir) in enumerate(languages, 1):
+    try:
+        process_csv_file(csv_file, voice_name, output_dir, idx, total_files)
+    except SystemExit as e:
+        if e.code == 1:
+            print("\nProcessing interrupted by user.")
+            sys.exit(1)
+        raise
         
 
diff --git a/voice-gen-glados.py b/voice-gen-glados.py
@@ -7,6 +7,19 @@
 import time
 import urllib.parse
 import urllib.request
+from pathlib import Path
+
+from rich.console import Console, Group
+from rich.live import Live
+from rich.progress import (
+    BarColumn,
+    Progress,
+    TaskProgressColumn,
+    TextColumn,
+    TimeElapsedColumn,
+    TimeRemainingColumn,
+)
+from rich.text import Text
 
 
 def init_argparse() -> argparse.ArgumentParser:
@@ -85,6 +98,18 @@ def main() -> None:
     outdir = ""
     delay_time = args.delay
 
+    in_ci = os.environ.get("GITHUB_ACTIONS", "").lower() == "true"
+
+    csv_path = Path(csv_file).resolve()
+    voices_root = Path(__file__).resolve().parent / "voices"
+
+    all_csvs = sorted(voices_root.glob("*.csv")) if voices_root.exists() else []
+    if not all_csvs:
+        all_csvs = sorted(csv_path.parent.glob("*.csv"))
+
+    total_files = len(all_csvs) if all_csvs else 1
+    processed_files = next((idx + 1 for idx, f in enumerate(all_csvs) if f.resolve() == csv_path), 1)
+
     if not os.path.isfile(csv_file):
         print("Error: voice file not found")
         sys.exit(1)
@@ -95,18 +120,58 @@ def main() -> None:
         reader = ((field.strip().strip('"') for field in row) for row in reader)  # Strip spaces and quotes
         csv_rows = sum(1 for row in reader)
 
-    # Process CSV file
-    with open(csv_file, 'rt') as csvfile:
+    # Drop header row from progress count if present
+    csv_rows = max(csv_rows - 1, 0)
+
+    console = Console(force_terminal=not in_ci, no_color=in_ci)
+    progress = Progress(
+        TextColumn("[bold blue]{task.description}"),
+        TextColumn("{task.fields[status]}", justify="left"),
+        BarColumn(bar_width=None),
+        TaskProgressColumn(),
+        TimeElapsedColumn(),
+        TimeRemainingColumn(),
+        console=console,
+        transient=False,
+        expand=True,
+    )
+
+    class StatusLine:
+        def __init__(self) -> None:
+            self.message = ""
+
+        def update(self, message: str) -> None:
+            self.message = message
+
+        def __rich_console__(self, console, options):
+            yield Text(self.message)
+
+    status_line = StatusLine()
+    layout = Group(status_line, progress)
+
+    # Process CSV file with progress bar
+    with open(csv_file, 'rt') as csvfile, Live(layout, console=console, refresh_per_second=10, transient=False):
         reader = csv.reader(csvfile, delimiter=',', quotechar='"')
         reader = ((field.strip().strip('"') for field in row) for row in reader)  # Strip spaces and quotes
-        line_count = 0
-        for row in reader:
-            row = list(row)  # Convert the generator to a list
-            if line_count == 0:
-                # print(f'Column names are {", ".join(row)}')
-                line_count += 1
-                csv_rows -= 1
+        task_id = progress.add_task("Synthesizing", total=csv_rows or None, status="")
+
+        def report(msg: str) -> None:
+            if in_ci:
+                progress.console.print(msg)
             else:
+                status_line.update(msg)
+                progress.refresh()
+
+        line_count = 0
+        processed_count = 0
+
+        try:
+            for row in reader:
+                row = list(row)  # Convert the generator to a list
+                if line_count == 0:
+                    # absorb header row
+                    line_count += 1
+                    continue
                 if row[4] is None or row[4] == "":
                     outdir = os.path.join(basedir, "SOUNDS", langdir)
                 else:
@@ -122,25 +187,37 @@ def main() -> None:
                     os.makedirs(outdir)
 
                 if text is None or text == "":
-                    print(
+                    report(
                         f'[{line_count}/{csv_rows}] Skipping as no text to translate')
+                    progress.update(task_id, advance=1)
+                    processed_count += 1
+                    line_count += 1
                     continue
 
                 if not os.path.isfile(outfile):
-                    print(
+                    report(
                         f'[{line_count}/{csv_rows}] Translate "{en_text}" to "{text}", save as "{outdir}{os.sep}{filename}".')
 
                     fetch_sample(text, tmpfile_fd, delay_time)
                     process_sample(tmpfile, outfile)
                     os.unlink(tmpfile)
 
                 else:
-                    print(
+                    report(
                         f'[{line_count}/{csv_rows}] Skipping "{filename}" as already exists.')
 
+                progress.update(task_id, advance=1)
+                processed_count += 1
                 line_count += 1
-
-        print(f'Finished processing {csv_rows} entries from "{csv_file}" using {os.path.basename(__file__)}.')
+        except KeyboardInterrupt:
+            report(
+                f"Interrupted. Processed {processed_files}/{total_files} files; {processed_count}/{csv_rows} entries in current file."
+            )
+            progress.update(task_id, completed=processed_count)
+            raise SystemExit(1)
+
+        report(
+            f'Finished processing {processed_files}/{total_files} files ({processed_count}/{csv_rows} entries) from "{csv_file}" using {os.path.basename(__file__)}.')
 
 
 if __name__ == "__main__":
diff --git a/voice-gen.py b/voice-gen.py

Original file line number	Diff line number	Diff line change
`@@ -6,4 +6,7 @@ requires-python = ">=3.8"`
`6`	`6`	`dependencies = [`
`7`	`7`	`"azure-cognitiveservices-speech==1.44.0",`
`8`	`8`	`"ffmpeg-normalize",`
	`9`	`+ "rich",`
	`10`	`+ "elevenlabs",`
	`11`	`+ "python-dotenv",`
`9`	`12`	`]`