Skip to content

Commit baf4778

Browse files
authored
chore: add nicer progress display to voice-gen scripts (#163)
1 parent 226789e commit baf4778

File tree

4 files changed

+326
-88
lines changed

4 files changed

+326
-88
lines changed

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,7 @@ requires-python = ">=3.8"
66
dependencies = [
77
"azure-cognitiveservices-speech==1.44.0",
88
"ffmpeg-normalize",
9+
"rich",
10+
"elevenlabs",
11+
"python-dotenv",
912
]

voice-gen-elevenlabs.py

Lines changed: 132 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,24 @@
11
import csv
22
import os
33
import subprocess
4+
import sys
5+
from pathlib import Path
46
from dotenv import load_dotenv
57
from elevenlabs.client import ElevenLabs
68
from elevenlabs import play
79

10+
from rich.console import Console, Group
11+
from rich.live import Live
12+
from rich.progress import (
13+
BarColumn,
14+
Progress,
15+
TaskProgressColumn,
16+
TextColumn,
17+
TimeElapsedColumn,
18+
TimeRemainingColumn,
19+
)
20+
from rich.text import Text
21+
822
load_dotenv()
923
client = ElevenLabs()
1024

@@ -25,66 +39,131 @@
2539
# Add other languages here
2640
]
2741

28-
for csv_file, voice_name, output_dir in languages:
29-
print(f"\nProcesing file {csv_file}")
42+
in_ci = os.environ.get("GITHUB_ACTIONS", "").lower() == "true"
43+
total_files = len(languages)
3044

31-
with open(csv_file, newline="", encoding="utf-8") as f:
32-
reader = csv.DictReader(f)
45+
def process_csv_file(csv_file: str, voice_name: str, output_dir: str, processed_files: int, total_files: int) -> None:
46+
"""Process a single CSV file."""
47+
console = Console(force_terminal=not in_ci, no_color=in_ci)
48+
progress = Progress(
49+
TextColumn("[bold blue]{task.description}"),
50+
TextColumn("{task.fields[status]}", justify="left"),
51+
BarColumn(bar_width=None),
52+
TaskProgressColumn(),
53+
TimeElapsedColumn(),
54+
TimeRemainingColumn(),
55+
console=console,
56+
transient=False,
57+
expand=True,
58+
)
3359

34-
for row in reader:
35-
if not row.get("Filename") or row.get("String ID", "").startswith("#"):
36-
continue
60+
class StatusLine:
61+
def __init__(self) -> None:
62+
self.message = ""
3763

38-
name = row["Filename"].split('.')[0]
39-
tr = row.get("Translation", "")
40-
subd = row.get("Path", "") # Subdirectory
64+
def update(self, message: str) -> None:
65+
self.message = message
4166

42-
full_dir = os.path.join("SOUNDS", output_dir, subd)
43-
os.makedirs(full_dir, exist_ok=True)
44-
output_mp3 = os.path.join(full_dir, f"{name}.mp3")
45-
output_wav = os.path.join(full_dir, f"{name}.wav")
67+
def __rich_console__(self, console, options):
68+
yield Text(self.message)
4669

47-
# To save free tokens available on Elevenlabs - skip existing files to avoid double generating
48-
if os.path.exists(output_wav):
49-
# print(f"\n\nWAV file exist, skipping: {output_wav}")
50-
continue
70+
status_line = StatusLine()
71+
layout = Group(status_line, progress)
5172

52-
print(f"\n\nGenerating MP3 file: {output_mp3} ...")
73+
print(f"\nProcessing file {csv_file}")
5374

54-
audio_generator = client.text_to_speech.convert(
55-
text=tr,
56-
voice_id=voice_name,
57-
model_id="eleven_multilingual_v2",
58-
output_format="mp3_44100_128"
75+
with open(csv_file, newline="", encoding="utf-8") as f, Live(layout, console=console, refresh_per_second=10, transient=False):
76+
reader = csv.DictReader(f)
77+
rows = list(reader)
78+
total_rows = len(rows)
79+
task_id = progress.add_task("Synthesizing", total=total_rows or None, status="")
80+
81+
def report(msg: str) -> None:
82+
if in_ci:
83+
progress.console.print(msg)
84+
else:
85+
status_line.update(msg)
86+
progress.refresh()
87+
88+
processed_count = 0
89+
line_count = 0
90+
91+
try:
92+
for row in rows:
93+
line_count += 1
94+
if not row.get("Filename") or row.get("String ID", "").startswith("#"):
95+
progress.update(task_id, advance=1)
96+
processed_count += 1
97+
continue
98+
99+
name = row["Filename"].split('.')[0]
100+
tr = row.get("Translation", "")
101+
subd = row.get("Path", "") # Subdirectory
102+
103+
full_dir = os.path.join("SOUNDS", output_dir, subd)
104+
os.makedirs(full_dir, exist_ok=True)
105+
output_mp3 = os.path.join(full_dir, f"{name}.mp3")
106+
output_wav = os.path.join(full_dir, f"{name}.wav")
107+
108+
# To save free tokens available on Elevenlabs - skip existing files to avoid double generating
109+
if os.path.exists(output_wav):
110+
report(f"[{line_count}/{total_rows}] Skipping \"{name}.wav\" as already exists.")
111+
progress.update(task_id, advance=1)
112+
processed_count += 1
113+
continue
114+
115+
report(f"[{line_count}/{total_rows}] Generating MP3 file: {output_mp3} ...")
116+
117+
audio_generator = client.text_to_speech.convert(
118+
text=tr,
119+
voice_id=voice_name,
120+
model_id="eleven_multilingual_v2",
121+
output_format="mp3_44100_128"
122+
)
123+
124+
audio_bytes = b''.join(audio_generator)
125+
126+
with open(output_mp3, "wb") as out_file:
127+
out_file.write(audio_bytes)
128+
129+
# Conversion MP3 -> WAV using ffmpeg command
130+
skip = row.get("Skip") or "0.0"
131+
ffmpeg_cmd = [
132+
"ffmpeg",
133+
"-ss", skip, # skip beginning in words that can be interpreted in a wrong language
134+
"-y", # overwrite existing file
135+
"-i", output_mp3,
136+
"-ar", "32000", # sample rate 32 kHz
137+
"-ac", "1", # mono
138+
"-sample_fmt", "s16", # 16-bit PCM
139+
output_wav
140+
]
141+
142+
subprocess.run(ffmpeg_cmd, check=True)
143+
144+
# Remove temporary mp3 file
145+
if os.path.exists(output_mp3):
146+
os.remove(output_mp3)
147+
148+
progress.update(task_id, advance=1)
149+
processed_count += 1
150+
except KeyboardInterrupt:
151+
report(
152+
f"Interrupted. Processed {processed_files}/{total_files} files; {processed_count}/{total_rows} entries in current file."
59153
)
60-
61-
audio_bytes = b''.join(audio_generator)
62-
63-
with open(output_mp3, "wb") as out_file:
64-
out_file.write(audio_bytes)
65-
66-
print(f"MP3 saved: {output_mp3}")
67-
68-
69-
70-
# Conversion MP# -> WAV using ffmpeg command
71-
skip = row.get("Skip") or "0.0"
72-
ffmpeg_cmd = [
73-
"ffmpeg",
74-
"-ss", skip, # skip beginning in words that can be interpreted in a wrong lanuage
75-
"-y", # overwrite existing file
76-
"-i", output_mp3,
77-
"-ar", "32000", # sample rate 32 kHz
78-
"-ac", "1", # mono
79-
"-sample_fmt", "s16", # 16-bit PCM
80-
output_wav
81-
]
82-
83-
subprocess.run(ffmpeg_cmd, check=True)
84-
print(f"WAV for EdgeTX saved: {output_wav}")
85-
86-
# Remove temporary mp3 file
87-
if os.path.exists(output_mp3):
88-
os.remove(output_mp3)
154+
progress.update(task_id, completed=processed_count)
155+
raise SystemExit(1)
156+
157+
report(
158+
f'Finished processing {processed_files}/{total_files} files ({processed_count}/{total_rows} entries) in "{csv_file}".')
159+
160+
for idx, (csv_file, voice_name, output_dir) in enumerate(languages, 1):
161+
try:
162+
process_csv_file(csv_file, voice_name, output_dir, idx, total_files)
163+
except SystemExit as e:
164+
if e.code == 1:
165+
print("\nProcessing interrupted by user.")
166+
sys.exit(1)
167+
raise
89168

90169

voice-gen-glados.py

Lines changed: 91 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,19 @@
77
import time
88
import urllib.parse
99
import urllib.request
10+
from pathlib import Path
11+
12+
from rich.console import Console, Group
13+
from rich.live import Live
14+
from rich.progress import (
15+
BarColumn,
16+
Progress,
17+
TaskProgressColumn,
18+
TextColumn,
19+
TimeElapsedColumn,
20+
TimeRemainingColumn,
21+
)
22+
from rich.text import Text
1023

1124

1225
def init_argparse() -> argparse.ArgumentParser:
@@ -85,6 +98,18 @@ def main() -> None:
8598
outdir = ""
8699
delay_time = args.delay
87100

101+
in_ci = os.environ.get("GITHUB_ACTIONS", "").lower() == "true"
102+
103+
csv_path = Path(csv_file).resolve()
104+
voices_root = Path(__file__).resolve().parent / "voices"
105+
106+
all_csvs = sorted(voices_root.glob("*.csv")) if voices_root.exists() else []
107+
if not all_csvs:
108+
all_csvs = sorted(csv_path.parent.glob("*.csv"))
109+
110+
total_files = len(all_csvs) if all_csvs else 1
111+
processed_files = next((idx + 1 for idx, f in enumerate(all_csvs) if f.resolve() == csv_path), 1)
112+
88113
if not os.path.isfile(csv_file):
89114
print("Error: voice file not found")
90115
sys.exit(1)
@@ -95,18 +120,58 @@ def main() -> None:
95120
reader = ((field.strip().strip('"') for field in row) for row in reader) # Strip spaces and quotes
96121
csv_rows = sum(1 for row in reader)
97122

98-
# Process CSV file
99-
with open(csv_file, 'rt') as csvfile:
123+
# Drop header row from progress count if present
124+
csv_rows = max(csv_rows - 1, 0)
125+
126+
console = Console(force_terminal=not in_ci, no_color=in_ci)
127+
progress = Progress(
128+
TextColumn("[bold blue]{task.description}"),
129+
TextColumn("{task.fields[status]}", justify="left"),
130+
BarColumn(bar_width=None),
131+
TaskProgressColumn(),
132+
TimeElapsedColumn(),
133+
TimeRemainingColumn(),
134+
console=console,
135+
transient=False,
136+
expand=True,
137+
)
138+
139+
class StatusLine:
140+
def __init__(self) -> None:
141+
self.message = ""
142+
143+
def update(self, message: str) -> None:
144+
self.message = message
145+
146+
def __rich_console__(self, console, options):
147+
yield Text(self.message)
148+
149+
status_line = StatusLine()
150+
layout = Group(status_line, progress)
151+
152+
# Process CSV file with progress bar
153+
with open(csv_file, 'rt') as csvfile, Live(layout, console=console, refresh_per_second=10, transient=False):
100154
reader = csv.reader(csvfile, delimiter=',', quotechar='"')
101155
reader = ((field.strip().strip('"') for field in row) for row in reader) # Strip spaces and quotes
102-
line_count = 0
103-
for row in reader:
104-
row = list(row) # Convert the generator to a list
105-
if line_count == 0:
106-
# print(f'Column names are {", ".join(row)}')
107-
line_count += 1
108-
csv_rows -= 1
156+
task_id = progress.add_task("Synthesizing", total=csv_rows or None, status="")
157+
158+
def report(msg: str) -> None:
159+
if in_ci:
160+
progress.console.print(msg)
109161
else:
162+
status_line.update(msg)
163+
progress.refresh()
164+
165+
line_count = 0
166+
processed_count = 0
167+
168+
try:
169+
for row in reader:
170+
row = list(row) # Convert the generator to a list
171+
if line_count == 0:
172+
# absorb header row
173+
line_count += 1
174+
continue
110175
if row[4] is None or row[4] == "":
111176
outdir = os.path.join(basedir, "SOUNDS", langdir)
112177
else:
@@ -122,25 +187,37 @@ def main() -> None:
122187
os.makedirs(outdir)
123188

124189
if text is None or text == "":
125-
print(
190+
report(
126191
f'[{line_count}/{csv_rows}] Skipping as no text to translate')
192+
progress.update(task_id, advance=1)
193+
processed_count += 1
194+
line_count += 1
127195
continue
128196

129197
if not os.path.isfile(outfile):
130-
print(
198+
report(
131199
f'[{line_count}/{csv_rows}] Translate "{en_text}" to "{text}", save as "{outdir}{os.sep}{filename}".')
132200

133201
fetch_sample(text, tmpfile_fd, delay_time)
134202
process_sample(tmpfile, outfile)
135203
os.unlink(tmpfile)
136204

137205
else:
138-
print(
206+
report(
139207
f'[{line_count}/{csv_rows}] Skipping "{filename}" as already exists.')
140208

209+
progress.update(task_id, advance=1)
210+
processed_count += 1
141211
line_count += 1
142-
143-
print(f'Finished processing {csv_rows} entries from "{csv_file}" using {os.path.basename(__file__)}.')
212+
except KeyboardInterrupt:
213+
report(
214+
f"Interrupted. Processed {processed_files}/{total_files} files; {processed_count}/{csv_rows} entries in current file."
215+
)
216+
progress.update(task_id, completed=processed_count)
217+
raise SystemExit(1)
218+
219+
report(
220+
f'Finished processing {processed_files}/{total_files} files ({processed_count}/{csv_rows} entries) from "{csv_file}" using {os.path.basename(__file__)}.')
144221

145222

146223
if __name__ == "__main__":

0 commit comments

Comments
 (0)