Skip to content

Commit c3f5516

Browse files
authored
Added FFMPEG-based speedup factor
Added FFMPEG-based speedup factor and textbox for other parameters
1 parent bc407cc commit c3f5516

File tree

1 file changed

+64
-6
lines changed

1 file changed

+64
-6
lines changed

whisperGUI.py

Lines changed: 64 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from gooey import Gooey, GooeyParser
22
import subprocess
33
import os
4+
import re
45

56
@Gooey(program_name='whisper.cppGUI',
67
menu=[{'name': 'File',
@@ -66,15 +67,32 @@ def main():
6667
'--speed-up',
6768
action='store_true',
6869
help='check to speed up audio by factor of 2 (faster processing, reduced accuracy)')
69-
args = parser.parse_args()
70+
71+
parser.add_argument(
72+
'--speed-up2',
73+
action='store',
74+
default=1,
75+
help='alternative speed up based on FFMPEG. Type here the speed up factor (e.g. 1.5). This enables automatically SRT output with corrected timestamps',
76+
widget='DecimalField')
77+
78+
parser.add_argument(
79+
'--others',
80+
action='store',
81+
default="",
82+
help='This textbox lets the user add other command line parameters that are not included in this GUI')
83+
7084

85+
args = parser.parse_args()
86+
#enable for debugging
87+
# print(args)
7188
#pass args for later use in args=main()
7289
return args
7390

7491

7592

7693
if __name__ == '__main__':
7794

95+
7896
# this section is inspired by
7997
# https://stackoverflow.com/questions/48767005/using-python-gooey-how-to-open-another-gui-after-clicking-one-out-of-multiple-bu
8098

@@ -108,16 +126,27 @@ def main():
108126
else:
109127
arg_speed = ""
110128

129+
#check if ffmpeg speedup was selected. If true, disable txt and vtt output
130+
#and disable whispercpp internal speed up.
131+
if float(args.speed_up2) != 1.0:
132+
arg_out_txt = ""
133+
arg_out_vtt = ""
134+
arg_speed = ""
135+
arg_out_srt = "--output-srt"
136+
111137
#first we process the input file with ffmpeg
112-
#workaround required to show whisperCPP output in the Gooey window
138+
#workaround required to show ffmpeg output in the Gooey window
113139
#reference https://github.com/chriskiehl/Gooey/issues/355
114140

115141
startupinfo = subprocess.STARTUPINFO()
116142
startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
117143

118-
#here we construct the command line for ffmpeg
119-
cmd = f"ffmpeg.exe -i \"{args.file}\" -ar 16000 -ac 1 -c:a pcm_s16le output.wav"
120-
144+
#here we construct the command line for ffmpeg and apply the FFMPEG speedup IF selected
145+
if float(args.speed_up2) != 1.0:
146+
cmd = f"ffmpeg.exe -y -i \"{args.file}\" -ar 16000 -ac 1 -c:a pcm_s16le -af atempo={args.speed_up2} output.wav"
147+
else:
148+
cmd = f"ffmpeg.exe -y -i \"{args.file}\" -ar 16000 -ac 1 -c:a pcm_s16le output.wav"
149+
121150
#here we call the program with extra parameters to capture ffmpeg output
122151
process=subprocess.Popen(cmd,
123152
startupinfo=startupinfo,
@@ -139,7 +168,7 @@ def main():
139168
startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
140169

141170
#here we construct the command line for whisperCPP
142-
cmd = f"main.exe -f output.wav -m {args.model} -l {args.language} {arg_translate} {arg_out_txt} {arg_out_srt} {arg_out_vtt} {arg_speed}"
171+
cmd = f"main.exe -f output.wav -m {args.model} -l {args.language} {arg_translate} {arg_out_txt} {arg_out_srt} {arg_out_vtt} {arg_speed} {args.others}"
143172

144173
#here we call the program with extra parameters to capture whisperCPP output
145174
process=subprocess.Popen(cmd,
@@ -153,5 +182,34 @@ def main():
153182
line1=line.decode('utf-8')
154183
print(line1.rstrip())
155184

185+
#this section fixes the timestamps of the SRT file if the FFMPEG speedup was selected
#whisperCPP transcribed the sped-up output.wav, so every timestamp it wrote has to be
#multiplied by the speedup factor to line up with the original recording again
if float(args.speed_up2) != 1.0:
    speedup_factor = float(args.speed_up2)

    def _rescale_timestamp(found):
        """Return the matched HH:MM:SS,mmm timestamp multiplied by speedup_factor."""
        hours, minutes, seconds, millis = (int(part) for part in found.groups())
        total_ms = ((hours * 60 + minutes) * 60 + seconds) * 1000 + millis
        #round instead of truncating so float noise (e.g. 1149.9999) does not lose a millisecond
        total_ms = round(total_ms * speedup_factor)
        total_s, millis = divmod(total_ms, 1000)
        total_min, seconds = divmod(total_s, 60)
        hours, minutes = divmod(total_min, 60)
        return f"{hours:02d}:{minutes:02d}:{seconds:02d},{millis:03d}"

    #NOTE(review): both files are opened with the platform default encoding, as before;
    #consider an explicit encoding if non-ASCII subtitles get mangled
    with open("output.wav.srt", "r") as srt_in: # Open the input SRT file
        content = srt_in.read()

    #a single re.sub pass rewrites each timestamp exactly once; the previous
    #findall + str.replace loop re-scaled timestamps that had already been converted
    #whenever a converted value was equal to a later original timestamp
    content = re.sub(r"(\d{2}):(\d{2}):(\d{2}),(\d{3})", _rescale_timestamp, content)

    with open("output-fix.wav.srt", "w") as srt_out: # Write the adjusted SRT file to the output file
        srt_out.write(content)
    os.remove("output.wav.srt") #remove output.srt temporary file
#end of section that fixes the timestamps
213+
156214
#remove output.wav temporary file created by ffmpeg
157215
os.remove("output.wav")

0 commit comments

Comments
 (0)