Skip to content

Commit 26e260e

Browse files
authored
added FFMPEG support to process input files
1 parent 8dd79ed commit 26e260e

File tree

1 file changed

+88
-10
lines changed

1 file changed

+88
-10
lines changed

whisperGUI.py

Lines changed: 88 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
11
from gooey import Gooey, GooeyParser
22
import subprocess
3+
import os
34

4-
@Gooey(required_cols=1,
5-
target='main.exe',
6-
suppress_gooey_flag=True,
7-
program_name='whisper.cppGUI',
5+
@Gooey(program_name='whisper.cppGUI',
86
menu=[{'name': 'File',
97
'items': [{
108
'type': 'AboutDialog',
@@ -22,21 +20,18 @@ def main():
2220
parser = GooeyParser(description='GUI for whisper.cpp, a high-performance C++ port of OpenAI\'s Whisper')
2321

2422
parser.add_argument(
25-
'-f',
2623
'--file',
2724
metavar='WAV file to transcribe',
2825
help='convert with: ffmpeg -i input.mp3 -ar 16000 -ac 1 -c:a pcm_s16le output.wav',
2926
widget='FileChooser')
3027

3128
parser.add_argument(
32-
'-m',
3329
'--model',
3430
metavar='GGML model (.bin)',
3531
help='select GGML model (tiny,base,small,medium,large)',
3632
widget='FileChooser')
3733

3834
parser.add_argument(
39-
'-l',
4035
'--language',
4136
metavar='Language',
4237
help='select language for transcription',
@@ -71,9 +66,92 @@ def main():
7166
'--speed-up',
7267
action='store_true',
7368
help='check to speed up audio by factor of 2 (faster processing, reduced accuracy)')
74-
75-
parser.parse_args()
69+
args = parser.parse_args()
70+
71+
#pass args for later use in args=main()
72+
return args
73+
7674

7775

7876
# temporary WAV file written by ffmpeg and read by whisperCPP
TEMP_WAV = "output.wav"


def hidden_window_startupinfo():
    """Return the STARTUPINFO used for child processes, or None off Windows.

    workaround required to show child process output in the Gooey window
    reference https://github.com/chriskiehl/Gooey/issues/355
    """
    if not hasattr(subprocess, "STARTUPINFO"):
        # STARTUPINFO only exists on Windows; Popen accepts None elsewhere
        return None
    startupinfo = subprocess.STARTUPINFO()
    startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    return startupinfo


def build_ffmpeg_command(input_path, output_path=TEMP_WAV):
    """Return the ffmpeg argument list that converts input_path to a
    16 kHz, mono, 16-bit PCM WAV file at output_path.

    An argument list (instead of one command string) keeps paths that
    contain spaces or quotes intact without any manual quoting.
    """
    return [
        "ffmpeg.exe",
        # overwrite a leftover temporary file instead of waiting for an
        # answer to the interactive "overwrite?" prompt, which never arrives
        "-y",
        "-i", str(input_path),
        "-ar", "16000",
        "-ac", "1",
        "-c:a", "pcm_s16le",
        output_path,
    ]


def build_whisper_command(args, wav_path=TEMP_WAV):
    """Return the whisperCPP (main.exe) argument list for the parsed args.

    args must provide the attributes model, language, translate,
    output_txt, output_srt, output_vtt and speed_up. Options whose value
    is unset are left out instead of being passed as the text "None",
    and boolean flags are only added when they are checked.
    """
    command = ["main.exe", "-f", wav_path]
    if args.model:
        command += ["-m", str(args.model)]
    if args.language:
        command += ["-l", str(args.language)]

    switches = (
        (args.translate, "--translate"),
        (args.output_txt, "--output-txt"),
        (args.output_srt, "--output-srt"),
        (args.output_vtt, "--output-vtt"),
        (args.speed_up, "--speed-up"),
    )
    command += [flag for enabled, flag in switches if enabled]
    return command


def run_and_stream(command):
    """Run command, print its combined stdout/stderr line by line (so it
    shows up in the Gooey window) and return the process exit code.
    """
    with subprocess.Popen(command,
                          startupinfo=hidden_window_startupinfo(),
                          stdout=subprocess.PIPE,
                          stdin=subprocess.DEVNULL,
                          stderr=subprocess.STDOUT) as process:
        for raw_line in process.stdout:
            # errors='replace': tool output is not guaranteed to be valid
            # UTF-8 and a decode error must not abort the whole run
            print(raw_line.decode('utf-8', errors='replace').rstrip())
    # leaving the with-block has closed the pipe and waited for the process
    return process.returncode


if __name__ == '__main__':

    # this section is inspired by
    # https://stackoverflow.com/questions/48767005/using-python-gooey-how-to-open-another-gui-after-clicking-one-out-of-multiple-bu

    # get arguments from main() for use here
    args = main()

    if not args.file:
        raise SystemExit("no input file selected")

    try:
        # first we process the input file with ffmpeg
        ffmpeg_status = run_and_stream(build_ffmpeg_command(args.file))
        if ffmpeg_status != 0:
            raise SystemExit(f"ffmpeg failed with exit code {ffmpeg_status}")

        # here we run whisperCPP on the converted file
        whisper_status = run_and_stream(build_whisper_command(args))
        if whisper_status != 0:
            raise SystemExit(f"main.exe failed with exit code {whisper_status}")
    finally:
        # remove output.wav temporary file created by ffmpeg, also when a
        # step above failed; it may not exist if ffmpeg never produced it
        try:
            os.remove(TEMP_WAV)
        except FileNotFoundError:
            pass

0 commit comments

Comments
 (0)