Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion app.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,8 @@ def launch(self):
params = params + pipeline_params
btn_run.click(fn=self.whisper_inf.transcribe_file,
inputs=params,
outputs=[tb_indicator, files_subtitles])
outputs=[tb_indicator, files_subtitles]).then(
fn=lambda value: gr.update(value=None), inputs=input_file, outputs=input_file)
btn_openfolder.click(fn=lambda: self.open_folder("outputs"), inputs=None, outputs=None)

with gr.TabItem(_("Youtube")): # tab2
Expand Down
79 changes: 56 additions & 23 deletions modules/whisper/base_transcription_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import gc
from copy import deepcopy
import time
import shutil
import tempfile

from modules.uvr.music_separator import MusicSeparator
from modules.utils.paths import (WHISPER_MODELS_DIR, DIARIZATION_MODELS_DIR, OUTPUT_DIR, DEFAULT_PARAMETERS_CONFIG_PATH,
Expand Down Expand Up @@ -203,6 +205,26 @@ def run(self,
total_elapsed_time = time.time() - start_time
return result, total_elapsed_time

def isGradioCachePath(self, in_path: str):
    """
    Tell whether a path appears to live inside a Gradio cache directory.

    The path is normalized and split on the OS path separator; it counts as a
    Gradio cache path when any single component equals "gradio", ignoring case.
    This is a name-based heuristic only: the filesystem is never consulted.

    Parameters
    ----------
    in_path
        the path to check

    Returns
    ----------
    bool: True if some path component is "gradio" (case-insensitive), False otherwise
    """
    # Normalize first so redundant separators and "." / ".." segments do not
    # produce spurious or missing components.
    components = os.path.normpath(in_path).split(os.sep)
    return any(part.lower() == "gradio" for part in components)

def transcribe_file(self,
files: Optional[List] = None,
input_folder_path: Optional[str] = None,
Expand Down Expand Up @@ -258,37 +280,48 @@ def transcribe_file(self,
if files and isinstance(files[0], gr.utils.NamedString):
files = [file.name for file in files]


progress(0, desc="getting input files")
files_info = {}
for file in files:
transcribed_segments, time_for_task = self.run(
file,
progress,
file_format,
add_timestamp,
*pipeline_params,
)

file_name, file_ext = os.path.splitext(os.path.basename(file))
if save_same_dir and input_folder_path:
output_dir = os.path.dirname(file)
with tempfile.TemporaryDirectory(dir=".", prefix="_tmp_input") as tmp_folder:
tmp_files = []
for f in files:
if self.isGradioCachePath(f):
# likely a gradio cached file, so move it
tmp_files.append(shutil.move(f, tmp_folder))
else:
# likely no gradio cached file, use the original one
tmp_files.append(f)
for file in tmp_files:
transcribed_segments, time_for_task = self.run(
file,
progress,
file_format,
add_timestamp,
*pipeline_params,
)

file_name, file_ext = os.path.splitext(os.path.basename(file))
if save_same_dir and input_folder_path:
output_dir = os.path.dirname(file)
subtitle, file_path = generate_file(
output_dir=output_dir,
output_file_name=file_name,
output_format=file_format,
result=transcribed_segments,
add_timestamp=add_timestamp,
**writer_options
)

subtitle, file_path = generate_file(
output_dir=output_dir,
output_dir=self.output_dir,
output_file_name=file_name,
output_format=file_format,
result=transcribed_segments,
add_timestamp=add_timestamp,
**writer_options
)

subtitle, file_path = generate_file(
output_dir=self.output_dir,
output_file_name=file_name,
output_format=file_format,
result=transcribed_segments,
add_timestamp=add_timestamp,
**writer_options
)
files_info[file_name] = {"subtitle": read_file(file_path), "time_for_task": time_for_task, "path": file_path}
files_info[file_name] = {"subtitle": read_file(file_path), "time_for_task": time_for_task, "path": file_path}

total_result = ''
total_time = 0
Expand Down
Loading