-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSubGen.py
More file actions
116 lines (97 loc) · 3.63 KB
/
SubGen.py
File metadata and controls
116 lines (97 loc) · 3.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import os
import time
import warnings
import tkinter as tk
from tkinter import filedialog
from faster_whisper import WhisperModel
import subprocess
from tqdm import tqdm
# Suppress every warning for the rest of the process: no category or module
# filter is passed, so anything emitted through `warnings` is hidden.
warnings.filterwarnings("ignore")
# Select video file
def select_video_file():
    """Show a native "open file" dialog and return the chosen video path.

    A hidden Tk root window is created only to host the dialog and is
    destroyed afterwards so it does not linger for the rest of the process.

    Returns:
        The path selected by the user, or a falsy value when the dialog is
        cancelled (callers test it with ``if not video_path``).
    """
    root = tk.Tk()
    root.withdraw()  # hide the empty root window; only the dialog is shown
    try:
        video_path = filedialog.askopenfilename(
            title="Select Video File",
            filetypes=[("Video Files", "*.mp4 *.mov *.avi *.mkv *.flv")],
        )
    finally:
        # Release the Tk interpreter/window even if the dialog raises.
        root.destroy()
    return video_path
# Extract audio using ffmpeg
def extract_audio(video_path, audio_path):
    """Extract the audio track of *video_path* into a WAV file at *audio_path*.

    Runs ``ffmpeg`` with the video stream disabled and writes 16 kHz, mono,
    16-bit PCM audio. An existing file at *audio_path* is overwritten.
    ffmpeg's own output is discarded.

    Raises:
        SystemExit: with exit status 1 when ffmpeg cannot be started or
            exits with a non-zero status.
    """
    print("[*] Extracting audio...")
    command = [
        "ffmpeg",
        "-y",  # overwrite if exists
        "-i", video_path,
        "-vn",
        "-acodec", "pcm_s16le",
        "-ar", "16000",
        "-ac", "1",
        audio_path,
    ]
    try:
        subprocess.run(
            command,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.STDOUT,
            check=True,
        )
    except OSError as err:
        # The executable could not be launched at all (typically
        # FileNotFoundError when ffmpeg is not on PATH).
        print("[!] Failed to extract audio. Is ffmpeg installed and in PATH?")
        raise SystemExit(1) from err
    except subprocess.CalledProcessError as err:
        # ffmpeg started but reported a failure (bad input, no audio, ...).
        print(f"[!] Failed to extract audio. ffmpeg exited with status {err.returncode}.")
        raise SystemExit(1) from err
    print("[✓] Audio extracted")
# Transcribe audio and save to .srt and .txt
def transcribe_to_srt(audio_path, srt_path="subtitles.srt", txt_path="transcript.txt", model_size="medium"):
    """Transcribe *audio_path* and write the result as SRT and plain text.

    Args:
        audio_path: Audio file handed to ``WhisperModel.transcribe``.
        srt_path: Destination of the numbered, timestamped subtitle file.
        txt_path: Destination of the transcript (one segment per line).
        model_size: Model identifier passed to ``WhisperModel``.

    Both output files are written as UTF-8 and overwritten if they exist.
    """
    print("[*] Loading Whisper model...")
    # NOTE(review): float16 is presumably rejected by the backend when
    # device="auto" resolves to CPU -- confirm against the CTranslate2
    # compute-type documentation before relying on CPU-only machines.
    model = WhisperModel(model_size, compute_type="float16", device="auto")
    print("[*] Transcribing...")
    segments_gen, info = model.transcribe(audio_path, beam_size=5, word_timestamps=False)
    # Materialize the segments: they are iterated twice below (SRT, then TXT).
    segments = list(segments_gen)
    print(f"[ℹ️] Detected language: {info.language}")

    def format_timestamp(seconds):
        """Format a time offset in seconds as an SRT ``HH:MM:SS,mmm`` stamp."""
        # Round once to whole milliseconds and split with integer arithmetic.
        # Deriving the millisecond part from the float remainder truncates
        # values such as 1.001 (stored as 1.000999...) down to ",000".
        total_ms = max(0, int(round(seconds * 1000)))
        hrs, remainder = divmod(total_ms, 3_600_000)
        mins, remainder = divmod(remainder, 60_000)
        secs, millis = divmod(remainder, 1000)
        return f"{hrs:02}:{mins:02}:{secs:02},{millis:03}"

    # Write SRT
    with open(srt_path, "w", encoding="utf-8") as f:
        for i, segment in enumerate(tqdm(segments, desc="Writing subtitles", unit="segment")):
            start = format_timestamp(segment.start)
            end = format_timestamp(segment.end)
            # SRT cue: 1-based index, time range, text, blank separator line.
            f.write(f"{i+1}\n{start} --> {end}\n{segment.text.strip()}\n\n")

    # Write TXT
    with open(txt_path, "w", encoding="utf-8") as txt_file:
        for segment in segments:
            txt_file.write(segment.text.strip() + "\n")

    print(f"[✓] Subtitle file saved as: {srt_path}")
    print(f"[✓] Transcript file saved as: {txt_path}")
# Main workflow
def main():
    """Run the interactive workflow: pick a video, pick an output, transcribe.

    Steps:
        1. Ask for the input video; stop if nothing is chosen.
        2. Ask where to save the ``.srt``; the ``.txt`` transcript is written
           next to it with the same base name. Stop if nothing is chosen.
        3. Extract the audio to a scratch file, transcribe it, and remove the
           scratch file again.
        4. Print the total elapsed wall-clock time (dialogs included).
    """
    # Local import: only this function needs a scratch directory.
    import tempfile

    start_time = time.time()
    video_path = select_video_file()
    if not video_path:
        print("[!] No video selected.")
        return

    base_name = os.path.splitext(os.path.basename(video_path))[0]

    # Ask user where to save SRT
    root = tk.Tk()
    root.withdraw()
    try:
        srt_path = filedialog.asksaveasfilename(
            title="Save Subtitle File As",
            defaultextension=".srt",
            initialfile=f"{base_name}.srt",
            filetypes=[("SubRip Subtitle", "*.srt")],
        )
    finally:
        # Do not leave the hidden root window alive during transcription.
        root.destroy()
    if not srt_path:
        print("[!] No save location selected.")
        return

    txt_path = os.path.splitext(srt_path)[0] + ".txt"

    # Keep the intermediate WAV in a private temporary directory. Writing it
    # to the current directory could overwrite (ffmpeg -y) and then delete a
    # user's own "<name>_audio.wav". The context manager also removes the
    # file when extraction or transcription fails.
    with tempfile.TemporaryDirectory() as work_dir:
        audio_path = os.path.join(work_dir, f"{base_name}_audio.wav")
        extract_audio(video_path, audio_path)
        transcribe_to_srt(audio_path, srt_path, txt_path)

    end_time = time.time()
    elapsed = end_time - start_time
    minutes = int(elapsed // 60)
    seconds = int(elapsed % 60)
    print(f"\n[⏱] Total time taken: {minutes} min {seconds} sec")
# Start the interactive workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()