-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathVideoToText.py
More file actions
82 lines (67 loc) · 2.71 KB
/
VideoToText.py
File metadata and controls
82 lines (67 loc) · 2.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import io
import os
import tkinter as tk
from tkinter import ttk, filedialog

import moviepy.editor as mp
import speech_recognition as sr
from gtts import gTTS
from pydub import AudioSegment, silence
from ttkbootstrap import Style
def extract_audio(video_path):
    """Extract the audio track of a video into ``temp.wav``.

    Args:
        video_path: Path of the video file to read.

    Returns:
        The path of the written WAV file (always ``"temp.wav"``, relative to
        the current working directory).

    Raises:
        ValueError: If the video has no audio track.
    """
    clip = mp.VideoFileClip(video_path)
    try:
        # ``clip.audio`` is None for silent videos; fail with a clear message
        # instead of an AttributeError on ``None.write_audiofile``.
        if clip.audio is None:
            raise ValueError(f"No audio track found in {video_path!r}")
        clip.audio.write_audiofile("temp.wav")
    finally:
        # Release the file handles / reader processes held by the clip,
        # whether or not the export succeeded.
        clip.close()
    return "temp.wav"
def get_audio_chunks(audio_path, min_silence_len=500, silence_thresh=-36):
    """Split a WAV file into the non-silent stretches between silences.

    Args:
        audio_path: Path of the WAV file to load.
        min_silence_len: Minimum length of a silence, passed through to
            ``silence.detect_silence`` (pydub measures this in milliseconds).
        silence_thresh: Loudness threshold passed through to
            ``silence.detect_silence`` (pydub measures this in dBFS).

    Returns:
        A list of ``(start, end, segment)`` tuples, where ``start`` and
        ``end`` are positions in the audio and ``segment`` is
        ``audio[start:end]``. Zero-length stretches (audio that begins or
        ends with silence, or back-to-back silences) are omitted, so the list
        is empty when the whole file is silent.
    """
    audio = AudioSegment.from_wav(audio_path)
    silent_ranges = silence.detect_silence(
        audio,
        min_silence_len=min_silence_len,
        silence_thresh=silence_thresh,
    )

    total_length = len(audio)
    chunks = []
    prev_end = 0
    # A zero-width sentinel "silence" at the very end lets the trailing
    # stretch of audio go through the same emptiness check as the others.
    for start, end in [*silent_ranges, (total_length, total_length)]:
        if start > prev_end:
            chunks.append((prev_end, start, audio[prev_end:start]))
        prev_end = end
    return chunks
def speech_to_text(audio_chunk):
    """Transcribe one audio chunk using ``Recognizer.recognize_google``.

    Args:
        audio_chunk: An audio segment exposing pydub's
            ``export(out_f, format=...)`` method.

    Returns:
        The recognized text, or the placeholder ``"[inaudible]"`` when the
        recognizer raises ``sr.UnknownValueError``.

    Note:
        Only ``sr.UnknownValueError`` is handled here; any other error from
        the recognizer (for example ``sr.RequestError`` when the service
        cannot be reached) propagates to the caller.
    """
    recognizer = sr.Recognizer()

    # Export to an in-memory WAV rather than a fixed-name file on disk, so
    # nothing is left behind in the working directory after each call.
    wav_buffer = io.BytesIO()
    audio_chunk.export(wav_buffer, format="wav")
    wav_buffer.seek(0)

    with sr.AudioFile(wav_buffer) as source:
        audio_data = recognizer.record(source)

    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "[inaudible]"
def create_subtitle():
    """Ask for a video file and write its transcript to ``subtitle.txt``.

    Opens a file dialog; if the user cancels, nothing happens. Otherwise the
    video's audio is extracted, split into chunks, each chunk is transcribed,
    and the resulting texts are written one per line to ``subtitle.txt`` in
    the current working directory (UTF-8 encoded, matching the encoding used
    when subtitle files are read back for speech conversion).
    """
    video_path = filedialog.askopenfilename(title="Select video file")
    if not video_path:
        # Dialog was cancelled.
        return

    audio_path = extract_audio(video_path)
    try:
        # Chunk start/end positions are not needed for a plain-text transcript.
        subtitles = [
            speech_to_text(chunk)
            for _start, _end, chunk in get_audio_chunks(audio_path)
        ]
    finally:
        # The extracted WAV is only an intermediate artefact; removing it is
        # best-effort, so a failure to delete must not mask the real outcome.
        try:
            os.remove(audio_path)
        except OSError:
            pass

    # Saved as plain .txt because the output carries no timing information.
    with open("subtitle.txt", "w", encoding="utf-8") as file:
        file.write("\n".join(subtitles))
def text_to_speech(text, lang='en-us', output_path='output.mp3'):
    """Synthesize *text* to an MP3 file with gTTS.

    Args:
        text: The text to speak. Must contain at least one non-whitespace
            character.
        lang: Language code passed to ``gTTS``.
        output_path: Where to save the audio. Defaults to ``'output.mp3'``
            in the current working directory.

    Returns:
        The path the audio was saved to (``output_path``).

    Raises:
        ValueError: If *text* is empty or whitespace-only.
    """
    # Reject blank input up front with a clear error instead of letting it
    # fail somewhere inside the TTS library.
    if not text or not text.strip():
        raise ValueError("text_to_speech requires non-empty text")

    tts = gTTS(text=text, lang=lang, slow=False)
    tts.save(output_path)
    return output_path
def convert_subtitle_to_speech():
    """Ask for a subtitle file and convert its text to speech.

    Opens a file dialog; if the user cancels, nothing happens. Otherwise the
    file is read as UTF-8 and its whole content is passed to
    ``text_to_speech``; the resulting audio file name is printed. A file
    with no non-whitespace text is reported and skipped rather than being
    sent to the speech engine.
    """
    subtitle_path = filedialog.askopenfilename(title="Select subtitle file")
    if not subtitle_path:
        # Dialog was cancelled.
        return

    with open(subtitle_path, 'r', encoding='utf-8') as file:
        subtitle_content = file.read()

    if not subtitle_content.strip():
        print(f'No text found in {subtitle_path}; nothing to convert')
        return

    audio_filename = text_to_speech(subtitle_content)
    print(f'Audio saved as {audio_filename}')
# GUI: one window with a button per action.
root = tk.Tk()
style = Style(theme="darkly")
root.title("Subtitle Creator")
root.geometry("300x200")

# Container frame that fills the window, with a 10px margin on every side.
frame = ttk.Frame(root)
frame.pack(padx=10, pady=10, expand=True, fill=tk.BOTH)

# Both buttons share the same packing options: full width, stacked vertically.
button_layout = {"fill": "x", "padx": 10, "pady": 5}

btn_create = ttk.Button(
    frame,
    text="Create Subtitle",
    style="TButton",
    command=create_subtitle,
)
btn_create.pack(**button_layout)

btn_speech = ttk.Button(
    frame,
    text="Convert Subtitle to Speech",
    style="TButton",
    command=convert_subtitle_to_speech,
)
btn_speech.pack(**button_layout)

# Hand control to Tk's event loop; returns when the window is closed.
root.mainloop()