Skip to content

Commit 837fafb

Browse files
committed
updates
1 parent cd35a5e commit 837fafb

File tree

3 files changed

+322
-163
lines changed

3 files changed

+322
-163
lines changed

Programming/Identify.py

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
import sys
2+
import re
3+
import os
4+
import tkinter as tk
5+
from tkinter import scrolledtext, messagebox
6+
7+
def identify_speakers_srt(srt_filepath, custom_speaker_map):
    """
    Replaces speaker labels in an SRT file with identified names and occupations
    using a provided custom speaker map.
    Saves the modified content to a new SRT file.

    Only lines that begin with ``[SPEAKER_<digits>]: `` are candidates for
    rewriting; a candidate is rewritten only when its label is a key of
    ``custom_speaker_map``. Every other line is copied through unchanged.
    The result is written next to the input as
    ``<base>.speaker_identified<ext>``. Success and failure are both reported
    through tkinter message boxes.

    Args:
        srt_filepath (str): The path to the input SRT file.
        custom_speaker_map (dict): A dictionary containing speaker label mappings.

    Returns:
        None
    """
    try:
        with open(srt_filepath, 'r', encoding='utf-8') as srt_file:
            srt_content = srt_file.readlines()
    except FileNotFoundError:
        messagebox.showerror("Error", f"SRT file not found at '{srt_filepath}'")
        return
    except (OSError, UnicodeDecodeError) as e:
        # Permission problems, a directory passed as the path, or a file that
        # is not valid UTF-8 are reported instead of raising out of the
        # GUI callback.
        messagebox.showerror("Error", f"Error reading SRT file '{srt_filepath}': {e}")
        return

    # Compiled once, outside the loop. '.' does not match a newline, so the
    # match always stops just before the line terminator (if any).
    speaker_line = re.compile(r'\[(SPEAKER_\d+)\]: (.*)')

    modified_lines = []
    for line in srt_content:
        speaker_match = speaker_line.match(line)
        if speaker_match and speaker_match.group(1) in custom_speaker_map:
            modified_speaker = custom_speaker_map[speaker_match.group(1)]
            dialogue = speaker_match.group(2)
            # Re-attach whatever followed the match (the original line ending,
            # or nothing on an unterminated last line) rather than always
            # forcing a '\n'.
            line_ending = line[speaker_match.end():]
            modified_lines.append(f"[{modified_speaker}]: {dialogue}{line_ending}")
        else:
            # Not a speaker line, or a speaker that is not in the map.
            modified_lines.append(line)

    # Create output file path
    base, ext = os.path.splitext(srt_filepath)
    output_filepath = f"{base}.speaker_identified{ext}"

    try:
        with open(output_filepath, 'w', encoding='utf-8') as output_file:
            output_file.writelines(modified_lines)
    except OSError as e:
        messagebox.showerror("Error", f"Error writing to output file '{output_filepath}': {e}")
    else:
        messagebox.showinfo("Success", f"Modified SRT saved to: '{output_filepath}'")
50+
51+
def parse_speaker_map_text(map_text):
    """
    Parses the pasted speaker map text into a dictionary.

    Each non-empty line is expected to map one label to one name. Accepted
    spellings of a mapping line include::

        **SPEAKER_00**: Speaker Name
        **SPEAKER_00:** Speaker Name
        * **SPEAKER_00**: Speaker Name
        SPEAKER_00: Speaker Name

    Lines that are not mappings are handled as follows:

    * a line starting with ``**`` is treated as a malformed bold mapping:
      an error dialog is shown and parsing stops;
    * any other line starting with ``*`` (a plain list bullet) is skipped;
    * any other line containing ``:`` is treated as a mapping with an
      invalid label: an error dialog is shown and parsing stops;
    * everything else is ignored.

    Args:
        map_text (str): The text from the GUI text area.

    Returns:
        dict: A dictionary with speaker labels as keys and names as values, or None if parsing fails.
    """
    # Optional list bullet, optional emphasis markers around the label (the
    # colon may sit just outside or just inside the closing markers), then
    # the name. The ":**" form is only accepted when followed by whitespace
    # or end of line so that "SPEAKER_00: **Name**" keeps its bold name.
    mapping_pattern = re.compile(
        r'(?:[*\-+]\s+)?'
        r'\*{0,2}(SPEAKER_\d+)\s*'
        r'(?:\*{1,2}\s*:|:\*{1,2}(?=\s|$)|:)'
        r'\s*(.*)'
    )

    speaker_map = {}
    for raw_line in map_text.splitlines():
        line = raw_line.strip()
        if not line:
            continue

        # fullmatch rejects labels with trailing junk such as "SPEAKER_00abc",
        # which could never match a "[SPEAKER_NN]" tag anyway.
        match = mapping_pattern.fullmatch(line)
        if match:
            speaker_map[match.group(1)] = match.group(2).strip()
            continue

        if line.startswith('**'):  # error handling if bold format is wrong
            messagebox.showerror("Error", f"Invalid speaker mapping format in line:\n'{line}'.\nUse format like: **SPEAKER_00**: Speaker Name")
            return None

        if line.startswith('*'):  # plain list bullet that is not a mapping
            continue

        if ':' in line:  # looks like "label: name" but the label is invalid
            messagebox.showerror("Error", f"Invalid speaker label format in line: '{line}'. Expected 'SPEAKER_XX:'")
            return None

    return speaker_map
87+
88+
def process_srt_gui():
    """
    Button callback: read the SRT path and the pasted speaker mappings from
    the GUI widgets, validate them, and run the speaker replacement.

    Reads the module-level widgets ``srt_file_entry`` and
    ``speaker_map_text_area``. Every failure is reported with a message box
    and the function returns without processing.
    """
    # Trim whitespace that tends to come along when a path is pasted.
    srt_file_path = srt_file_entry.get().strip()
    map_text = speaker_map_text_area.get("1.0", tk.END)  # Get text from Text area

    # isfile (not exists) so that a directory path is rejected here instead
    # of failing later when the file is opened.
    if not os.path.isfile(srt_file_path):
        messagebox.showerror("Error", f"SRT file not found: '{srt_file_path}'")
        return

    custom_map = parse_speaker_map_text(map_text)
    if custom_map is None:
        # The parser has already shown an error dialog for the bad line.
        return
    if not custom_map:
        # An empty map used to be ignored silently, leaving the user with no
        # feedback after clicking the button.
        messagebox.showwarning(
            "No speaker mappings",
            "No speaker mappings were found.\nUse format like: **SPEAKER_00**: Speaker Name",
        )
        return

    identify_speakers_srt(srt_file_path, custom_map)
99+
100+
101+
if __name__ == "__main__":
    # Exactly one command-line argument (the SRT file) is required; anything
    # else prints the usage text and exits with status 1.
    if len(sys.argv) != 2:
        for usage_line in (
            "Usage: Identify.py Full_video.srt",
            " A GUI will open to paste speaker mappings.",
        ):
            print(usage_line)
        sys.exit(1)

    srt_file = sys.argv[1]

    # Main window.
    window = tk.Tk()
    window.title("SRT Speaker Identifier")

    # SRT path: caption plus an entry pre-filled from the command line.
    # process_srt_gui() reads this entry through its module-level name.
    path_caption = tk.Label(window, text="SRT File Path:")
    path_caption.pack(pady=5)
    srt_file_entry = tk.Entry(window, width=50)
    srt_file_entry.insert(0, srt_file)
    srt_file_entry.pack(pady=5)

    # Speaker mappings: caption plus a scrollable text area, also read by
    # process_srt_gui() through its module-level name.
    mappings_caption = tk.Label(
        window,
        text="Paste Speaker Mappings (Format: **SPEAKER_XX**: Speaker Name):",
    )
    mappings_caption.pack(pady=5)
    speaker_map_text_area = scrolledtext.ScrolledText(window, height=10, width=60)
    speaker_map_text_area.pack(pady=10)

    # Button that triggers the processing callback.
    process_button = tk.Button(window, text="Process SRT", command=process_srt_gui)
    process_button.pack(pady=10)

    # Hand control to Tk until the window is closed.
    window.mainloop()

0 commit comments

Comments
 (0)