-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathwhisper_jax_to_srt.py
More file actions
39 lines (33 loc) · 1.38 KB
/
whisper_jax_to_srt.py
File metadata and controls
39 lines (33 loc) · 1.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import re
from pathlib import Path
def time_to_seconds(timestamp):
    """Convert a colon-separated timestamp string to seconds.

    Accepts ``ss``, ``mm:ss`` and ``hh:mm:ss`` forms; every field is parsed
    with ``float`` so the seconds field may carry a fractional part
    (e.g. ``"01:30.500"``).

    Args:
        timestamp: Timestamp text such as ``"01:30.500"`` or
            ``"01:02:03.250"``.

    Returns:
        The timestamp expressed as a float number of seconds.

    Raises:
        ValueError: If a field is not a valid number or there are more
            than three colon-separated fields.
    """
    fields = timestamp.split(':')
    if len(fields) > 3:
        raise ValueError(f"too many ':'-separated fields in timestamp: {timestamp!r}")
    # Fold left to right in base 60 so the same code covers ss, mm:ss and
    # hh:mm:ss without special-casing the number of fields.
    total = 0.0
    for field in fields:
        total = total * 60 + float(field)
    return total
def format_time(seconds):
    """Format a duration in seconds as an SRT timestamp (``hh:mm:ss,mmm``).

    Args:
        seconds: Duration in seconds, as an int or float.

    Returns:
        A string such as ``"00:01:30,500"``: hours, minutes and seconds are
        zero-padded to at least two digits and milliseconds to three.
    """
    # Round to whole milliseconds once, then do pure integer arithmetic.
    # Truncating the fractional part instead loses a millisecond to float
    # error (e.g. 2.3 -> 299 ms), and rounding here also lets a value like
    # 59.9996 carry correctly into the next minute.
    total_ms = round(seconds * 1000)
    whole_seconds, milliseconds = divmod(total_ms, 1000)
    minutes, secs = divmod(whole_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{milliseconds:03d}"
def time_match(source_text: str) -> str:
    """Rewrite every ``[mm:ss.fff -> mm:ss.fff]`` stamp as SRT-style times.

    Each bracketed time range found in *source_text* is replaced by
    ``[hh:mm:ss,mmm --> hh:mm:ss,mmm]``; all other text is left untouched.

    Args:
        source_text: Text containing zero or more bracketed time ranges.

    Returns:
        The text with every matching time range converted. If no range is
        found, the literal message ``"Pattern not found in the source text."``
        is returned instead (kept for compatibility with existing callers).
    """
    def _to_srt_range(match):
        # Convert both captured endpoints (start, end) the same way.
        start, end = (format_time(time_to_seconds(stamp)) for stamp in match.groups())
        return f"[{start} --> {end}]"

    # subn converts *all* stamps in one pass and reports how many were
    # replaced, so the "nothing matched" case can still be detected.
    new_text, replaced = re.subn(
        r'\[(\d+:\d+\.\d+) -> (\d+:\d+\.\d+)]', _to_srt_range, source_text
    )
    if replaced == 0:
        return "Pattern not found in the source text."
    return new_text
# Example usage: read a text file, rewrite its timestamps, and preview the result.
src_file = r'g:/aud_cap/long_narrate.txt'
with Path(src_file).open(encoding='utf-8') as source_handle:
    src_txt = source_handle.read()
new_text = time_match(src_txt)
# Only the first 100 characters are printed, as a quick sanity check.
print(new_text[0:100])