Skip to content

Commit b0ddb27

Browse files
committed
Bundle srt module with edge-tts
Fixes #383 Signed-off-by: rany <rany2@riseup.net>
1 parent f78c8ec commit b0ddb27

File tree

7 files changed

+338
-15
lines changed

7 files changed

+338
-15
lines changed

LICENSE

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,33 @@
1+
The MIT license is used for 'src/edge_tts/srt_composer.py' only. All
2+
remaining files are licensed under the LGPLv3.
3+
4+
-----------------------------------------------------------------------
5+
6+
The MIT License
7+
8+
Copyright (c) 2014-2023 Christopher Down
9+
Copyright (c) 2025- rany <rany@riseup.net>
10+
11+
Permission is hereby granted, free of charge, to any person obtaining a copy
12+
of this software and associated documentation files (the "Software"), to deal
13+
in the Software without restriction, including without limitation the rights
14+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15+
copies of the Software, and to permit persons to whom the Software is
16+
furnished to do so, subject to the following conditions:
17+
18+
The above copyright notice and this permission notice shall be included in
19+
all copies or substantial portions of the Software.
20+
21+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
27+
THE SOFTWARE.
28+
29+
-----------------------------------------------------------------------
30+
131
GNU LESSER GENERAL PUBLIC LICENSE
232
Version 3, 29 June 2007
333

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,4 +35,4 @@ dev =
3535
isort
3636
mypy
3737
pylint
38-
types-tabulate
38+
types-tabulate

setup.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
install_requires=[
77
"aiohttp>=3.8.0,<4.0.0",
88
"certifi>=2023.11.17",
9-
"srt>=3.4.1,<4.0.0",
109
"tabulate>=0.4.4,<1.0.0",
1110
"typing-extensions>=4.1.0,<5.0.0",
1211
],

src/edge_tts/data_classes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def __post_init__(self) -> None:
5656
region = match.group(2)
5757
name = match.group(3)
5858
if name.find("-") != -1:
59-
region = region + "-" + name[: name.find("-")]
59+
region = f"{region}-{name[:name.find('-')]}"
6060
name = name[name.find("-") + 1 :]
6161
self.voice = (
6262
"Microsoft Server Speech Text to Speech Voice"

src/edge_tts/srt_composer.py

Lines changed: 294 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,294 @@
1+
"""A tiny library for composing SRT files.
2+
3+
Based on https://github.com/cdown/srt with parsing and subtitle-modifying
4+
functionality and Python 2 support removed. This is because of
5+
https://github.com/rany2/edge-tts/issues/383.
6+
7+
Typing support was added, and more Python 3 features were used.
8+
9+
Copyright (c) 2014-2023 Christopher Down
10+
Copyright (c) 2025- rany <rany@riseup.net>
11+
12+
This file is licensed under the MIT License (MIT).
13+
See the MIT section of the LICENSE file for details.
14+
"""
15+
16+
import functools
17+
import logging
18+
import re
19+
from datetime import timedelta
20+
from typing import Generator, List, Union
21+
22+
# Module-wide logger; skipped subtitles and legalised content are reported here.
LOG = logging.getLogger(__name__)

# Unit-conversion factors used when rendering SRT timestamps.
MICROSECONDS_IN_MILLISECOND = 1000
SECONDS_IN_MINUTE = 60
SECONDS_IN_HOUR = 3600
HOURS_IN_DAY = 24

# A zero-length duration, used to detect negative start times.
ZERO_TIMEDELTA = timedelta()

# Matches a run of two or more consecutive newlines, i.e. one or more blank
# lines inside a content block.
MULTI_WS_REGEX = re.compile(r"\n\n+")

# Pairs of (info message, predicate). Each predicate takes a Subtitle and
# returns a truthy value when that subtitle should be skipped; the message is
# what gets logged as the reason.
SUBTITLE_SKIP_CONDITIONS = (
    ("No content", lambda sub: not sub.content.strip()),
    ("Start time < 0 seconds", lambda sub: sub.start < ZERO_TIMEDELTA),
    ("Subtitle start time >= end time", lambda sub: sub.start >= sub.end),
)
39+
40+
41+
@functools.total_ordering
class Subtitle:
    r"""
    The metadata relating to a single subtitle. Subtitles are sorted by start
    time by default (then by end time, then by index). If no index was
    provided, index 0 will be used on writing an SRT block and when ordering
    subtitles.

    :param index: The SRT index for this subtitle
    :type index: int or None
    :param start: The time that the subtitle should start being shown
    :type start: :py:class:`datetime.timedelta`
    :param end: The time that the subtitle should stop being shown
    :type end: :py:class:`datetime.timedelta`
    :param str content: The subtitle content. Should not contain OS-specific
                        line separators, only \\n.
    """

    # pylint: disable=R0913
    def __init__(
        self, index: Union[int, None], start: timedelta, end: timedelta, content: str
    ) -> None:
        self.index = index
        self.start = start
        self.end = end
        self.content = content

    def __hash__(self) -> int:
        # Hash over every attribute so that the hash agrees with __eq__.
        return hash(frozenset(vars(self).items()))

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, Subtitle):
            return NotImplemented

        return vars(self) == vars(other)

    def __lt__(self, other: object) -> bool:
        if not isinstance(other, Subtitle):
            return NotImplemented

        # A missing index is ordered as 0, the same value to_srt() writes for
        # it. Comparing None against an int directly would raise TypeError
        # whenever two subtitles share the same start and end time.
        return (self.start, self.end, self.index or 0) < (
            other.start,
            other.end,
            other.index or 0,
        )

    def __repr__(self) -> str:
        item_list = ", ".join(f"{k}={v!r}" for k, v in vars(self).items())
        return f"{type(self).__name__}({item_list})"

    def to_srt(self, eol: Union[str, None] = None) -> str:
        r"""
        Convert the current :py:class:`Subtitle` to an SRT block.

        :param str eol: The end of line string to use (default "\\n")
        :returns: The metadata of the current :py:class:`Subtitle` object as an
                  SRT formatted subtitle block
        :rtype: str
        """
        output_content = make_legal_content(self.content)

        if eol is None:
            eol = "\n"
        elif eol != "\n":
            # Content is stored with "\n" separators; convert them so the
            # whole block uses the requested line ending.
            output_content = output_content.replace("\n", eol)

        template = "{idx}{eol}{start} --> {end}{eol}{content}{eol}{eol}"
        return template.format(
            idx=self.index or 0,
            start=timedelta_to_srt_timestamp(self.start),
            end=timedelta_to_srt_timestamp(self.end),
            content=output_content,
            eol=eol,
        )
118+
119+
120+
def make_legal_content(content: str) -> str:
    r"""
    Remove illegal content from a content block. Illegal content includes:

    * Blank lines
    * Starting or ending with a blank line

    .. doctest::

        >>> make_legal_content('\nfoo\n\nbar\n')
        'foo\nbar'
        >>> make_legal_content('foo\n')
        'foo'

    :param str content: The content to make legal
    :returns: The legalised content
    :rtype: str
    """
    # Optimisation: Usually the content we get is legally valid. Do a quick
    # check to see if we really need to do anything here. The check must agree
    # with the slow path below, so a trailing newline also disqualifies the
    # content; otherwise it would leak an extra blank line into the SRT block.
    if (
        content
        and not content.startswith("\n")
        and not content.endswith("\n")
        and "\n\n" not in content
    ):
        return content

    legal_content = MULTI_WS_REGEX.sub("\n", content.strip("\n"))
    LOG.info("Legalised content %r to %r", content, legal_content)
    return legal_content
145+
146+
147+
def timedelta_to_srt_timestamp(timedelta_timestamp: timedelta) -> str:
    r"""
    Render a :py:class:`~datetime.timedelta` as an SRT timestamp of the form
    ``HH:MM:SS,mmm``. Whole days are folded into the hours field and
    microseconds are truncated to milliseconds.

    .. doctest::

        >>> import datetime
        >>> delta = datetime.timedelta(hours=1, minutes=23, seconds=4)
        >>> timedelta_to_srt_timestamp(delta)
        '01:23:04,000'

    :param datetime.timedelta timedelta_timestamp: The duration to convert to
                                                   an SRT timestamp
    :returns: The timestamp in SRT format
    :rtype: str
    """
    hours_in_seconds_part, leftover = divmod(
        timedelta_timestamp.seconds, SECONDS_IN_HOUR
    )
    minutes, seconds = divmod(leftover, SECONDS_IN_MINUTE)
    # timedelta keeps whole days separately from seconds; merge them in.
    hours = hours_in_seconds_part + timedelta_timestamp.days * HOURS_IN_DAY
    millis = timedelta_timestamp.microseconds // MICROSECONDS_IN_MILLISECOND

    clock = ":".join(f"{int(part):02}" for part in (hours, minutes, seconds))
    return f"{clock},{int(millis):03}"
169+
170+
171+
def sort_and_reindex(
    subtitles: Union[Generator[Subtitle, None, None], List[Subtitle]],
    start_index: int = 1,
    in_place: bool = False,
    skip: bool = True,
) -> Generator[Subtitle, None, None]:
    """
    Yield the given subtitles in sorted order (by start time), renumbering
    their indexes consecutively from ``start_index``. This keeps an SRT file
    playing as expected after, for example, subtitle times were edited and the
    original order no longer holds.

    With skip=True, subtitles that are considered not useful are dropped and
    do not consume an index. A subtitle is "not useful" when:

    - Its content is empty, or only whitespace
    - Its start time is negative
    - Its start time is equal to or later than its end time

    .. doctest::

        >>> from datetime import timedelta
        >>> one = timedelta(seconds=1)
        >>> two = timedelta(seconds=2)
        >>> three = timedelta(seconds=3)
        >>> subs = [
        ...     Subtitle(index=999, start=one, end=two, content='1'),
        ...     Subtitle(index=0, start=two, end=three, content='2'),
        ... ]
        >>> list(sort_and_reindex(subs))  # doctest: +ELLIPSIS
        [Subtitle(...index=1...), Subtitle(...index=2...)]

    :param subtitles: :py:class:`Subtitle` objects in any order
    :param int start_index: The index to start from
    :param bool in_place: Whether to modify subs in-place for performance
                          (version <=1.0.0 behaviour)
    :param bool skip: Whether to skip subtitles considered not useful (see
                      above for rules)
    :returns: The sorted subtitles
    :rtype: :term:`generator` of :py:class:`Subtitle` objects
    """
    next_index = start_index

    for original in sorted(subtitles):
        # Work on a copy unless the caller explicitly allowed mutation.
        current = original if in_place else Subtitle(**vars(original))

        if skip:
            try:
                _should_skip_sub(current)
            except _ShouldSkipException as reason:
                if current.index is not None:
                    LOG.info(
                        "Skipped subtitle at index %d: %s", current.index, reason
                    )
                else:
                    LOG.info("Skipped subtitle with no index: %s", reason)
                continue

        # Only subtitles that are actually emitted advance the counter.
        current.index = next_index
        next_index += 1

        yield current
234+
235+
236+
def _should_skip_sub(subtitle: Subtitle) -> None:
    """
    Evaluate the rules in SUBTITLE_SKIP_CONDITIONS against a subtitle, in
    order, and raise on the first one that matches.

    :param subtitle: A :py:class:`Subtitle` to check whether to skip
    :raises _ShouldSkipException: If the subtitle should be skipped; the
                                  exception message is the matching rule's
                                  info message
    """
    # The generator is lazy, so predicates after the first match never run.
    first_match = next(
        (
            reason
            for reason, predicate in SUBTITLE_SKIP_CONDITIONS
            if predicate(subtitle)
        ),
        None,
    )
    if first_match is not None:
        raise _ShouldSkipException(first_match)
247+
248+
249+
def compose(
    subtitles: Union[Generator[Subtitle, None, None], List[Subtitle]],
    reindex: bool = True,
    start_index: int = 1,
    eol: Union[str, None] = None,
    in_place: bool = False,
) -> str:
    r"""
    Render :py:class:`Subtitle` objects as one string of concatenated SRT
    blocks, optionally sorting and renumbering them first.

    .. doctest::

        >>> from datetime import timedelta
        >>> start = timedelta(seconds=1)
        >>> end = timedelta(seconds=2)
        >>> subs = [
        ...     Subtitle(index=1, start=start, end=end, content='x'),
        ...     Subtitle(index=2, start=start, end=end, content='y'),
        ... ]
        >>> compose(subs)  # doctest: +ELLIPSIS
        '1\n00:00:01,000 --> 00:00:02,000\nx\n\n2\n00:00:01,000 --> ...'

    :param subtitles: The subtitles to convert to SRT blocks
    :type subtitles: :term:`iterator` of :py:class:`Subtitle` objects
    :param bool reindex: Whether to reindex subtitles based on start time
    :param int start_index: If reindexing, the index to start reindexing from
    :param str eol: The end of line string to use (default "\\n")
    :param bool in_place: Whether to reindex subs in-place for performance
                          (version <=1.0.0 behaviour)
    :returns: A single SRT formatted string, with each input
              :py:class:`Subtitle` represented as an SRT block
    :rtype: str
    """
    ordered = (
        sort_and_reindex(subtitles, start_index=start_index, in_place=in_place)
        if reindex
        else subtitles
    )

    blocks = [entry.to_srt(eol=eol) for entry in ordered]
    return "".join(blocks)
289+
290+
291+
class _ShouldSkipException(Exception):
    """Signal that a subtitle matched a skip condition and should be skipped."""

0 commit comments

Comments
 (0)