|
| 1 | +"""A tiny library for composing SRT files. |
| 2 | +
|
| 3 | +Based on https://github.com/cdown/srt with parsing, subtitle-modifying |
| 4 | +functionality, and Python 2 support removed. This is because of |
| 5 | +https://github.com/rany2/edge-tts/issues/383. |
| 6 | +
|
| 7 | +Typing support was added, and more Python 3 features were used. |
| 8 | +
|
| 9 | +Copyright (c) 2014-2023 Christopher Down |
| 10 | +Copyright (c) 2025- rany <rany@riseup.net> |
| 11 | +
|
| 12 | +This file is licensed under the MIT License (MIT). |
| 13 | +See the LICENSE-MIT file for details. |
| 14 | +""" |
| 15 | + |
| 16 | +import functools |
| 17 | +import logging |
| 18 | +import re |
| 19 | +from datetime import timedelta |
| 20 | +from typing import Generator, List, Union |
| 21 | + |
# Module-wide logger; the composer reports legalised content and skipped
# subtitles through it at INFO level.
LOG = logging.getLogger(__name__)

# Matches a run of two or more consecutive newlines, i.e. one or more blank
# lines inside a content block.
MULTI_WS_REGEX = re.compile(r"\n\n+")

# Reference point for detecting negative start times.
ZERO_TIMEDELTA = timedelta()

# Unit conversion factors used when rendering SRT timestamps.
MICROSECONDS_IN_MILLISECOND = 1000
SECONDS_IN_MINUTE = 60
SECONDS_IN_HOUR = 3600
HOURS_IN_DAY = 24

# Table of (log message, predicate) pairs. Each predicate takes a Subtitle and
# returns a truthy value when that subtitle should be skipped; the paired
# message explains why. Entries are evaluated in order.
SUBTITLE_SKIP_CONDITIONS = (
    ("No content", lambda subtitle: not subtitle.content.strip()),
    (
        "Start time < 0 seconds",
        lambda subtitle: subtitle.start < ZERO_TIMEDELTA,
    ),
    (
        "Subtitle start time >= end time",
        lambda subtitle: subtitle.start >= subtitle.end,
    ),
)
| 39 | + |
| 40 | + |
@functools.total_ordering
class Subtitle:
    r"""
    The metadata relating to a single subtitle. Subtitles are sorted by start
    time by default; ties are broken by end time and then by index, with a
    missing index sorting ahead of any explicit one. If no index was provided,
    index 0 will be used on writing an SRT block.

    :param index: The SRT index for this subtitle
    :type index: int or None
    :param start: The time that the subtitle should start being shown
    :type start: :py:class:`datetime.timedelta`
    :param end: The time that the subtitle should stop being shown
    :type end: :py:class:`datetime.timedelta`
    :param str content: The subtitle content. Should not contain OS-specific
                        line separators, only \\n. This module does not
                        include a parser, so callers are responsible for
                        normalising line endings themselves.
    """

    # pylint: disable=R0913
    def __init__(
        self, index: Union[int, None], start: timedelta, end: timedelta, content: str
    ) -> None:
        self.index = index
        self.start = start
        self.end = end
        self.content = content

    def __hash__(self) -> int:
        # Hash over every instance attribute so that it agrees with __eq__.
        return hash(frozenset(vars(self).items()))

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, Subtitle):
            return NotImplemented

        return vars(self) == vars(other)

    def __lt__(self, other: object) -> bool:
        if not isinstance(other, Subtitle):
            return NotImplemented

        return self._sort_key() < other._sort_key()

    def _sort_key(self) -> tuple:
        """
        Build the tuple used to order subtitles.

        The index is split into an "is set" flag plus a numeric value so that
        ``None`` is never compared against an ``int`` (which would raise
        :py:class:`TypeError`); subtitles without an index sort first among
        those sharing the same start and end time.
        """
        return (self.start, self.end, self.index is not None, self.index or 0)

    def __repr__(self) -> str:
        item_list = ", ".join(f"{k}={v!r}" for k, v in vars(self).items())
        return f"{type(self).__name__}({item_list})"

    def to_srt(self, eol: Union[str, None] = None) -> str:
        r"""
        Convert the current :py:class:`Subtitle` to an SRT block.

        The content is first passed through :py:func:`make_legal_content`, and
        both timestamps are rendered with
        :py:func:`timedelta_to_srt_timestamp`.

        :param str eol: The end of line string to use (default "\\n")
        :returns: The metadata of the current :py:class:`Subtitle` object as an
                  SRT formatted subtitle block
        :rtype: str
        """
        output_content = make_legal_content(self.content)

        if eol is None:
            eol = "\n"
        elif eol != "\n":
            # Content is stored with "\n" separators; translate them to the
            # requested line ending.
            output_content = output_content.replace("\n", eol)

        template = "{idx}{eol}{start} --> {end}{eol}{content}{eol}{eol}"
        return template.format(
            idx=self.index or 0,
            start=timedelta_to_srt_timestamp(self.start),
            end=timedelta_to_srt_timestamp(self.end),
            content=output_content,
            eol=eol,
        )
| 118 | + |
| 119 | + |
def make_legal_content(content: str) -> str:
    r"""
    Remove illegal content from a content block. Illegal content includes:

    * Blank lines
    * Starting or ending with a newline

    .. doctest::

        >>> make_legal_content('\nfoo\n\nbar\n')
        'foo\nbar'
        >>> make_legal_content('foo\n')
        'foo'

    :param str content: The content to make legal
    :returns: The legalised content
    :rtype: str
    """
    # Optimisation: usually the content we get is already legal, so do a cheap
    # check first and return it untouched. The trailing-newline test keeps
    # this shortcut consistent with the strip("\n") performed below.
    if (
        content
        and content[0] != "\n"
        and content[-1] != "\n"
        and "\n\n" not in content
    ):
        return content

    legal_content = MULTI_WS_REGEX.sub("\n", content.strip("\n"))
    LOG.info("Legalised content %r to %r", content, legal_content)
    return legal_content
| 145 | + |
| 146 | + |
def timedelta_to_srt_timestamp(timedelta_timestamp: timedelta) -> str:
    r"""
    Render a :py:class:`~datetime.timedelta` as an SRT timestamp of the form
    ``HH:MM:SS,mmm``.

    Whole days are folded into the hours field, and microseconds are
    truncated (not rounded) to milliseconds.

    .. doctest::

        >>> import datetime
        >>> delta = datetime.timedelta(hours=1, minutes=23, seconds=4)
        >>> timedelta_to_srt_timestamp(delta)
        '01:23:04,000'

    :param datetime.timedelta timedelta_timestamp: The duration to convert to
                                                   an SRT timestamp
    :returns: The timestamp in SRT format
    :rtype: str
    """
    seconds_of_day = timedelta_timestamp.seconds

    hours = (
        timedelta_timestamp.days * HOURS_IN_DAY + seconds_of_day // SECONDS_IN_HOUR
    )
    minutes = seconds_of_day % SECONDS_IN_HOUR // SECONDS_IN_MINUTE
    seconds = seconds_of_day % SECONDS_IN_MINUTE
    millis = timedelta_timestamp.microseconds // MICROSECONDS_IN_MILLISECOND

    return f"{hours:02}:{minutes:02}:{seconds:02},{millis:03}"
| 169 | + |
| 170 | + |
def sort_and_reindex(
    subtitles: Union[Generator[Subtitle, None, None], List[Subtitle]],
    start_index: int = 1,
    in_place: bool = False,
    skip: bool = True,
) -> Generator[Subtitle, None, None]:
    """
    Yield the given subtitles in sorted order, renumbering them consecutively
    from ``start_index``. Sorting relies on the ordering defined by
    :py:class:`Subtitle` (start time first), so the resulting SRT plays back
    as expected even after timings were edited.

    With ``skip=True``, subtitles judged "not useful" are dropped and do not
    consume an index. A subtitle is dropped when any of these holds:

    - Its content is empty, or only whitespace
    - Its start time is negative
    - Its start time is equal to or later than its end time

    .. doctest::

        >>> from datetime import timedelta
        >>> one = timedelta(seconds=1)
        >>> two = timedelta(seconds=2)
        >>> three = timedelta(seconds=3)
        >>> subs = [
        ...     Subtitle(index=999, start=one, end=two, content='1'),
        ...     Subtitle(index=0, start=two, end=three, content='2'),
        ... ]
        >>> list(sort_and_reindex(subs))  # doctest: +ELLIPSIS
        [Subtitle(...index=1...), Subtitle(...index=2...)]

    :param subtitles: :py:class:`Subtitle` objects in any order
    :param int start_index: The index to start from
    :param bool in_place: Whether to modify subs in-place for performance
                          (version <=1.0.0 behaviour)
    :param bool skip: Whether to skip subtitles considered not useful (see
                      above for rules)
    :returns: The sorted subtitles
    :rtype: :term:`generator` of :py:class:`Subtitle` objects
    """
    next_index = start_index

    for original in sorted(subtitles):
        # Work on a copy unless the caller opted into mutating their objects.
        current = original if in_place else Subtitle(**vars(original))

        if skip:
            try:
                _should_skip_sub(current)
            except _ShouldSkipException as skip_reason:
                if current.index is None:
                    LOG.info("Skipped subtitle with no index: %s", skip_reason)
                else:
                    LOG.info(
                        "Skipped subtitle at index %d: %s", current.index, skip_reason
                    )
                continue

        # Only subtitles that are actually emitted advance the counter.
        current.index = next_index
        next_index += 1

        yield current
| 234 | + |
| 235 | + |
def _should_skip_sub(subtitle: Subtitle) -> None:
    """
    Evaluate the predicates in SUBTITLE_SKIP_CONDITIONS, in order, against a
    subtitle and signal the first one that matches.

    :param subtitle: A :py:class:`Subtitle` to check whether to skip
    :raises _ShouldSkipException: If the subtitle should be skipped; the
                                  exception carries the matching info message
    """
    # next() stops at the first matching predicate, so later ones are not run.
    reason = next(
        (
            info_msg
            for info_msg, sub_skipper in SUBTITLE_SKIP_CONDITIONS
            if sub_skipper(subtitle)
        ),
        None,
    )
    if reason is not None:
        raise _ShouldSkipException(reason)
| 247 | + |
| 248 | + |
def compose(
    subtitles: Union[Generator[Subtitle, None, None], List[Subtitle]],
    reindex: bool = True,
    start_index: int = 1,
    eol: Union[str, None] = None,
    in_place: bool = False,
) -> str:
    r"""
    Serialise :py:class:`Subtitle` objects into one string of concatenated
    SRT blocks.

    When ``reindex`` is true the subtitles are first passed through
    :py:func:`sort_and_reindex`; otherwise they are written in the order and
    with the indexes given.

    .. doctest::

        >>> from datetime import timedelta
        >>> start = timedelta(seconds=1)
        >>> end = timedelta(seconds=2)
        >>> subs = [
        ...     Subtitle(index=1, start=start, end=end, content='x'),
        ...     Subtitle(index=2, start=start, end=end, content='y'),
        ... ]
        >>> compose(subs)  # doctest: +ELLIPSIS
        '1\n00:00:01,000 --> 00:00:02,000\nx\n\n2\n00:00:01,000 --> ...'

    :param subtitles: The subtitles to convert to SRT blocks
    :type subtitles: :term:`iterator` of :py:class:`Subtitle` objects
    :param bool reindex: Whether to reindex subtitles based on start time
    :param int start_index: If reindexing, the index to start reindexing from
    :param str eol: The end of line string to use (default "\\n")
    :param bool in_place: Whether to reindex subs in-place for performance
                          (version <=1.0.0 behaviour)
    :returns: A single SRT formatted string, with each input
              :py:class:`Subtitle` represented as an SRT block
    :rtype: str
    """
    ordered = (
        sort_and_reindex(subtitles, start_index=start_index, in_place=in_place)
        if reindex
        else subtitles
    )

    return "".join(sub.to_srt(eol=eol) for sub in ordered)
| 289 | + |
| 290 | + |
class _ShouldSkipException(Exception):
    """
    Internal signal raised by the skip check when a subtitle matches one of
    the skip conditions; its message is the reason for skipping.
    """
0 commit comments