Skip to content

Commit 174bcb9

Browse files
nywheretpt-ntardieu, palemieux
authored
SRT reader: add support for alignment tags
--------- Co-authored-by: Nicolas Tardieu <ntardieu@transperfect.com> Co-authored-by: Pierre-Anthony Lemieux <pal@sandflow.com>
1 parent e087230 commit 174bcb9

File tree

5 files changed

+363
-50
lines changed

5 files changed

+363
-50
lines changed

README.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,19 @@ If `true`, the following extended formatting tags are supported: `{bold}`,
213213

214214
Default: `false`
215215

216+
#### alignment_tags
217+
218+
`"alignment_tags" : true | false`
219+
220+
If `true`, ASS/SSA-style alignment tags (`{\anN}`) are supported, where N is a
221+
digit from 1 to 9 corresponding to a position on a numeric keypad:
222+
223+
* 1-3: bottom (left, center, right)
224+
* 4-6: middle (left, center, right)
225+
* 7-9: top (left, center, right)
226+
227+
Default: `false`
228+
216229
### VTT Writer configuration (`"vtt_writer"`)
217230

218231
#### line_position

src/main/python/ttconv/srt/config.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ def name(cls):
4141
# enables support for extended tags: {b}, {bold}, <bold> (and italic/underline equivalents)
4242
extended_tags: bool = field(default=False, metadata={"decoder": bool})
4343

44+
# enables support for alignment tags: {\an1} through {\an9}
45+
alignment_tags: bool = field(default=False, metadata={"decoder": bool})
46+
4447
@dataclass
4548
class SRTWriterConfiguration(ModuleConfiguration):
4649
"""SRT writer configuration"""

src/main/python/ttconv/srt/reader.py

Lines changed: 112 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,97 @@ def _none_terminated(iterator):
4949
yield item
5050
yield None
5151

52+
# Alignment tag regex for ASS/SSA style tags {\an1} through {\an9}
53+
_ALIGNMENT_TAG_RE = re.compile(r"\{\\an([1-9])\}")
54+
55+
class _SSAAlignment(Enum):
  """Alignment codes used by ASS/SSA ``{\\anN}`` tags.

  Every member is named ``an<N>`` and its value is the pair
  ``(styles.DisplayAlignType, styles.TextAlignType)`` applied to the region
  created for that code. The codes follow a numeric keypad:

      7 = top-left     8 = top-center     9 = top-right
      4 = mid-left     5 = mid-center     6 = mid-right
      1 = bot-left     2 = bot-center     3 = bot-right
  """

  # Bottom row of the keypad: display-aligned "after"
  an1 = (styles.DisplayAlignType.after, styles.TextAlignType.start)
  an2 = (styles.DisplayAlignType.after, styles.TextAlignType.center)
  an3 = (styles.DisplayAlignType.after, styles.TextAlignType.end)

  # Middle row of the keypad: display-aligned "center"
  an4 = (styles.DisplayAlignType.center, styles.TextAlignType.start)
  an5 = (styles.DisplayAlignType.center, styles.TextAlignType.center)
  an6 = (styles.DisplayAlignType.center, styles.TextAlignType.end)

  # Top row of the keypad: display-aligned "before"
  an7 = (styles.DisplayAlignType.before, styles.TextAlignType.start)
  an8 = (styles.DisplayAlignType.before, styles.TextAlignType.center)
  an9 = (styles.DisplayAlignType.before, styles.TextAlignType.end)

  @classmethod
  def from_code(cls, code: int) -> "_SSAAlignment":
    """Return the member named ``an<code>``.

    Raises ``KeyError`` when no member has that name.
    """
    member_name = f"an{code}"
    return cls.__members__[member_name]
77+
78+
def _extract_alignment(text: str) -> typing.Tuple[typing.Optional[_SSAAlignment], str]:
  """Separate the alignment tag(s) from a subtitle text.

  Returns a pair ``(alignment, cleaned_text)``:

  * when `text` contains no alignment tag, the pair is ``(None, text)`` and
    the text is returned untouched;
  * otherwise `alignment` is taken from the first tag found, and
    `cleaned_text` is `text` with every alignment tag removed (later tags are
    dropped without affecting the chosen alignment).
  """
  first_tag = _ALIGNMENT_TAG_RE.search(text)
  if first_tag is None:
    return None, text

  # The regex captures the single digit that follows "an"
  chosen = _SSAAlignment.from_code(int(first_tag.group(1)))
  without_tags = _ALIGNMENT_TAG_RE.sub("", text)
  return chosen, without_tags
91+
92+
def _get_region_for_alignment(
  doc: model.ContentDocument,
  alignment: _SSAAlignment,
  regions_cache: typing.Dict[_SSAAlignment, model.Region]
) -> model.Region:
  """Return the region associated with `alignment`, creating it on first use.

  On a cache miss a region with id ``r_<alignment name>`` is created on `doc`,
  styled, registered with ``doc.put_region`` and stored in `regions_cache`
  under `alignment`. On a cache hit the stored region is returned and neither
  `doc` nor the cache is modified.

  The region is inset by `_DEFAULT_SAFE_AREA_PCT` percent on every side, takes
  its display and text alignment from `alignment.value`, and receives the
  module's default line height, font family, font size, color and text outline.
  """
  if alignment not in regions_cache:

    def _pct(amount):
      # A new LengthType per use, as each style value is built independently
      return styles.LengthType(amount, styles.LengthType.Units.pct)

    block_align, inline_align = alignment.value
    inner_size = 100 - 2 * _DEFAULT_SAFE_AREA_PCT

    new_region = model.Region(f"r_{alignment.name}", doc)

    # Applied in order: geometry (safe-area inset), alignment, default styling
    style_table = (
      (
        styles.StyleProperties.Origin,
        styles.CoordinateType(
          x=_pct(_DEFAULT_SAFE_AREA_PCT),
          y=_pct(_DEFAULT_SAFE_AREA_PCT)
        )
      ),
      (
        styles.StyleProperties.Extent,
        styles.ExtentType(
          height=_pct(inner_size),
          width=_pct(inner_size)
        )
      ),
      (styles.StyleProperties.DisplayAlign, block_align),
      (styles.StyleProperties.TextAlign, inline_align),
      (styles.StyleProperties.LineHeight, _DEFAULT_LINE_HEIGHT),
      (styles.StyleProperties.FontFamily, _DEFAULT_FONT_STACK),
      (styles.StyleProperties.FontSize, _DEFAULT_FONT_SIZE),
      (styles.StyleProperties.Color, _DEFAULT_TEXT_COLOR),
      (
        styles.StyleProperties.TextOutline,
        styles.TextOutlineType(_DEFAULT_OUTLINE_THICKNESS, _DEFAULT_OUTLINE_COLOR)
      ),
    )
    for style_prop, style_value in style_table:
      new_region.set_style(style_prop, style_value)

    doc.put_region(new_region)
    regions_cache[alignment] = new_region

  return regions_cache[alignment]
142+
52143
class _TextParser(HTMLParser):
53144

54145
def __init__(self, paragraph: model.P, line_number: int) -> None:
@@ -109,72 +200,34 @@ class _State(Enum):
109200
_EMPTY_RE = re.compile(r"\s+")
110201
_COUNTER_RE = re.compile(r"\d+")
111202
_TIMECODE_RE = re.compile(r"(?P<begin_h>[0-9]{2,3}):(?P<begin_m>[0-9]{2}):(?P<begin_s>[0-9]{2}),(?P<begin_ms>[0-9]{3})\s+-->\s+(?P<end_h>[0-9]{2,3}):(?P<end_m>[0-9]{2}):(?P<end_s>[0-9]{2}),(?P<end_ms>[0-9]{3})")
112-
_DEFAULT_REGION_ID = "r1"
113203
_DEFAULT_FONT_STACK = ("Verdana", "Arial", "Tiresias", styles.GenericFontFamilyType.sansSerif)
114204
_DEFAULT_FONT_SIZE = styles.LengthType(80, styles.LengthType.Units.pct)
115205
_DEFAULT_OUTLINE_THICKNESS = styles.LengthType(5, styles.LengthType.Units.pct)
116206
_DEFAULT_TEXT_COLOR = styles.NamedColors.white.value
117207
_DEFAULT_OUTLINE_COLOR = styles.NamedColors.black.value
118208
_DEFAULT_LINE_HEIGHT = styles.LengthType(125, styles.LengthType.Units.pct)
209+
_DEFAULT_SAFE_AREA_PCT = 10
119210

120211
def to_model(data_file: typing.IO, _config: SRTReaderConfiguration = None, progress_callback=lambda _: None):
121212
"""Converts an SRT document to the data model"""
122213

123214
extended_tags = _config.extended_tags if isinstance(_config, SRTReaderConfiguration) else False
215+
alignment_tags = _config.alignment_tags if isinstance(_config, SRTReaderConfiguration) else False
124216

125-
doc = model.ContentDocument()
217+
# Cache for alignment-based regions
218+
alignment_regions_cache: typing.Dict[_SSAAlignment, model.Region] = {}
126219

127-
region = model.Region(_DEFAULT_REGION_ID, doc)
128-
region.set_style(
129-
styles.StyleProperties.Origin,
130-
styles.CoordinateType(
131-
x=styles.LengthType(5, styles.LengthType.Units.pct),
132-
y=styles.LengthType(5, styles.LengthType.Units.pct)
133-
)
134-
)
135-
region.set_style(
136-
styles.StyleProperties.Extent,
137-
styles.ExtentType(
138-
height=styles.LengthType(90, styles.LengthType.Units.pct),
139-
width=styles.LengthType(90, styles.LengthType.Units.pct)
140-
)
141-
)
142-
region.set_style(
143-
styles.StyleProperties.DisplayAlign,
144-
styles.DisplayAlignType.after
145-
)
146-
region.set_style(
147-
styles.StyleProperties.TextAlign,
148-
styles.TextAlignType.center
149-
)
150-
region.set_style(
151-
styles.StyleProperties.LineHeight,
152-
_DEFAULT_LINE_HEIGHT
153-
)
154-
region.set_style(
155-
styles.StyleProperties.FontFamily,
156-
_DEFAULT_FONT_STACK
157-
)
158-
region.set_style(
159-
styles.StyleProperties.FontSize,
160-
_DEFAULT_FONT_SIZE
161-
)
162-
region.set_style(
163-
styles.StyleProperties.Color,
164-
_DEFAULT_TEXT_COLOR
165-
)
166-
region.set_style(
167-
styles.StyleProperties.TextOutline,
168-
styles.TextOutlineType(
169-
_DEFAULT_OUTLINE_THICKNESS,
170-
_DEFAULT_OUTLINE_COLOR
171-
)
172-
)
220+
doc = model.ContentDocument()
173221

174-
doc.put_region(region)
222+
# Create default region using an2 alignment (bottom, horizontal-center)
223+
region = _get_region_for_alignment(doc, _SSAAlignment.an2, alignment_regions_cache)
175224

176225
body = model.Body(doc)
177-
body.set_region(region)
226+
# Only set body region if not using alignment_tags
227+
# When alignment_tags is enabled, paragraphs have their own regions and body
228+
# must not have a region set, otherwise ISD generation will prune content
229+
if not alignment_tags:
230+
body.set_region(region)
178231

179232
doc.set_body(body)
180233

@@ -241,6 +294,15 @@ def to_model(data_file: typing.IO, _config: SRTReaderConfiguration = None, progr
241294
if line is None or _EMPTY_RE.fullmatch(line):
242295
subtitle_text = subtitle_text.strip('\r\n').replace(r"\n\r", "\n")
243296

297+
# Extract and handle alignment tags if enabled
298+
if alignment_tags:
299+
alignment, subtitle_text = _extract_alignment(subtitle_text)
300+
if alignment is not None:
301+
aligned_region = _get_region_for_alignment(
302+
doc, alignment, alignment_regions_cache
303+
)
304+
current_p.set_region(aligned_region)
305+
244306
if extended_tags:
245307
subtitle_text = subtitle_text\
246308
.replace(r"{b}", r"<b>")\

0 commit comments

Comments
 (0)