@@ -49,6 +49,97 @@ def _none_terminated(iterator):
4949 yield item
5050 yield None
5151
52+ # Alignment tag regex for ASS/SSA style tags {\an1} through {\an9}
53+ _ALIGNMENT_TAG_RE = re .compile (r"\{\\an([1-9])\}" )
54+
class _SSAAlignment(Enum):
    """Positions selectable with the ASS/SSA ``{\\anN}`` alignment tag.

    Every member is named after its tag (``an1`` .. ``an9``). Its value is a
    ``(display_align, text_align)`` pair taken from ``styles.DisplayAlignType``
    and ``styles.TextAlignType`` respectively.

    The digits mirror a numeric keypad:
        7=top-left    8=top-center    9=top-right
        4=mid-left    5=mid-center    6=mid-right
        1=bot-left    2=bot-center    3=bot-right
    """

    # Bottom row of the keypad
    an1 = (styles.DisplayAlignType.after, styles.TextAlignType.start)
    an2 = (styles.DisplayAlignType.after, styles.TextAlignType.center)
    an3 = (styles.DisplayAlignType.after, styles.TextAlignType.end)

    # Middle row of the keypad
    an4 = (styles.DisplayAlignType.center, styles.TextAlignType.start)
    an5 = (styles.DisplayAlignType.center, styles.TextAlignType.center)
    an6 = (styles.DisplayAlignType.center, styles.TextAlignType.end)

    # Top row of the keypad
    an7 = (styles.DisplayAlignType.before, styles.TextAlignType.start)
    an8 = (styles.DisplayAlignType.before, styles.TextAlignType.center)
    an9 = (styles.DisplayAlignType.before, styles.TextAlignType.end)

    @classmethod
    def from_code(cls, code: int) -> "_SSAAlignment":
        """Return the member whose tag digit is *code* (1 through 9).

        The lookup is by member name, so a digit without a matching member
        raises ``KeyError``.
        """
        member_name = f"an{code}"
        return cls[member_name]

77+
def _extract_alignment(text: str) -> typing.Tuple[typing.Optional[_SSAAlignment], str]:
    """Split an alignment tag off a subtitle text.

    Returns a ``(alignment, text)`` pair. When ``text`` holds at least one
    ``{\\anN}`` tag, the alignment comes from the first tag found and every
    such tag is stripped from the returned text. When there is none, the pair
    is ``(None, text)`` with ``text`` passed through unchanged.
    """
    first_tag = _ALIGNMENT_TAG_RE.search(text)
    if first_tag is None:
        return None, text

    # Only the first tag decides the position; later ones are simply dropped.
    digit = int(first_tag.group(1))
    chosen = _SSAAlignment.from_code(digit)
    without_tags = _ALIGNMENT_TAG_RE.sub("", text)
    return chosen, without_tags

91+
def _get_region_for_alignment(
    doc: model.ContentDocument,
    alignment: _SSAAlignment,
    regions_cache: typing.Dict[_SSAAlignment, model.Region]
) -> model.Region:
    """Return the region that renders text at ``alignment``, creating it once.

    On a cache hit the stored region is returned as is. Otherwise a region
    named ``r_<member name>`` is built, inset on every side by
    ``_DEFAULT_SAFE_AREA_PCT`` percent, given the alignment's display/text
    alignment plus the module's default text styling, added to ``doc`` and
    stored in ``regions_cache`` for later calls.
    """
    try:
        return regions_cache[alignment]
    except KeyError:
        pass

    def _pct(value):
        # Every length used for the region geometry is a percentage.
        return styles.LengthType(value, styles.LengthType.Units.pct)

    block_align, inline_align = alignment.value
    inner_size_pct = 100 - 2 * _DEFAULT_SAFE_AREA_PCT

    new_region = model.Region(f"r_{alignment.name}", doc)

    region_styles = (
        # Geometry: the safe-area margin is applied on all four sides
        (
            styles.StyleProperties.Origin,
            styles.CoordinateType(
                x=_pct(_DEFAULT_SAFE_AREA_PCT),
                y=_pct(_DEFAULT_SAFE_AREA_PCT)
            )
        ),
        (
            styles.StyleProperties.Extent,
            styles.ExtentType(
                height=_pct(inner_size_pct),
                width=_pct(inner_size_pct)
            )
        ),
        # Placement of the text inside the region
        (styles.StyleProperties.DisplayAlign, block_align),
        (styles.StyleProperties.TextAlign, inline_align),
        # Default text appearance shared by every region
        (styles.StyleProperties.LineHeight, _DEFAULT_LINE_HEIGHT),
        (styles.StyleProperties.FontFamily, _DEFAULT_FONT_STACK),
        (styles.StyleProperties.FontSize, _DEFAULT_FONT_SIZE),
        (styles.StyleProperties.Color, _DEFAULT_TEXT_COLOR),
        (
            styles.StyleProperties.TextOutline,
            styles.TextOutlineType(_DEFAULT_OUTLINE_THICKNESS, _DEFAULT_OUTLINE_COLOR)
        ),
    )
    for style_prop, style_value in region_styles:
        new_region.set_style(style_prop, style_value)

    doc.put_region(new_region)
    regions_cache[alignment] = new_region

    return new_region

142+
52143class _TextParser (HTMLParser ):
53144
54145 def __init__ (self , paragraph : model .P , line_number : int ) -> None :
@@ -109,72 +200,34 @@ class _State(Enum):
109200_EMPTY_RE = re .compile (r"\s+" )
110201_COUNTER_RE = re .compile (r"\d+" )
111202_TIMECODE_RE = re .compile (r"(?P<begin_h>[0-9]{2,3}):(?P<begin_m>[0-9]{2}):(?P<begin_s>[0-9]{2}),(?P<begin_ms>[0-9]{3})\s+-->\s+(?P<end_h>[0-9]{2,3}):(?P<end_m>[0-9]{2}):(?P<end_s>[0-9]{2}),(?P<end_ms>[0-9]{3})" )
112- _DEFAULT_REGION_ID = "r1"
113203_DEFAULT_FONT_STACK = ("Verdana" , "Arial" , "Tiresias" , styles .GenericFontFamilyType .sansSerif )
114204_DEFAULT_FONT_SIZE = styles .LengthType (80 , styles .LengthType .Units .pct )
115205_DEFAULT_OUTLINE_THICKNESS = styles .LengthType (5 , styles .LengthType .Units .pct )
116206_DEFAULT_TEXT_COLOR = styles .NamedColors .white .value
117207_DEFAULT_OUTLINE_COLOR = styles .NamedColors .black .value
118208_DEFAULT_LINE_HEIGHT = styles .LengthType (125 , styles .LengthType .Units .pct )
209+ _DEFAULT_SAFE_AREA_PCT = 10
119210
120211def to_model (data_file : typing .IO , _config : SRTReaderConfiguration = None , progress_callback = lambda _ : None ):
121212 """Converts an SRT document to the data model"""
122213
123214 extended_tags = _config .extended_tags if isinstance (_config , SRTReaderConfiguration ) else False
215+ alignment_tags = _config .alignment_tags if isinstance (_config , SRTReaderConfiguration ) else False
124216
125- doc = model .ContentDocument ()
217+ # Cache for alignment-based regions
218+ alignment_regions_cache : typing .Dict [_SSAAlignment , model .Region ] = {}
126219
127- region = model .Region (_DEFAULT_REGION_ID , doc )
128- region .set_style (
129- styles .StyleProperties .Origin ,
130- styles .CoordinateType (
131- x = styles .LengthType (5 , styles .LengthType .Units .pct ),
132- y = styles .LengthType (5 , styles .LengthType .Units .pct )
133- )
134- )
135- region .set_style (
136- styles .StyleProperties .Extent ,
137- styles .ExtentType (
138- height = styles .LengthType (90 , styles .LengthType .Units .pct ),
139- width = styles .LengthType (90 , styles .LengthType .Units .pct )
140- )
141- )
142- region .set_style (
143- styles .StyleProperties .DisplayAlign ,
144- styles .DisplayAlignType .after
145- )
146- region .set_style (
147- styles .StyleProperties .TextAlign ,
148- styles .TextAlignType .center
149- )
150- region .set_style (
151- styles .StyleProperties .LineHeight ,
152- _DEFAULT_LINE_HEIGHT
153- )
154- region .set_style (
155- styles .StyleProperties .FontFamily ,
156- _DEFAULT_FONT_STACK
157- )
158- region .set_style (
159- styles .StyleProperties .FontSize ,
160- _DEFAULT_FONT_SIZE
161- )
162- region .set_style (
163- styles .StyleProperties .Color ,
164- _DEFAULT_TEXT_COLOR
165- )
166- region .set_style (
167- styles .StyleProperties .TextOutline ,
168- styles .TextOutlineType (
169- _DEFAULT_OUTLINE_THICKNESS ,
170- _DEFAULT_OUTLINE_COLOR
171- )
172- )
220+ doc = model .ContentDocument ()
173221
174- doc .put_region (region )
222+ # Create default region using an2 alignment (bottom, horizontal-center)
223+ region = _get_region_for_alignment (doc , _SSAAlignment .an2 , alignment_regions_cache )
175224
176225 body = model .Body (doc )
177- body .set_region (region )
226+ # Only set body region if not using alignment_tags
227+ # When alignment_tags is enabled, paragraphs have their own regions and body
228+ # must not have a region set, otherwise ISD generation will prune content
229+ if not alignment_tags :
230+ body .set_region (region )
178231
179232 doc .set_body (body )
180233
@@ -241,6 +294,15 @@ def to_model(data_file: typing.IO, _config: SRTReaderConfiguration = None, progr
241294 if line is None or _EMPTY_RE .fullmatch (line ):
242295 subtitle_text = subtitle_text .strip ('\r \n ' ).replace (r"\n\r" , "\n " )
243296
297+ # Extract and handle alignment tags if enabled
298+ if alignment_tags :
299+ alignment , subtitle_text = _extract_alignment (subtitle_text )
300+ if alignment is not None :
301+ aligned_region = _get_region_for_alignment (
302+ doc , alignment , alignment_regions_cache
303+ )
304+ current_p .set_region (aligned_region )
305+
244306 if extended_tags :
245307 subtitle_text = subtitle_text \
246308 .replace (r"{b}" , r"<b>" )\
0 commit comments