Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,18 @@ Default: `23`

Default: `true`

### SRT Reader configuration (`"srt_reader"`)

#### extended_tags

`"extended_tags" : true | false`

If `true`, the following extended formatting tags are supported: `{bold}`,
`<bold>`, `{b}`, `{italic}`, `<italic>`, `{i}`, `{underline}`, `<underline>` and
`{u}`.

Default: `false`

### VTT Writer configuration (`"vtt_writer"`)

#### line_position
Expand Down
11 changes: 11 additions & 0 deletions src/main/python/ttconv/srt/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,17 @@
from dataclasses import dataclass, field
from ttconv.config import ModuleConfiguration

@dataclass
class SRTReaderConfiguration(ModuleConfiguration):
  """Options that control how the SRT reader parses its input."""

  # When True, the reader also accepts the extended formatting tags
  # {b}, {bold}, <bold> and the italic/underline equivalents.
  # Disabled by default.
  extended_tags: bool = field(default=False, metadata={"decoder": bool})

  @classmethod
  def name(cls):
    """Returns the name of the configuration section: "srt_reader"."""
    return "srt_reader"

@dataclass
class SRTWriterConfiguration(ModuleConfiguration):
"""SRT writer configuration"""
Expand Down
41 changes: 29 additions & 12 deletions src/main/python/ttconv/srt/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from ttconv import model
from ttconv import style_properties as styles
from ttconv.utils import parse_color
from ttconv.srt.config import SRTReaderConfiguration

LOGGER = logging.getLogger(__name__)

Expand All @@ -61,11 +62,11 @@ def handle_starttag(self, tag, attrs):
self.parent.push_child(span)
self.parent = span

if tag.lower() in ("b", "bold"):
if tag.lower() in ("b"):
span.set_style(styles.StyleProperties.FontWeight, styles.FontWeightType.bold)
elif tag.lower() in ("i", "italic"):
elif tag.lower() in ("i"):
span.set_style(styles.StyleProperties.FontStyle, styles.FontStyleType.italic)
elif tag.lower() in ("u", "underline"):
elif tag.lower() in ("u"):
span.set_style(styles.StyleProperties.TextDecoration, styles.TextDecorationType(underline=True))
elif tag.lower() == "font":
for attr in attrs:
Expand Down Expand Up @@ -116,9 +117,11 @@ class _State(Enum):
_DEFAULT_OUTLINE_COLOR = styles.NamedColors.black.value
_DEFAULT_LINE_HEIGHT = styles.LengthType(125, styles.LengthType.Units.pct)

def to_model(data_file: typing.IO, _config = None, progress_callback=lambda _: None):
def to_model(data_file: typing.IO, _config: SRTReaderConfiguration = None, progress_callback=lambda _: None):
"""Converts an SRT document to the data model"""

extended_tags = _config.extended_tags if isinstance(_config, SRTReaderConfiguration) else False

doc = model.ContentDocument()

region = model.Region(_DEFAULT_REGION_ID, doc)
Expand Down Expand Up @@ -236,14 +239,28 @@ def to_model(data_file: typing.IO, _config = None, progress_callback=lambda _: N
if state in (_State.TEXT, _State.TEXT_MORE):

if line is None or _EMPTY_RE.fullmatch(line):
subtitle_text = subtitle_text.strip('\r\n')\
.replace(r"\n\r", "\n")\
.replace(r"{bold}", r"<bold>")\
.replace(r"{/bold}", r"</bold>")\
.replace(r"{italic}", r"<italic>")\
.replace(r"{/italic}", r"</italic>")\
.replace(r"{underline}", r"<underline>")\
.replace(r"{/underline}", r"</underline>")
subtitle_text = subtitle_text.strip('\r\n').replace(r"\n\r", "\n")

if extended_tags:
subtitle_text = subtitle_text\
.replace(r"{b}", r"<b>")\
.replace(r"{/b}", r"</b>")\
.replace(r"{bold}", r"<b>")\
.replace(r"{/bold}", r"</b>")\
.replace(r"<bold>", r"<b>")\
.replace(r"</bold>", r"</b>")\
.replace(r"{i}", r"<i>")\
.replace(r"{/i}", r"</i>")\
.replace(r"{italic}", r"<i>")\
.replace(r"{/italic}", r"</i>")\
.replace(r"<italic>", r"<i>")\
.replace(r"</italic>", r"</i>")\
.replace(r"{u}", r"<u>")\
.replace(r"{/u}", r"</u>")\
.replace(r"{underline}", r"<u>")\
.replace(r"{/underline}", r"</u>")\
.replace(r"<underline>", r"<u>")\
.replace(r"</underline>", r"</u>")

parser = _TextParser(current_p, line_index)
parser.feed(subtitle_text)
Expand Down
11 changes: 8 additions & 3 deletions src/main/python/ttconv/tt.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,16 @@
from ttconv.isd import ISDConfiguration
from ttconv.scc.config import SccReaderConfiguration, SccWriterConfiguration
from ttconv.stl.config import STLReaderConfiguration
from ttconv.srt.config import SRTWriterConfiguration
from ttconv.srt.config import SRTReaderConfiguration, SRTWriterConfiguration

LOGGER = logging.getLogger("ttconv")

CONFIGURATIONS = [
GeneralConfiguration,
IMSCWriterConfiguration,
ISDConfiguration,
SccReaderConfiguration
SccReaderConfiguration,
SRTReaderConfiguration,
]


Expand Down Expand Up @@ -335,12 +336,16 @@ def convert(args):
model = stl_reader.to_model(f, reader_config, progress_callback_read)

elif reader_type is FileTypes.SRT:
#
# Read the config
#
reader_config = read_config_from_json(SRTReaderConfiguration, json_config_data)

#
# Open the file and pass it to the reader
#
with open(inputfile, "r", encoding="utf-8") as f:
model = srt_reader.to_model(f, None, progress_callback_read)
model = srt_reader.to_model(f, reader_config, progress_callback_read)

elif reader_type is FileTypes.VTT:

Expand Down
124 changes: 119 additions & 5 deletions src/test/python/test_srt_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import unittest
import io

from ttconv.srt.config import SRTReaderConfiguration
from ttconv.srt.reader import to_model
import ttconv.style_properties as styles
import ttconv.model as model
Expand Down Expand Up @@ -67,7 +68,7 @@ def test_sample(self):
def test_bold(self):
f = io.StringIO(r"""1
00:02:16,612 --> 00:02:19,376
Hello <bold>my</bold> name is Bob
Hello <b>my</b> name is Bob
""")
doc = to_model(f)
for e in doc.get_body().dfs_iterator():
Expand Down Expand Up @@ -107,17 +108,53 @@ def test_bold_alt(self):
00:02:16,612 --> 00:02:19,376
Hello {bold}my{/bold} name is Bob
""")
doc = to_model(f, SRTReaderConfiguration(extended_tags=True))
for e in doc.get_body().dfs_iterator():
if e.get_style(styles.StyleProperties.FontWeight) == styles.FontWeightType.bold:
break
else:
self.fail()
doc = to_model(f)
for e in doc.get_body().dfs_iterator():
if e.get_style(styles.StyleProperties.FontWeight) == styles.FontWeightType.bold:
self.fail()

def test_bold_alt2(self):
  """`<bold>` yields bold text only when extended tags are enabled."""
  f = io.StringIO(r"""1
00:02:16,612 --> 00:02:19,376
Hello <bold>my</bold> name is Bob
""")

  def has_bold(doc):
    # True if any element of the document body carries a bold font weight
    return any(
      e.get_style(styles.StyleProperties.FontWeight) == styles.FontWeightType.bold
      for e in doc.get_body().dfs_iterator()
    )

  # extended tags enabled: the tag must be interpreted
  self.assertTrue(has_bold(to_model(f, SRTReaderConfiguration(extended_tags=True))))

  # The first conversion read the stream to its end: rewind it so that the
  # second conversion parses the same cue rather than an empty document,
  # which would make the negative check below pass vacuously.
  f.seek(0)

  # default configuration: the tag must be ignored
  self.assertFalse(has_bold(to_model(f)))

def test_bold_alt3(self):
  """`{b}` yields bold text only when extended tags are enabled."""
  f = io.StringIO(r"""1
00:02:16,612 --> 00:02:19,376
Hello {b}my{/b} name is Bob
""")

  def has_bold(doc):
    # True if any element of the document body carries a bold font weight
    return any(
      e.get_style(styles.StyleProperties.FontWeight) == styles.FontWeightType.bold
      for e in doc.get_body().dfs_iterator()
    )

  # extended tags enabled: the tag must be interpreted
  self.assertTrue(has_bold(to_model(f, SRTReaderConfiguration(extended_tags=True))))

  # The first conversion read the stream to its end: rewind it so that the
  # second conversion parses the same cue rather than an empty document,
  # which would make the negative check below pass vacuously.
  f.seek(0)

  # default configuration: the tag must be ignored
  self.assertFalse(has_bold(to_model(f)))

def test_italic(self):
f = io.StringIO(r"""1
00:02:16,612 --> 00:02:19,376
Hello <italic>my</italic> name is Bob
Hello <i>my</i> name is Bob
""")
doc = to_model(f)
for e in doc.get_body().dfs_iterator():
Expand All @@ -131,17 +168,53 @@ def test_italic_alt(self):
00:02:16,612 --> 00:02:19,376
Hello {italic}my{/italic} name is Bob
""")
doc = to_model(f, SRTReaderConfiguration(extended_tags=True))
for e in doc.get_body().dfs_iterator():
if e.get_style(styles.StyleProperties.FontStyle) == styles.FontStyleType.italic:
break
else:
self.fail()
doc = to_model(f)
for e in doc.get_body().dfs_iterator():
if e.get_style(styles.StyleProperties.FontStyle) == styles.FontStyleType.italic:
self.fail()

def test_italic_alt1(self):
  """`{i}` yields italic text only when extended tags are enabled."""
  f = io.StringIO(r"""1
00:02:16,612 --> 00:02:19,376
Hello {i}my{/i} name is Bob
""")

  def has_italic(doc):
    # True if any element of the document body carries an italic font style
    return any(
      e.get_style(styles.StyleProperties.FontStyle) == styles.FontStyleType.italic
      for e in doc.get_body().dfs_iterator()
    )

  # extended tags enabled: the tag must be interpreted
  self.assertTrue(has_italic(to_model(f, SRTReaderConfiguration(extended_tags=True))))

  # The first conversion read the stream to its end: rewind it so that the
  # second conversion parses the same cue rather than an empty document,
  # which would make the negative check below pass vacuously.
  f.seek(0)

  # default configuration: the tag must be ignored
  self.assertFalse(has_italic(to_model(f)))

def test_italic_alt2(self):
  """`<italic>` yields italic text only when extended tags are enabled."""
  f = io.StringIO(r"""1
00:02:16,612 --> 00:02:19,376
Hello <italic>my</italic> name is Bob
""")

  def has_italic(doc):
    # True if any element of the document body carries an italic font style
    return any(
      e.get_style(styles.StyleProperties.FontStyle) == styles.FontStyleType.italic
      for e in doc.get_body().dfs_iterator()
    )

  # extended tags enabled: the tag must be interpreted
  self.assertTrue(has_italic(to_model(f, SRTReaderConfiguration(extended_tags=True))))

  # The first conversion read the stream to its end: rewind it so that the
  # second conversion parses the same cue rather than an empty document,
  # which would make the negative check below pass vacuously.
  f.seek(0)

  # default configuration: the tag must be ignored
  self.assertFalse(has_italic(to_model(f)))

def test_underline(self):
f = io.StringIO(r"""1
00:02:16,612 --> 00:02:19,376
Hello <underline>my</underline> name is Bob
Hello <u>my</u> name is Bob
""")
doc = to_model(f)
for e in doc.get_body().dfs_iterator():
Expand All @@ -156,13 +229,54 @@ def test_underline_alt(self):
00:02:16,612 --> 00:02:19,376
Hello {underline}my{/underline} name is Bob
""")
doc = to_model(f, SRTReaderConfiguration(extended_tags=True))
for e in doc.get_body().dfs_iterator():
text_decoration = e.get_style(styles.StyleProperties.TextDecoration)
if text_decoration is not None and text_decoration.underline:
break
else:
self.fail()
doc = to_model(f)
for e in doc.get_body().dfs_iterator():
text_decoration = e.get_style(styles.StyleProperties.TextDecoration)
if text_decoration is not None and text_decoration.underline:
self.fail()

def test_underline_alt1(self):
  """`{u}` yields underlined text only when extended tags are enabled."""
  f = io.StringIO(r"""1
00:02:16,612 --> 00:02:19,376
Hello {u}my{/u} name is Bob
""")

  def has_underline(doc):
    # True if any element of the document body has an underline decoration
    for e in doc.get_body().dfs_iterator():
      text_decoration = e.get_style(styles.StyleProperties.TextDecoration)
      if text_decoration is not None and text_decoration.underline:
        return True
    return False

  # extended tags enabled: the tag must be interpreted
  self.assertTrue(has_underline(to_model(f, SRTReaderConfiguration(extended_tags=True))))

  # The first conversion read the stream to its end: rewind it so that the
  # second conversion parses the same cue rather than an empty document,
  # which would make the negative check below pass vacuously.
  f.seek(0)

  # default configuration: the tag must be ignored
  self.assertFalse(has_underline(to_model(f)))

def test_underline_alt2(self):
  """`<underline>` yields underlined text only when extended tags are enabled."""
  f = io.StringIO(r"""1
00:02:16,612 --> 00:02:19,376
Hello <underline>my</underline> name is Bob
""")

  def has_underline(doc):
    # True if any element of the document body has an underline decoration
    for e in doc.get_body().dfs_iterator():
      text_decoration = e.get_style(styles.StyleProperties.TextDecoration)
      if text_decoration is not None and text_decoration.underline:
        return True
    return False

  # extended tags enabled: the tag must be interpreted
  self.assertTrue(has_underline(to_model(f, SRTReaderConfiguration(extended_tags=True))))

  # The first conversion read the stream to its end: rewind it so that the
  # second conversion parses the same cue rather than an empty document,
  # which would make the negative check below pass vacuously.
  f.seek(0)

  # default configuration: the tag must be ignored
  self.assertFalse(has_underline(to_model(f)))

def test_blue(self):
f = io.StringIO(r"""1
Expand All @@ -180,8 +294,8 @@ def test_blue(self):
def test_multiline_tags(self):
f = io.StringIO(r"""1
00:02:16,612 --> 00:02:19,376
Hello <bold>my
</bold> name is Bob
Hello <b>my
</b> name is Bob
""")
doc = to_model(f)
for e in doc.get_body().dfs_iterator():
Expand Down
20 changes: 20 additions & 0 deletions src/test/python/test_tt.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,26 @@ def test_lcd_filter(self):
'--filter', 'lcd',
'--config', '{"lcd": {"bg_color":"red"}}'
])

def test_srt_reader_extended_tags(self):
  """Converts an SRT file that uses `<bold>` with and without the
  `extended_tags` reader option and checks whether `fontWeight` appears in
  the resulting output."""
  source = "src/test/resources/srt/extended-tags.srt"

  # option enabled through the JSON configuration: bold styling is expected
  enabled_target = "build/extended_tags.ttml"
  enabled_args = [
    'convert',
    '-i', source,
    '-o', enabled_target,
    '--config', '{"srt_reader": {"extended_tags": true}}'
  ]
  tt.main(enabled_args)
  with open(enabled_target, encoding="utf-8") as converted:
    self.assertRegex(converted.read(), "fontWeight")

  # no configuration supplied: the extended tag must not produce bold styling
  default_target = "build/no-extended_tags.ttml"
  default_args = [
    'convert',
    '-i', source,
    '-o', default_target
  ]
  tt.main(default_args)
  with open(default_target, encoding="utf-8") as converted:
    self.assertNotRegex(converted.read(), "fontWeight")

def test_imsc11filter(self):
out_path = "build/imsc11filter.ttml"
Expand Down
3 changes: 3 additions & 0 deletions src/test/resources/srt/extended-tags.srt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
1
00:02:16,612 --> 00:02:19,376
Hello <bold>my</bold> name is Bob