Skip to content

Commit 6ae805c

Browse files
authored
Merge pull request #339 from pbs/OCTO-11027-BS-should-be-skipped-if-PAC-commands-are-duplicated
OCTO-11027: BS (backspace) should be skipped if PAC commands are duplicated
2 parents e2f6acd + dbffa90 commit 6ae805c

File tree

11 files changed

+109
-35
lines changed

11 files changed

+109
-35
lines changed

docs/changelog.rst

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,15 @@
11
Changelog
22
---------
3-
2.2.10
4-
^^^^^
3+
2.2.11
4+
^^^^^^
55
- A space should not be placed before a mid row code if it follows a PAC command or a Tab Offset
66
- The backspace command should be treated like other commands and duplicates should be skipped if PAC commands are duplicated
7+
- Prevent the WebVTT writer from creating a new cue when the text only breaks onto the next line
8+
- When a style-setting PAC also breaks the line, the line break is added first, then the style tag
9+
10+
2.2.10
11+
^^^^^
12+
- Yanked.
713

814
2.2.9
915
^^^^^

docs/conf.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,9 @@
5353
# built documents.
5454
#
5555
# The short X.Y version.
56-
version = '2.2.10'
56+
version = '2.2.11'
5757
# The full version, including alpha/beta/rc tags.
58-
release = '2.2.10'
58+
release = '2.2.11'
5959

6060
# The language for content autogenerated by Sphinx. Refer to documentation
6161
# for a list of supported languages.

pycaption/base.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -114,13 +114,16 @@ class CaptionNode:
114114
STYLE = 2
115115
BREAK = 3
116116

117-
def __init__(self, type_, layout_info=None, content=None, start=None):
117+
def __init__(
118+
self, type_, layout_info=None, content=None, start=None, position=None
119+
):
118120
"""
119121
:type type_: int
120122
:type layout_info: Layout
121123
"""
122124
self.type_ = type_
123125
self.content = content
126+
self.position = position
124127

125128
# Boolean. Marks the beginning/ end of a Style node.
126129
self.start = start
@@ -139,19 +142,24 @@ def __repr__(self):
139142
raise RuntimeError(f'Unknown node type: {t}')
140143

141144
@staticmethod
142-
def create_text(text, layout_info=None):
145+
def create_text(text, layout_info=None, position=None):
143146
return CaptionNode(
144-
CaptionNode.TEXT, layout_info=layout_info, content=text)
147+
type_=CaptionNode.TEXT, layout_info=layout_info,
148+
position=position, content=text
149+
)
145150

146151
@staticmethod
147152
def create_style(start, content, layout_info=None):
148153
return CaptionNode(
149-
CaptionNode.STYLE, layout_info=layout_info, content=content,
154+
type_=CaptionNode.STYLE, layout_info=layout_info, content=content,
150155
start=start)
151156

152157
@staticmethod
153-
def create_break(layout_info=None):
154-
return CaptionNode(CaptionNode.BREAK, layout_info=layout_info)
158+
def create_break(layout_info=None, content=None):
159+
return CaptionNode(
160+
type_=CaptionNode.BREAK, layout_info=layout_info,
161+
content=content
162+
)
155163

156164

157165
class Caption:

pycaption/scc/__init__.py

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@
9494
MICROSECONDS_PER_CODEWORD, CHARACTER_TO_CODE,
9595
SPECIAL_OR_EXTENDED_CHAR_TO_CODE, PAC_BYTES_TO_POSITIONING_MAP,
9696
PAC_HIGH_BYTE_BY_ROW, PAC_LOW_BYTE_BY_ROW_RESTRICTED,
97-
PAC_TAB_OFFSET_COMMANDS,
97+
PAC_TAB_OFFSET_COMMANDS, CUE_STARTING_COMMAND
9898
)
9999
from .specialized_collections import ( # noqa: F401
100100
TimingCorrectingCaptionList, NotifyingDict, CaptionCreator,
@@ -164,6 +164,7 @@ def __init__(self, *args, **kw):
164164
)
165165

166166
self.last_command = ''
167+
self.double_starter = False
167168

168169
self.buffer_dict = NotifyingDict()
169170

@@ -223,6 +224,7 @@ def read(self, content, lang='en-US', simulate_roll_up=False, offset=0):
223224
# split lines
224225
lines = content.splitlines()
225226

227+
226228
# loop through each line except the first
227229
for line in lines[1:]:
228230
self._translate_line(line)
@@ -307,24 +309,21 @@ def _translate_line(self, line):
307309
parts = r.findall(line.lower())
308310

309311
self.time_translator.start_at(parts[0][0])
310-
311312
word_list = parts[0][2].split(' ')
312-
pacs_are_doubled = len(word_list) > 1 and word_list[0] == word_list[1]
313+
313314
for idx, word in enumerate(word_list):
314-
# ignore empty results or invalid commands
315315
word = word.strip()
316-
previous_is_pac_or_tab = idx > 0 and (
317-
_is_pac_command(word_list[idx-1]) or word_list[idx-1] in PAC_TAB_OFFSET_COMMANDS
316+
previous_is_pac_or_tab = len(word_list) > 1 and (
317+
_is_pac_command(word_list[idx - 1]) or word_list[idx - 1] in PAC_TAB_OFFSET_COMMANDS
318318
)
319319
if len(word) == 4:
320320
self._translate_word(
321321
word=word,
322322
previous_is_pac_or_tab=previous_is_pac_or_tab,
323-
pacs_are_doubled=pacs_are_doubled
324323
)
325324

326-
def _translate_word(self, word, previous_is_pac_or_tab, pacs_are_doubled):
327-
if self._handle_double_command(word, pacs_are_doubled):
325+
def _translate_word(self, word, previous_is_pac_or_tab):
326+
if self._handle_double_command(word):
328327
# count frames for timing
329328
self.time_translator.increment_frames()
330329
return
@@ -348,19 +347,25 @@ def _translate_word(self, word, previous_is_pac_or_tab, pacs_are_doubled):
348347
# count frames for timing only after processing a command
349348
self.time_translator.increment_frames()
350349

351-
def _handle_double_command(self, word, pacs_are_doubled):
350+
def _handle_double_command(self, word):
352351
# If the caption is to be broadcast, each of the commands is doubled
353352
# up for redundancy in case the signal is garbled in transmission.
354353
# The decoder is programmed to ignore a second command when it is the
355354
# same as the first.
356355
# If we have doubled commands we're skipping also
357356
# doubled special characters and doubled extended characters
358357
# with only one member of each pair being displayed.
359-
doubled_types = word in COMMANDS or _is_pac_command(word)
360-
if pacs_are_doubled:
361-
doubled_types = doubled_types or word in SPECIAL_CHARS or word in EXTENDED_CHARS
358+
359+
doubled_types = word != "94a1" and word in COMMANDS or _is_pac_command(word)
360+
if self.double_starter:
361+
doubled_types = doubled_types or word in EXTENDED_CHARS or word == "94a1" or word in SPECIAL_CHARS
362+
363+
if word in CUE_STARTING_COMMAND and word != self.last_command:
364+
self.double_starter = False
362365

363366
if doubled_types and word == self.last_command:
367+
if word in CUE_STARTING_COMMAND:
368+
self.double_starter = True
364369
self.last_command = ''
365370
return True
366371
# Fix for the <position> <tab offset> <position> <tab offset>

pycaption/scc/constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1058,3 +1058,5 @@ def _restructure_bytes_to_position_map(byte_to_pos_map):
10581058
"10a7", "10a8", "1029", "102a", "10ab", "102c", "10ad",
10591059
"10ae", "102f", "97ad"
10601060
]
1061+
1062+
CUE_STARTING_COMMAND = ['9425', '9426', '94a7', '9429', '9420']

pycaption/scc/specialized_collections.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import collections
2-
import unicodedata
32

43
from ..base import CaptionList, Caption, CaptionNode
54
from ..geometry import (
@@ -9,7 +8,7 @@
98
from .constants import (
109
PAC_BYTES_TO_POSITIONING_MAP, COMMANDS, PAC_TAB_OFFSET_COMMANDS,
1110
MICROSECONDS_PER_CODEWORD, BACKGROUND_COLOR_CODES,
12-
MID_ROW_CODES, EXTENDED_CHARS, SPECIAL_CHARS
11+
MID_ROW_CODES, EXTENDED_CHARS
1312
)
1413

1514
PopOnCue = collections.namedtuple("PopOnCue", "buffer, start, end")
@@ -255,7 +254,10 @@ def create_and_store(self, node_buffer, start, end=0):
255254
layout_info = _get_layout_from_tuple(instruction.position)
256255
caption.nodes.append(
257256
CaptionNode.create_text(
258-
instruction.text, layout_info=layout_info),
257+
text=instruction.text,
258+
layout_info=layout_info,
259+
position=instruction.position
260+
)
259261
)
260262
caption.layout_info = layout_info
261263

@@ -366,6 +368,10 @@ def interpret_command(self, command, previous_is_pac_or_tab=False):
366368
self._collection[-1].text = self._collection[-1].text[:-1]
367369

368370
if 'italic' in text:
371+
if self._position_tracer.is_linebreak_required():
372+
self._collection.append(_InstructionNode.create_break(
373+
position=self._position_tracer.get_current_position()))
374+
self._position_tracer.acknowledge_linebreak_consumed()
369375
if 'end' not in text:
370376
self._collection.append(
371377
_InstructionNode.create_italics_style(

pycaption/webvtt.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,7 @@ def _group_cues_by_layout(self, nodes, caption_set):
394394
return []
395395

396396
current_layout = None
397+
current_node = None
397398

398399
# A list with layout groups. Since WebVTT only supports positioning
399400
# for different cues, each layout group has to be represented in a
@@ -402,17 +403,24 @@ def _group_cues_by_layout(self, nodes, caption_set):
402403
# A properly encoded WebVTT string (plain unicode must be properly
403404
# escaped before being appended to this string)
404405
s = ''
406+
row, column, prev_row, prev_column = 0, 0, 0, 0
405407
for i, node in enumerate(nodes):
406408
if node.type_ == CaptionNode.TEXT:
407409
if s and current_layout and node.layout_info != current_layout:
408410
# If the positioning changes from one text node to
409411
# another, a new WebVTT cue has to be created.
410-
layout_groups.append((s, current_layout))
411-
s = ''
412+
row, column = node.position if node.position else (0, 0)
413+
prev_row, prev_column = current_node.position if current_node.position else (0, 0)
414+
if row == prev_row + 1:
415+
s += '\n'
416+
else:
417+
layout_groups.append((s, current_layout))
418+
s = ''
412419
# ATTENTION: This is where the plain unicode node content is
413420
# finally encoded as WebVTT.
414421
s += self._encode_illegal_characters(node.content) or '&nbsp;'
415422
current_layout = node.layout_info
423+
current_node = node
416424
elif node.type_ == CaptionNode.STYLE:
417425
resulting_style = self._calculate_resulting_style(
418426
node.content, caption_set

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424

2525
setup(
2626
name='pycaption',
27-
version='2.2.10',
27+
version='2.2.11',
2828
description='Closed caption converter',
2929
long_description=open(README_PATH).read(),
3030
author='Joe Norton',

tests/conftest.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,14 +54,14 @@
5454
scc_that_generates_webvtt_with_proper_newlines,
5555
sample_scc_produces_captions_with_start_and_end_time_the_same,
5656
sample_scc_pop_on, sample_scc_multiple_positioning, sample_scc_with_italics,
57-
sample_scc_empty, sample_scc_roll_up_ru2, sample_no_positioning_at_all_scc,
57+
sample_scc_empty, sample_scc_roll_up_ru2, sample_scc_roll_up_ru3,
58+
sample_no_positioning_at_all_scc, sample_scc_with_line_too_long,
5859
sample_scc_no_explicit_end_to_last_caption, sample_scc_flashing_cue,
5960
sample_scc_eoc_first_command, sample_scc_with_extended_characters,
6061
sample_scc_with_ampersand_character, sample_scc_multiple_formats,
6162
sample_scc_duplicate_tab_offset, sample_scc_duplicate_special_characters,
6263
sample_scc_tab_offset, sample_scc_with_unknown_commands,
6364
sample_scc_special_and_extended_characters,
64-
sample_scc_with_line_too_long
6565
)
6666
from tests.fixtures.srt import ( # noqa: F401
6767
sample_srt, sample_srt_ascii, sample_srt_numeric, sample_srt_empty,

tests/fixtures/scc.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,46 @@ def sample_scc_roll_up_ru2():
140140
141141
00:00:12;07 9425 9425 94ad 94ad 9470 9470 91b0 9131 9132 9132
142142
143+
00:00:12;30 9425 94ad 94ad 9470 9470 91b0 9131 9132 9132
144+
145+
00:00:13;07 9425 9425 94ad 94ad 9470 9470 c1c2 c3c4 c580 91bf
146+
147+
00:00:14;07 9425 9425 94ad 94ad 9470 9470 9220 9220 92a1 92a2 92a7
148+
149+
00:00:17;01 9426 9426 94ad 94ad 9470 9470 57c8 4552 4520 d94f d5a7 5245 20d3 54c1 cec4 49ce c720 ce4f 572c
150+
151+
00:00:18;19 9426 9426 94ad 94ad 9470 9470 4c4f 4fcb 49ce c720 4fd5 5420 54c8 4552 452c 2054 c8c1 54a7 d320 c14c 4c
152+
153+
00:00:20;06 9426 9426 94ad 94ad 9470 9470 54c8 4520 4352 4f57 c4ae
154+
155+
00:00:21;24 9426 9426 94ad 94ad 9470 9470 3e3e 2049 5420 57c1 d320 c74f 4fc4 2054 4f20 c245 2049 ce20 54c8 45
156+
157+
00:00:34;27 94a7 94ad 9470 c16e 6420 f2e5 73f4 eff2 e520 49ef f761 a773 20ec 616e 642c 20f7 61f4 e5f2
158+
159+
00:00:36;12 94a7 94ad 9470 c16e 6420 f7e9 ec64 ece9 e6e5 ae80
160+
161+
00:00:44;08 94a7 94ad 9470 3e3e 20c2 e96b e520 49ef f761 2c20 79ef 75f2 2073 ef75 f2e3 e520 e6ef f280
162+
"""
163+
164+
165+
@pytest.fixture(scope="session")
166+
def sample_scc_roll_up_ru3():
167+
return """\
168+
Scenarist_SCC V1.0
169+
00:00:00;22 9425 9425 94ad 94ad 9470 9470 3e3e 3e20 c849 ae80
170+
171+
00:00:02;23 9425 9425 94ad 94ad 9470 9470 49a7 cd20 cb45 d649 ce20 43d5 cece 49ce c720 c1ce c420 c154
172+
173+
00:00:04;17 9425 9425 94ad 94ad 9470 9470 49ce d645 d354 4f52 a7d3 20c2 c1ce cb20 5745 20c2 454c 4945 d645 2049 ce80
174+
175+
00:00:06;04 9425 9425 94ad 94ad 9470 9470 c845 4cd0 49ce c720 54c8 4520 4c4f 43c1 4c20 ce45 49c7 c8c2 4f52 c84f 4fc4 d380
176+
177+
00:00:09;21 9425 9425 94ad 94ad 9470 9470 c1ce c420 49cd d052 4fd6 49ce c720 54c8 4520 4c49 d645 d320 4f46 20c1 4c4c
178+
179+
00:00:11;07 9425 9425 94ad 94ad 9470 9470 5745 20d3 4552 d645 ae80
180+
181+
00:00:12;07 9425 9425 94ad 94ad 9470 9470 91b0 9131 9132 9132
182+
143183
00:00:13;07 9425 9425 94ad 94ad 9470 9470 c1c2 c3c4 c580 91bf
144184
145185
00:00:14;07 9425 9425 94ad 94ad 9470 9470 9220 9220 92a1 92a2 92a7

0 commit comments

Comments
 (0)