Skip to content

Commit 9612031

Browse files
author
Sven Siegmund
committed
rtfparser can now handle \binN control words
1 parent d82c8d3 commit 9612031

File tree

5 files changed

+39
-2
lines changed

5 files changed

+39
-2
lines changed

src/rtfparse/entities.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ def __init__(self, config: config_loader.Config, file: io.BufferedReader) -> Non
6969
logger.debug(f"Reading Control Word at file position {file.tell()}")
7070
self.control_name = "missing"
7171
self.parameter = ""
72+
self.bindata = b""
7273
self.start_position = file.tell()
7374
logger.debug(f"Starting at file position {self.start_position}")
7475
probe = file.read(CONTROL_WORD)
@@ -86,6 +87,9 @@ def __init__(self, config: config_loader.Config, file: io.BufferedReader) -> Non
8687
logger.debug(f"Delimiter is {match.group('other').decode(self.config.default_encoding)}, len: {len(match.group('delimiter'))}")
8788
target_position -= len(match.group("delimiter"))
8889
file.seek(target_position)
90+
# handle \binN:
91+
if self.control_name == "bin":
92+
self.bindata = file.read(utils.twos_complement(self.parameter, INTEGER_MAGNITUDE))
8993
else:
9094
logger.warning(f"Missing Control Word")
9195
file.seek(self.start_position)

src/rtfparse/entry.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,6 @@ def run(config: config_loader.Config) -> None:
8282
de_encapsulate(rp, config.cli_args.de_encapsulate_html)
8383

8484

85-
8685
def cli_start(version) -> None:
8786
"""
8887
Entry point for any component start from the command line

src/rtfparse/minimal.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#!/usr/bin/env python
2+
3+
4+
def tc(val: int, nbits: int) -> int:
    """Interpret ``val`` as an ``nbits``-wide two's-complement signed integer.

    A non-negative ``val`` is treated as a raw ``nbits``-bit pattern: if its
    sign bit (bit ``nbits - 1``) is set, the corresponding negative value is
    returned, otherwise ``val`` is returned unchanged.  A negative ``val`` is
    only range-checked and returned as is, because it already is the signed
    interpretation of its own bit pattern.

    Credit: https://stackoverflow.com/a/37075643/9235421

    Args:
        val: Integer to interpret; either an unsigned bit pattern
            (``0 <= val < 2**nbits``) or an already-signed value
            (``-2**(nbits - 1) <= val < 0``).
        nbits: Width of the integer in bits; must be at least 1.

    Returns:
        The signed value in the range ``[-2**(nbits - 1), 2**(nbits - 1) - 1]``.

    Raises:
        ValueError: If ``nbits`` is smaller than 1 or ``val`` does not fit
            into ``nbits`` bits.
    """
    # Without this guard ``1 << (nbits - 1)`` below would fail with an
    # unrelated "negative shift count" ValueError for nbits == 0.
    if nbits < 1:
        raise ValueError(f"Bit width must be at least 1, got {nbits}.")
    if val < 0:
        # The smallest representable value is -2**(nbits - 1); adding 1
        # before taking bit_length() lets exactly that boundary value pass.
        if (val + 1).bit_length() >= nbits:
            raise ValueError(f"Value {val} is out of range of {nbits}-bit value.")
        # Converting to the unsigned pattern and back would be a no-op.
        return val
    if val.bit_length() > nbits:
        raise ValueError(f"Value {val} is out of range of {nbits}-bit value.")
    # If the sign bit is set, the pattern encodes a negative number.
    if val & (1 << (nbits - 1)):
        return val - (1 << nbits)
    return val

src/rtfparse/re_patterns.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,12 @@ def no_capture(content: bytes) -> bytes:
5656
# int16 = minus + digit + rb"{1,5}"
5757
parameter_pattern = named_regex_group("parameter", digit)
5858
space = named_regex_group("space", rb" ")
59+
newline = named_regex_group("newline", _newline)
5960
other = named_regex_group("other", group(rb"^" + _letters + _digits))
6061

6162

6263
ascii_letter_sequence = named_regex_group("control_name", ascii_letters + parameter_pattern + rb"?")
63-
delimiter = named_regex_group("delimiter", rb"|".join((space, other, rb"$")))
64+
delimiter = named_regex_group("delimiter", rb"|".join((space, newline, other, rb"$")))
6465
symbol = named_regex_group("symbol", other)
6566
control_word_pattern = named_regex_group("control_word", rtf_backslash + ascii_letter_sequence + delimiter)
6667
pcdata_delimiter = no_capture(rb"|".join((rtf_brace_open, rtf_brace_close, control_word_pattern)))

src/rtfparse/utils.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,3 +51,19 @@ def what_is_being_parsed(file: Union[io.BufferedReader, io.BytesIO]) -> str:
5151
return file.name
5252
elif isinstance(file, io.BytesIO):
5353
return repr(file)
54+
55+
56+
def twos_complement(val: int, nbits: int) -> int:
    """Interpret ``val`` as an ``nbits``-wide two's-complement signed integer.

    A non-negative ``val`` is treated as a raw ``nbits``-bit pattern: if its
    sign bit (bit ``nbits - 1``) is set, the corresponding negative value is
    returned, otherwise ``val`` is returned unchanged.  A negative ``val`` is
    only range-checked and returned as is, because it already is the signed
    interpretation of its own bit pattern.

    Credit: https://stackoverflow.com/a/37075643/9235421

    Args:
        val: Integer to interpret; either an unsigned bit pattern
            (``0 <= val < 2**nbits``) or an already-signed value
            (``-2**(nbits - 1) <= val < 0``).
        nbits: Width of the integer in bits; must be at least 1.

    Returns:
        The signed value in the range ``[-2**(nbits - 1), 2**(nbits - 1) - 1]``.

    Raises:
        ValueError: If ``nbits`` is smaller than 1 or ``val`` does not fit
            into ``nbits`` bits.
    """
    # Without this guard ``1 << (nbits - 1)`` below would fail with an
    # unrelated "negative shift count" ValueError for nbits == 0.
    if nbits < 1:
        raise ValueError(f"Bit width must be at least 1, got {nbits}.")
    if val < 0:
        # The smallest representable value is -2**(nbits - 1); adding 1
        # before taking bit_length() lets exactly that boundary value pass.
        if (val + 1).bit_length() >= nbits:
            raise ValueError(f"Value {val} is out of range of {nbits}-bit value.")
        # Converting to the unsigned pattern and back would be a no-op.
        return val
    if val.bit_length() > nbits:
        raise ValueError(f"Value {val} is out of range of {nbits}-bit value.")
    # If the sign bit is set, the pattern encodes a negative number.
    if val & (1 << (nbits - 1)):
        return val - (1 << nbits)
    return val

0 commit comments

Comments
 (0)