|
3 | 3 |
|
4 | 4 | import io |
5 | 5 | import logging |
| 6 | +import re |
6 | 7 | from rtfparse import re_patterns |
| 8 | +from rtfparse import utils |
| 9 | +from rtfparse.enums import Bytestring_Type |
7 | 10 |
|
8 | 11 |
|
9 | 12 | # Setup logging |
10 | 13 | logger = logging.getLogger(__name__) |
11 | 14 |
|
12 | 15 |
|
13 | | -class Destination_Group: |
# Byte counts used to size the reads performed when creating entities.
BACKSLASH = len(b"\\")
CHARACTER = BACKSLASH
IGNORABLE = BACKSLASH + len(rb"*")
# The one-byte placeholder stands in for a brace; a literal brace is avoided
# so that editor auto-indentation is not confused by an unbalanced bracket.
GROUP_START = len(rb"x") + IGNORABLE
DELIMITER = len(rb" ")
MAX_CW_LETTERS = 32
INTEGER_MAGNITUDE = 32
# Backslash + letters + minus sign + decimal digits of half the
# INTEGER_MAGNITUDE-bit range + one trailing delimiter.
CONTROL_WORD = sum(
    (
        BACKSLASH,
        MAX_CW_LETTERS,
        len(rb"-"),
        len(str((1 << INTEGER_MAGNITUDE) // 2)),
        DELIMITER,
    )
)
| 24 | + |
| 25 | + |
class Entity:
    """Base class of the parsed entities; offers a look-ahead classifier."""

    @classmethod
    def probe(cls, pattern: re_patterns.Bytes_Regex, file: io.BufferedReader) -> Bytestring_Type:
        """Classify the bytes at the current file position without consuming them.

        Reads ``len(re_patterns.probe_pattern)`` bytes, restores the file
        position the caller had before the read, and reports which kind of
        bytestring the look-ahead starts with.

        Args:
            pattern: Currently unused; kept so existing callers keep working.
            file: Binary reader positioned at the bytes to classify.

        Returns:
            The ``Bytestring_Type`` member of the first pattern that matches,
            tried in the order group start, group end, control word, control
            symbol; ``Bytestring_Type.PLAIN_TEXT`` when none matches.
        """
        logger.debug("in Entity.probed")
        original_position = file.tell()
        probed = file.read(len(re_patterns.probe_pattern))
        logger.debug(f"{probed = }")
        # Undo the look-ahead read. Seeking to `original_position - len(probed)`
        # would land *before* the caller's position (or at a negative offset
        # near the start of the file), so restore the saved position itself.
        file.seek(original_position)
        if re_patterns.group_start.match(probed):
            result = Bytestring_Type.GROUP_START
        elif re_patterns.group_end.match(probed):
            result = Bytestring_Type.GROUP_END
        elif re_patterns.control_word.match(probed):
            result = Bytestring_Type.CONTROL_WORD
        elif re_patterns.control_symbol.match(probed):
            result = Bytestring_Type.CONTROL_SYMBOL
        else:
            result = Bytestring_Type.PLAIN_TEXT
        logger.debug(f"{result = }")
        return result
| 46 | + |
| 47 | + |
class Control_Word(Entity):
    """A control word read from the current position of a binary reader.

    Attributes are set by ``__init__``:
        start_position: File offset at which reading started.
        control_name: The ``control_name`` group of the match, or ``None``
            when no control word was found.
        parameter: The ``parameter`` group of the match, or ``None`` when no
            control word was found.
    """

    def __init__(self, file: io.BufferedReader) -> None:
        """Read one control word from ``file``.

        On a match the file is left positioned right after the matched bytes.
        When nothing matches, the file is rewound to where it started so the
        bytes that were inspected are not lost to the caller.

        Args:
            file: Binary reader positioned at the expected control word.
        """
        logger.debug("Control_Word.__init__")
        # Defaults guarantee both attributes exist even without a match;
        # otherwise readers of `control_name` would hit an AttributeError.
        self.control_name = None
        self.parameter = None
        self.start_position = file.tell()
        logger.debug(f"Starting at file position {self.start_position}")
        probe = file.read(CONTROL_WORD)
        match = re_patterns.control_word.match(probe)
        if match:
            self.control_name = match.group("control_name")
            logger.debug(f"{self.control_name = }")
            self.parameter = match.group("parameter")
            # Only the matched bytes belong to this control word.
            file.seek(self.start_position + match.end())
        else:
            logger.warning(f"No control word found at file position {self.start_position}")
            # Give back everything that was read while probing.
            file.seek(self.start_position)
| 59 | + |
| 60 | + |
class Destination_Group(Entity):
    """A group read from the current position of a binary reader.

    Attributes are set by ``__init__``:
        known: Truthiness of the ``group_start`` match group; ``False`` when
            no group start was found.
        name: Control name of the group's leading control word, or
            ``"unknown"`` when none could be read.
        ignorable: Truthiness of the ``ignorable`` match group.
        start_position: File offset at which reading started.
        cw: The leading ``Control_Word``, or ``None`` when no group start
            was found.
        content: List initialised empty by the constructor.
    """

    def __init__(self, file: io.BufferedReader) -> None:
        """Read the start of a group and its leading control word.

        Args:
            file: Binary reader positioned at the expected group start.
        """
        logger.debug("Destination_Group.__init__")
        logger.debug(f"Creating destination group from {file.name}")
        self.known = False
        self.name = "unknown"
        self.ignorable = False
        # Always defined, so callers can test for a missing control word.
        self.cw = None
        self.start_position = file.tell()
        logger.debug(f"Starting at file position {self.start_position}")
        probe = file.read(GROUP_START)
        logger.debug(f"Read file up to position {file.tell()}")
        match = re_patterns.group_start.match(probe)
        if match:
            self.known = bool(match.group("group_start"))
            self.ignorable = bool(match.group("ignorable"))
            if not self.ignorable:
                # The read covered room for the ignorable marker; without
                # one, those bytes belong to whatever follows the group start.
                file.seek(-IGNORABLE, io.SEEK_CUR)
                logger.debug(f"Returned to position {file.tell()}")
            self.cw = Control_Word(file)
            # Keep the "unknown" default when the control word carries no name.
            control_name = getattr(self.cw, "control_name", None)
            if control_name is not None:
                self.name = control_name
        else:
            logger.warning(utils.warn(f"Expected group has no start. Creating unknown group"))
            # Nothing here belonged to a group start: hand the bytes back
            # instead of silently dropping them.
            file.seek(self.start_position)
        # Result is not used yet; the call is kept for its logging/look-ahead.
        self.probe(re_patterns.probe, file)
        self.content = []
16 | 84 |
|
17 | 85 |
|
18 | 86 | if __name__ == "__main__": |
|
0 commit comments