|
14 | 14 |
|
15 | 15 |
|
class Encapsulated_HTML(Renderer):
    """Renderer that writes the textual content of a parsed RTF tree to a file.

    Output is suppressed while ``self.ignore_rtf`` is set; that flag is
    toggled by the ``htmlrtf`` control word (see ``ignore_rtf_toggle``).
    Groups whose name is listed in ``self.ignore_groups`` are skipped
    entirely.
    """

    def __init__(self) -> None:
        super().__init__()
        # While True, plain text, control symbols, newlines and tabs are
        # not written to the output.
        self.ignore_rtf = False
        # Dispatch table: control word name -> handler returning the text
        # to write for that control word (possibly an empty string).
        self.render_word_func = {
            "par": self.newline,
            "line": self.newline,
            "tab": self.tab,
            "fromhtml": self.check_fromhtml,
            "htmlrtf": self.ignore_rtf_toggle,
        }
        # Names of groups that are not descended into when rendering.
        self.ignore_groups = (
            "fonttbl",
            "colortbl",
        )

    def ignore_rtf_toggle(self, cw: entities.Control_Word) -> str:
        """Update ``self.ignore_rtf`` from an ``htmlrtf`` control word.

        A parameter of ``""`` or ``1`` turns suppression on, ``0`` turns it
        off; any other value leaves the flag unchanged. Always returns an
        empty string, so nothing is written for the control word itself.
        """
        # NOTE(review): assumes the parser yields "" for a missing parameter
        # and an int otherwise - confirm against entities.Control_Word.
        if cw.parameter == "" or cw.parameter == 1:
            self.ignore_rtf = True
        elif cw.parameter == 0:
            self.ignore_rtf = False
        return ""

    def check_fromhtml(self, cw: entities.Control_Word) -> str:
        """Log whether a ``fromhtml`` control word carries the parameter 1.

        Logs an info message when it does and two warnings otherwise.
        Always returns an empty string.
        """
        if cw.parameter == 1:
            logger.info("Confirming that RTF was indeed generated from HTML")
        else:
            logger.warning(utils.warn("Encountered a part of RTF which was not generated from HTML"))
            logger.warning(utils.warn("This might not be the right renderer for it."))
        return ""

    def newline(self, cw: entities.Control_Word) -> str:
        """Return a newline, or an empty string while output is suppressed."""
        return "" if self.ignore_rtf else "\n"

    def tab(self, cw: entities.Control_Word) -> str:
        """Return a tab, or an empty string while output is suppressed."""
        return "" if self.ignore_rtf else "\t"

    def render_symbol(self, item: entities.Control_Symbol, file: io.TextIOWrapper) -> None:
        """Write the text equivalent of a control symbol to ``file``.

        Nothing is written while ``self.ignore_rtf`` is set.
        """
        if self.ignore_rtf:
            return
        symbol = item.text
        # Obsolete formula character used by Word 5.1 for Macintosh
        if symbol == "|":
            pass
        # Non-breaking space
        elif symbol == "~":
            file.write("\u00a0")
        # Optional hyphen
        elif symbol == "-":
            pass
        # Non-breaking hyphen
        elif symbol == "_":
            file.write("\u2011")
        # Subentry in an index entry
        elif symbol == ":":
            pass
        # Ignorable outside of Group
        elif symbol == "*":
            logger.warning(utils.warn("Found an IGNORABLE control symbol which is not a group start!"))
        # Probably any symbol converted from a hex code: \'hh
        else:
            file.write(symbol)

    def render(self, parsed: entities.Group, file: io.TextIOWrapper) -> None:
        """Recursively write the rendered content of ``parsed`` to ``file``.

        Walks ``parsed.structure`` in order: nested groups are rendered
        recursively unless their name is in ``self.ignore_groups``, control
        words are dispatched through ``self.render_word_func`` (unknown
        control words are skipped), control symbols go through
        ``render_symbol``, and plain text is written unless
        ``self.ignore_rtf`` is set. Any other item type is ignored.
        """
        for item in parsed.structure:
            if isinstance(item, entities.Group):
                if item.name not in self.ignore_groups:
                    self.render(item, file)
            elif isinstance(item, entities.Control_Word):
                # Only the lookup may legitimately miss; a KeyError raised
                # inside a handler or by the write must not be swallowed.
                handler = self.render_word_func.get(item.control_name)
                if handler is not None:
                    file.write(handler(item))
            elif isinstance(item, entities.Control_Symbol):
                self.render_symbol(item, file)
            elif isinstance(item, entities.Plain_Text):
                if not self.ignore_rtf:
                    file.write(item.text)
33 | 94 |
|
|
0 commit comments