You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Copy file name to clipboardExpand all lines: README.md
+6-2Lines changed: 6 additions & 2 deletions
Display the source diff
Display the rich diff
Original file line number
Diff line number
Diff line change
@@ -44,18 +44,22 @@ Created directory C:\Users\nagidal\rtfparse\html
44
44
45
45
`rtfparse` also creates the folder `.rtfparse` (beginning with a dot) in your home directory where it saves its default configuration and its log files.
46
46
47
-
# Usage
47
+
# Usage From Command Line
48
48
49
49
Use the `rtfparse` executable from the command line. For example, if you want to de-encapsulate the HTML from an RTF file, do it like this:
Or you can de-encapsulate the HTML from an MS Outlook message, thanks to [extract_msg](https://github.com/TeamMsgExtractor/msg-extractor) and [compressed_rtf](https://github.com/delimitry/compressed_rtf):
parser.add_argument("-f", "--file", action="store", metavar="PATH", type=pathlib.Path, help="path to the rtf file").completer=EC
53
53
parser.add_argument("-m", "--msg", action="store", metavar="PATH", type=pathlib.Path, help="Parse RTF from MS Outlook's .msg file").completer=EC
54
-
parser.add_argument("-d", "--de-encapsulate-html", action="store", metavar="PATH", type=pathlib.Path, help="De-encapsulate HTML from RTF").completer=EC
54
+
parser.add_argument("-d", "--de-encapsulate-html", action="store_true", help="De-encapsulate HTML from RTF").completer=EC
# if any item is a Control_Word which has a parameter, we assume that this is the parameter of \ansicpg, and that corresponds to the codepage we are looking for
48
+
if item.parameter:
49
+
param=item.parameter
50
+
if not param:
51
+
if names[0].control_name == "ansi":
52
+
encoding="ansi"
53
+
elif names[0].control_name == "mac":
54
+
encoding="mac_roman"
55
+
elif names[0].control_name == "pc":
56
+
encoding="cp437"
57
+
elif names[0].control_name == "pca":
58
+
encoding="cp850"
59
+
else:
60
+
encoding=f"cp{param}"
61
+
file.seek(0)
62
+
return encoding
63
+
def parse_file(self) -> entities.Group:
64
+
if self.rtf_path is not None:
65
+
file=open(self.rtf_path, mode="rb")
66
+
elif self.rtf_file is not None:
67
+
file=self.rtf_file
68
+
else:
69
+
file=io.BytesIO(b"")
25
70
parsed_object=utils.what_is_being_parsed(file)
26
71
logger.info(f"Parsing the structure of {parsed_object}")
27
72
try:
28
-
self.parsed=entities.Group(config, file)
29
-
except errors.UnexpectedEndOfFileError as err:
30
-
logger.error(f"{err}")
73
+
encoding=self.read_encoding(file)
74
+
self.parsed=entities.Group(encoding, file)
31
75
except Exception as err:
32
76
logger.exception(err)
33
77
finally:
78
+
if self.rtf_path is not None:
79
+
logger.debug(f"Closing {parsed_object}")
80
+
file.close()
34
81
logger.info(f"Structure of {parsed_object} parsed")
0 commit comments