Skip to content

Commit d207d89

Browse files
author
Sven Siegmund
committed
Updated readme, config_loader
1 parent e7dc052 commit d207d89

File tree

5 files changed

+72
-16
lines changed

5 files changed

+72
-16
lines changed

README.md

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,60 @@
11
# rtfparse
22

3-
RTF Parser
3+
RTF Parser. So far it can only de-encapsulate HTML content from an RTF file, but it properly parses the RTF structure and allows you to write your own custom RTF renderers. The HTML de-encapsulator provided with `rtfparse` is just one such custom renderer: it liberates the HTML content from its RTF encapsulation and saves it to a given HTML file.
44

5-
## Call for collaboration
5+
# Dependencies
66

7-
This RTF parser needs your collaboration. Refer to the RTF Specification and create new parsable objects.
7+
See `requirements.txt`.
8+
9+
# Installation
10+
11+
Install rtfparse from your local repository with pip:
12+
13+
pip install ./rtfparse
14+
15+
Installation creates an executable file `rtfparse` in your Python scripts folder, which should be in your `$PATH`.
16+
17+
# First Run
18+
19+
When you run `rtfparse` for the first time, it will start a configuration wizard which will guide you through the process of creating a default configuration file and specifying the location of its folders. (These folders don't mean much yet; they are more or less placeholders for upcoming program features.)
20+
21+
In the configuration wizard you can press `A` for care-free automatic configuration, which would look something like this:
22+
23+
```
24+
$ rtfparse
25+
Config file missing, creating new default config file
26+
27+
____ ____ __ _ ____ _ ____ _ _ ____ ____ ___ _ ____ __ _
28+
|___ [__] | \| |--- | |__, |__| |--< |--| | | [__] | \|
29+
_ _ _ ___ ____ ____ ___
30+
|/\| | /__ |--| |--< |__>
31+
32+
33+
◊ email_rtf (C:\Users\nagidal\rtfparse\email_rtf) does not exist!
34+
35+
(A) Automatically configure this and all remaining rtfparse settings
36+
(C) Create this path automatically
37+
(M) Manually input correct path to use or to create
38+
(Q) Quit and edit `email_rtf` in rtfparse_configuration.ini
39+
40+
Created directory C:\Users\nagidal\rtfparse
41+
Created directory C:\Users\nagidal\rtfparse\email_rtf
42+
Created directory C:\Users\nagidal\rtfparse\html
43+
```
44+
45+
`rtfparse` also creates the folder `.rtfparse` (beginning with a dot) in your home directory, where it saves its default configuration and its log files.
46+
47+
# Usage
48+
49+
Use the `rtfparse` executable from the command line. For example, if you want to de-encapsulate the HTML from an RTF file, do it like this:
50+
51+
rtfparse -f "path/to/rtf_file.rtf" -d "path/to/de_encapsulated.html"
52+
53+
Or you can de-encapsulate the HTML from an MS Outlook message, thanks to [extract_msg](https://github.com/TeamMsgExtractor/msg-extractor) and [compressed_rtf](https://github.com/delimitry/compressed_rtf):
54+
55+
rtfparse -m "path/to/email.msg" -d "path/to/de_encapsulated.html"
56+
57+
The command reference is available via `rtfparse --help`.
858

959
# RTF Specification Links
1060

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
argcomplete
22
extract-msg
3+
compressed_rtf

src/rtfparse/config_loader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ def wizard(self, error: errors.WrongConfiguration, autoconfig: bool) -> None:
190190
if "path does not exist" in error.message:
191191
reason = (f"{error.payload.internal_name} ({error.payload.path}) does not exist!")
192192
options = OrderedDict((
193-
("A", "Automatically configure this and all remaining claws settings"),
193+
("A", f"Automatically configure this and all remaining {utils.program_name} settings"),
194194
("C", "Create this path automatically"),
195195
("M", "Manually input correct path to use or to create"),
196196
("Q", f"Quit and edit `{error.payload.internal_name}` in {self.path_to_config_file.name}"),

src/rtfparse/entry.py

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,19 @@
77
import logging.config
88
import argparse
99
import argcomplete
10-
import io
1110
from argcomplete.completers import EnvironCompleter as EC
1211
from itertools import filterfalse
12+
import io
13+
import extract_msg as em
14+
import compressed_rtf as cr
1315
# Own modules
1416
from rtfparse import logging_conf
1517
from rtfparse import errors
1618
from rtfparse import utils
1719
from rtfparse import config_loader
1820
from rtfparse import version
1921
from rtfparse.parser import Rtf_Parser
22+
from rtfparse.renderers import encapsulated_html
2023

2124

2225
# Setup logging
@@ -48,17 +51,24 @@ def argument_parser() -> argparse.ArgumentParser:
4851
parser.add_argument("--autoconfig", action="store_true", help="Configure rtfparse automatically").completer = EC
4952
parser.add_argument("-f", "--file", action="store", metavar="PATH", type=pathlib.Path, help="path to the rtf file").completer = EC
5053
parser.add_argument("-m", "--msg", action="store", metavar="PATH", type=pathlib.Path, help="Parse RTF from MS Outlook's .msg file").completer = EC
54+
parser.add_argument("-d", "--de-encapsulate-html", action="store", metavar="PATH", type=pathlib.Path, help="De-encapsulate HTML from RTF").completer = EC
5155
return parser
5256

5357

58+
def de_encapsulate(rp: Rtf_Parser, target_file: pathlib.Path) -> None:
59+
renderer = encapsulated_html.Encapsulated_HTML()
60+
with open(target_file, mode="w", encoding="utf-8") as htmlfile:
61+
logger.info(f"Rendering the encapsulated HTML")
62+
renderer.render(rp.parsed, htmlfile)
63+
logger.info(f"Encapsulated HTML rendered")
64+
65+
5466
def run(config: config_loader.Config) -> None:
67+
rp = Rtf_Parser()
5568
if config.cli_args.file and config.cli_args.file.exists():
56-
rp = Rtf_Parser()
5769
with open(config.cli_args.file, mode="rb") as rtf_file:
5870
rp.parse_file(config, rtf_file)
5971
elif config.cli_args.msg:
60-
import extract_msg as em
61-
import compressed_rtf as cr
6272
msg = em.openMsg(f"{config.cli_args.msg}")
6373
for attachment in msg.attachments:
6474
with open(config.html / f"{attachment.longFilename}", mode="wb") as att_file:
@@ -67,14 +77,9 @@ def run(config: config_loader.Config) -> None:
6777
with open((config.email_rtf / config.cli_args.msg.name).with_suffix(".rtf"), mode="wb") as email_rtf:
6878
email_rtf.write(decompressed_rtf)
6979
with io.BytesIO(decompressed_rtf) as rtf_file:
70-
rp = Rtf_Parser()
7180
rp.parse_file(config, rtf_file)
72-
from rtfparse.renderers import encapsulated_html
73-
renderer = encapsulated_html.Encapsulated_HTML()
74-
with open((config.html / config.cli_args.msg.name).with_suffix(".html"), mode="w", encoding="utf-8") as htmlfile:
75-
logger.info(f"Rendering the encapsulated HTML")
76-
renderer.render(rp.parsed, htmlfile)
77-
logger.info(f"Encapsulated HTML rendered")
81+
if config.cli_args.de_encapsulate_html:
82+
de_encapsulate(rp, config.cli_args.de_encapsulate_html)
7883

7984

8085

src/rtfparse/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
#!/usr/bin/env python
22

33

4-
version = "0.6.1"
4+
version = "0.7.1"

0 commit comments

Comments
 (0)