@@ -56,7 +56,9 @@ In the current version the option `--embed-img` does nothing.
5656
5757# Programmatic usage in a Python module
5858
59- ```
59+ ## Decapsulate HTML from an uncompressed RTF file
60+
61+ ``` py
6062from pathlib import Path
6163from rtfparse.parser import Rtf_Parser
6264from rtfparse.renderers.html_decapsulator import HTML_Decapsulator
@@ -75,6 +77,37 @@ with open(target_path, mode="w", encoding="utf-8") as html_file:
7577 renderer.render(parsed, html_file)
7678```
7779
80+ ## Decapsulate HTML from an MS Outlook msg file
81+
82+ ``` py
83+ from pathlib import Path
84+ from extract_msg import openMsg
85+ from compressed_rtf import decompress
86+ from io import BytesIO
87+ from rtfparse.parser import Rtf_Parser
88+ from rtfparse.renderers.html_decapsulator import HTML_Decapsulator
89+
90+
91+ source_file = Path("path/to/your/source.msg")
92+ target_file = Path(r"path/to/your/target.html")
93+ # Create parent directory of `target_file` if it does not already exist:
94+ target_file.parent.mkdir(parents=True, exist_ok=True)
95+
96+ # Get a decompressed RTF bytes buffer from the MS Outlook message
97+ msg = openMsg(source_file)
98+ decompressed_rtf = decompress(msg.compressedRtf)
99+ rtf_buffer = BytesIO(decompressed_rtf)
100+
101+ # Parse the rtf buffer
102+ parser = Rtf_Parser(rtf_file=rtf_buffer)
103+ parsed = parser.parse_file()
104+
105+ # Decapsulate the HTML from the parsed RTF
106+ decapsulator = HTML_Decapsulator()
107+ with open(target_file, mode="w", encoding="utf-8") as html_file:
108+     decapsulator.render(parsed, html_file)
109+ ```
110+
78111# RTF Specification Links
79112
80113If you find a working official Microsoft link to the RTF specification and add it here, you'll be remembered fondly.
0 commit comments