@@ -181,30 +181,35 @@ def partition_email(
181181 else :
182182 raise ValueError ("Only one of filename, file, or text can be specified." )
183183
184- content_map : Dict [str , str ] = {
185- part .get_content_type (): part .get_payload () for part in msg .walk ()
186- }
184+ content_map : Dict [str , str ] = {}
185+ for part in msg .walk ():
186+ # NOTE(robinson) - content disposition is None for the content of the email itself.
187+ # Other dispositions include "attachment" for attachments
188+ if part .get_content_disposition () is not None :
189+ continue
190+ content_type = part .get_content_type ()
191+ content_map [content_type ] = part .get_payload ()
187192
188193 content = content_map .get (content_source , "" )
189194 if not content :
190195 raise ValueError (f"{ content_source } content not found in email" )
191196
192- # NOTE(robinson) - In the .eml files, the HTML content gets stored in a format that
193- # looks like the following, resulting in extraneous "=" characters in the output if
194- # you don't clean it up
195- # <ul> =
196- # <li>Item 1</li>=
197- # <li>Item 2<li>=
198- # </ul>
199- list_content = split_by_paragraph (content )
200-
201197 if content_source == "text/html" :
198+ # NOTE(robinson) - In the .eml files, the HTML content gets stored in a format that
199+ # looks like the following, resulting in extraneous "=" characters in the output if
200+ # you don't clean it up
201+ # <ul> =
202+ # <li>Item 1</li>=
203+ # <li>Item 2<li>=
204+ # </ul>
205+ list_content = content .split ("=\n " )
202206 content = "" .join (list_content )
203207 elements = partition_html (text = content )
204208 for element in elements :
205209 if isinstance (element , Text ):
206210 element .apply (replace_mime_encodings )
207211 elif content_source == "text/plain" :
212+ list_content = split_by_paragraph (content )
208213 elements = partition_text (text = content )
209214
210215 for idx , element in enumerate (elements ):
0 commit comments