|
| 1 | +import datetime |
1 | 2 | import email |
2 | 3 | import os |
3 | 4 | import pathlib |
4 | 5 | import pytest |
5 | 6 |
|
| 7 | + |
6 | 8 | from unstructured.documents.elements import NarrativeText, Title, ListItem, Image |
7 | 9 | from unstructured.documents.email_elements import ( |
8 | 10 | MetaData, |
9 | 11 | Recipient, |
10 | 12 | Sender, |
11 | 13 | Subject, |
| 14 | + ReceivedInfo, |
12 | 15 | ) |
13 | 16 | from unstructured.partition.email import ( |
14 | 17 | extract_attachment_info, |
|
36 | 39 | ListItem(text="Violets are blue"), |
37 | 40 | ] |
38 | 41 |
|
# Expected result of partition_email_header() for example-docs/fake-email-header.eml
# (compared by test_partition_email_header). The ReceivedInfo entries come first,
# followed by the MetaData / Subject / Sender / Recipient entries.
RECEIVED_HEADER_OUTPUT = [
    ReceivedInfo(name="ABCDEFG-000.ABC.guide", text="00.0.0.00"),
    ReceivedInfo(name="ABCDEFG-000.ABC.guide", text="ba23::58b5:2236:45g2:88h2"),
    ReceivedInfo(
        name="received_datetimetz",
        text="2023-02-20 10:03:18+12:00",
        # Same instant as `text`: 2023-02-20 10:03:18 at a fixed +12:00 UTC offset.
        datestamp=datetime.datetime(
            2023,
            2,
            20,
            10,
            3,
            18,
            tzinfo=datetime.timezone(datetime.timedelta(hours=12)),
        ),
    ),
    MetaData(name="MIME-Version", text="1.0"),
    MetaData(name="Date", text="Fri, 16 Dec 2022 17:04:16 -0500"),
    MetaData(
        name="Message-ID",
        text="<CADc-_xaLB2FeVQ7mNsoX+NJb_7hAJhBKa_zet-rtgPGenj0uVw@mail.gmail.com>",
    ),
    Subject(text="Test Email"),
    # NOTE(review): "[email protected]" looks like a redaction placeholder rather than
    # a real address -- confirm the Sender/Recipient text against the fixture file.
    Sender(name="Matthew Robinson", text="[email protected]"),
    Recipient(name="Matthew Robinson", text="[email protected]"),
    MetaData(
        name="Content-Type",
        text='multipart/alternative; boundary="00000000000095c9b205eff92630"',
    ),
]
| 65 | + |
39 | 66 | HEADER_EXPECTED_OUTPUT = [ |
40 | 67 | MetaData(name="MIME-Version", text="1.0"), |
41 | 68 | MetaData(name="Date", text="Fri, 16 Dec 2022 17:04:16 -0500"), |
@@ -114,12 +141,12 @@ def test_partition_email_from_filename_with_embedded_image(): |
114 | 141 |
|
115 | 142 |
|
def test_partition_email_header():
    """Check that partition_email_header yields the expected header elements.

    Parses example-docs/fake-email-header.eml into an email message, partitions
    its header, and compares the result with RECEIVED_HEADER_OUTPUT.
    """
    filename = os.path.join(DIRECTORY, "..", "..", "example-docs", "fake-email-header.eml")
    with open(filename, "r") as f:
        msg = email.message_from_file(f)

    elements = partition_email_header(msg)

    # Guard against an empty result before the element-by-element comparison.
    assert len(elements) > 0
    assert elements == RECEIVED_HEADER_OUTPUT
123 | 150 |
|
124 | 151 |
|
125 | 152 | def test_extract_email_text_matches_html(): |
|
0 commit comments