|
1 | 1 | [ |
2 | 2 | { |
3 | | - "element_id": "3a6b156a81764e17be128264241f8136", |
| 3 | + "element_id": "eda37931eb954fcc8dec8804c7e8fa4c", |
4 | 4 | "metadata": { |
5 | 5 | "category_depth": 0, |
6 | | - "filename": "example.pdf", |
| 6 | + "file_directory": "test_unstructured/documents/html_files", |
| 7 | + "filename": "example.html", |
7 | 8 | "filetype": "text/html", |
8 | 9 | "languages": [ |
9 | 10 | "eng" |
10 | 11 | ], |
| 12 | + "last_modified": "2025-06-12T11:12:20", |
11 | 13 | "page_number": 1, |
12 | | - "parent_id": "897a8a47377c4ad6aab839a929879537", |
| 14 | + "parent_id": "037b418b76eb4ac1bd40326ff67e67b0", |
13 | 15 | "text_as_html": "<div class=\"Page\" data-page-number=\"1\" />" |
14 | 16 | }, |
15 | 17 | "text": "", |
16 | 18 | "type": "UncategorizedText" |
17 | 19 | }, |
18 | 20 | { |
19 | | - "element_id": "45b3d0053468484ba1c7b53998115412", |
| 21 | + "element_id": "97eb491421584ad892074d039779fbfa", |
20 | 22 | "metadata": { |
21 | 23 | "category_depth": 1, |
22 | | - "filename": "example.pdf", |
| 24 | + "file_directory": "test_unstructured/documents/html_files", |
| 25 | + "filename": "example.html", |
23 | 26 | "filetype": "text/html", |
24 | 27 | "languages": [ |
25 | 28 | "eng" |
26 | 29 | ], |
| 30 | + "last_modified": "2025-06-12T11:12:20", |
27 | 31 | "page_number": 1, |
28 | | - "parent_id": "3a6b156a81764e17be128264241f8136", |
29 | | - "text_as_html": "<header class=\"Header\" />" |
| 32 | + "parent_id": "eda37931eb954fcc8dec8804c7e8fa4c", |
| 33 | + "text_as_html": "<header class=\"Header\"><h1 class=\"Title\">Header</h1><time class=\"CalendarDate\">Date: October 30, 2023</time></header>" |
30 | 34 | }, |
31 | | - "text": "", |
32 | | - "type": "UncategorizedText" |
33 | | - }, |
34 | | - { |
35 | | - "element_id": "c95473e8a3704fc2b418697f9fddb27b", |
36 | | - "metadata": { |
37 | | - "category_depth": 2, |
38 | | - "filename": "example.pdf", |
39 | | - "filetype": "text/html", |
40 | | - "languages": [ |
41 | | - "eng" |
42 | | - ], |
43 | | - "page_number": 1, |
44 | | - "parent_id": "45b3d0053468484ba1c7b53998115412", |
45 | | - "text_as_html": "<h1 class=\"Title\">Header</h1>" |
46 | | - }, |
47 | | - "text": "Header", |
48 | | - "type": "Title" |
49 | | - }, |
50 | | - { |
51 | | - "element_id": "379cbfdc16d44bd6a59e6cfabe6438d5", |
52 | | - "metadata": { |
53 | | - "category_depth": 2, |
54 | | - "filename": "example.pdf", |
55 | | - "filetype": "text/html", |
56 | | - "languages": [ |
57 | | - "eng" |
58 | | - ], |
59 | | - "page_number": 1, |
60 | | - "parent_id": "45b3d0053468484ba1c7b53998115412", |
61 | | - "text_as_html": "<time class=\"CalendarDate\">Date: October 30, 2023</time>" |
62 | | - }, |
63 | | - "text": "Date: October 30, 2023", |
64 | | - "type": "UncategorizedText" |
| 35 | + "text": "Header Date: October 30, 2023", |
| 36 | + "type": "Header" |
65 | 37 | }, |
66 | 38 | { |
67 | | - "element_id": "637c2f6935fb4353a5f73025ce04619d", |
| 39 | + "element_id": "4afb6e4a90e14835b958dadb77cd8331", |
68 | 40 | "metadata": { |
69 | 41 | "category_depth": 1, |
70 | | - "filename": "example.pdf", |
| 42 | + "file_directory": "test_unstructured/documents/html_files", |
| 43 | + "filename": "example.html", |
71 | 44 | "filetype": "text/html", |
72 | 45 | "languages": [ |
73 | 46 | "eng" |
74 | 47 | ], |
| 48 | + "last_modified": "2025-06-12T11:12:20", |
75 | 49 | "page_number": 1, |
76 | | - "parent_id": "3a6b156a81764e17be128264241f8136", |
| 50 | + "parent_id": "eda37931eb954fcc8dec8804c7e8fa4c", |
77 | 51 | "text_as_html": "<form class=\"Form\"><label class=\"FormField\" for=\"company-name\">From field name</label><input class=\"FormFieldValue\" value=\"Example value\" /></form>" |
78 | 52 | }, |
79 | 53 | "text": "From field name Example value", |
80 | 54 | "type": "UncategorizedText" |
81 | 55 | }, |
82 | 56 | { |
83 | | - "element_id": "592422373ed741b68a077e2003f8ed81", |
| 57 | + "element_id": "d8f996f2bc9a49f4979aac58a2a9ee93", |
84 | 58 | "metadata": { |
85 | 59 | "category_depth": 1, |
86 | | - "filename": "example.pdf", |
| 60 | + "file_directory": "test_unstructured/documents/html_files", |
| 61 | + "filename": "example.html", |
87 | 62 | "filetype": "text/html", |
88 | 63 | "languages": [ |
89 | 64 | "eng" |
90 | 65 | ], |
| 66 | + "last_modified": "2025-06-12T11:12:20", |
91 | 67 | "page_number": 1, |
92 | | - "parent_id": "3a6b156a81764e17be128264241f8136", |
| 68 | + "parent_id": "eda37931eb954fcc8dec8804c7e8fa4c", |
93 | 69 | "text_as_html": "<section class=\"Section\" />" |
94 | 70 | }, |
95 | 71 | "text": "", |
96 | 72 | "type": "UncategorizedText" |
97 | 73 | }, |
98 | 74 | { |
99 | | - "element_id": "dc3792d4422e444f90876b56d0cfb20d", |
| 75 | + "element_id": "d2c12f995ab248808900f66aec479e9d", |
100 | 76 | "metadata": { |
101 | 77 | "category_depth": 2, |
102 | | - "filename": "example.pdf", |
| 78 | + "file_directory": "test_unstructured/documents/html_files", |
| 79 | + "filename": "example.html", |
103 | 80 | "filetype": "text/html", |
104 | 81 | "languages": [ |
105 | 82 | "eng" |
106 | 83 | ], |
| 84 | + "last_modified": "2025-06-12T11:12:20", |
107 | 85 | "page_number": 1, |
108 | | - "parent_id": "592422373ed741b68a077e2003f8ed81", |
| 86 | + "parent_id": "d8f996f2bc9a49f4979aac58a2a9ee93", |
109 | 87 | "text_as_html": "<table class=\"Table\"><thead><tr><th>Description</th><th>Row header</th></tr></thead><tbody><tr><td>Value description</td><td><span>50 $</span><span>(1.32 %)</span></td></tr></tbody></table>" |
110 | 88 | }, |
111 | 89 | "text": "Description Row header Value description 50 $ (1.32 %)", |
112 | 90 | "type": "Table" |
113 | 91 | }, |
114 | 92 | { |
115 | | - "element_id": "1032242af75c4b37984ea7fea9aac74c", |
| 93 | + "element_id": "8e3f0d85329343008593f43afcad3327", |
116 | 94 | "metadata": { |
117 | 95 | "category_depth": 1, |
118 | | - "filename": "example.pdf", |
| 96 | + "file_directory": "test_unstructured/documents/html_files", |
| 97 | + "filename": "example.html", |
119 | 98 | "filetype": "text/html", |
120 | 99 | "languages": [ |
121 | 100 | "eng" |
122 | 101 | ], |
| 102 | + "last_modified": "2025-06-12T11:12:20", |
123 | 103 | "page_number": 1, |
124 | | - "parent_id": "3a6b156a81764e17be128264241f8136", |
| 104 | + "parent_id": "eda37931eb954fcc8dec8804c7e8fa4c", |
125 | 105 | "text_as_html": "<section class=\"Section\" />" |
126 | 106 | }, |
127 | 107 | "text": "", |
128 | 108 | "type": "UncategorizedText" |
129 | 109 | }, |
130 | 110 | { |
131 | | - "element_id": "2a4e2c4a689f4f9a8c180b6b521e45c3", |
| 111 | + "element_id": "5deaad75854741ccb69767881ef399db", |
132 | 112 | "metadata": { |
133 | 113 | "category_depth": 2, |
134 | | - "filename": "example.pdf", |
| 114 | + "file_directory": "test_unstructured/documents/html_files", |
| 115 | + "filename": "example.html", |
135 | 116 | "filetype": "text/html", |
136 | 117 | "languages": [ |
137 | 118 | "eng" |
138 | 119 | ], |
| 120 | + "last_modified": "2025-06-12T11:12:20", |
139 | 121 | "page_number": 1, |
140 | | - "parent_id": "1032242af75c4b37984ea7fea9aac74c", |
| 122 | + "parent_id": "8e3f0d85329343008593f43afcad3327", |
141 | 123 | "text_as_html": "<h2 class=\"Subtitle\">2. Subtitle</h2>" |
142 | 124 | }, |
143 | 125 | "text": "2. Subtitle", |
144 | 126 | "type": "Title" |
145 | 127 | }, |
146 | 128 | { |
147 | | - "element_id": "5591f7a4df01447e82515ce45f686fbe", |
| 129 | + "element_id": "9e61f29755bc4b6dbb41ea575d41edb6", |
148 | 130 | "metadata": { |
149 | 131 | "category_depth": 2, |
150 | | - "filename": "example.pdf", |
| 132 | + "file_directory": "test_unstructured/documents/html_files", |
| 133 | + "filename": "example.html", |
151 | 134 | "filetype": "text/html", |
152 | 135 | "languages": [ |
153 | 136 | "eng" |
154 | 137 | ], |
| 138 | + "last_modified": "2025-06-12T11:12:20", |
155 | 139 | "page_number": 1, |
156 | | - "parent_id": "1032242af75c4b37984ea7fea9aac74c", |
| 140 | + "parent_id": "8e3f0d85329343008593f43afcad3327", |
157 | 141 | "text_as_html": "<p class=\"NarrativeText\">Paragraph text</p>" |
158 | 142 | }, |
159 | 143 | "text": "Paragraph text", |
|
0 commit comments