|
4 | 4 | "metadata": { |
5 | 5 | "category_depth": 0, |
6 | 6 | "filename": "example.pdf", |
| 7 | + "filetype": "text/html", |
| 8 | + "languages": [ |
| 9 | + "eng" |
| 10 | + ], |
7 | 11 | "page_number": 1, |
8 | 12 | "parent_id": "897a8a47377c4ad6aab839a929879537", |
9 | 13 | "text_as_html": "<div class=\"Page\" data-page-number=\"1\" id=\"3a6b156a81764e17be128264241f8136\" />" |
|
16 | 20 | "metadata": { |
17 | 21 | "category_depth": 1, |
18 | 22 | "filename": "example.pdf", |
| 23 | + "filetype": "text/html", |
| 24 | + "languages": [ |
| 25 | + "eng" |
| 26 | + ], |
19 | 27 | "page_number": 1, |
20 | 28 | "parent_id": "3a6b156a81764e17be128264241f8136", |
21 | 29 | "text_as_html": "<header class=\"Header\" id=\"45b3d0053468484ba1c7b53998115412\" />" |
|
28 | 36 | "metadata": { |
29 | 37 | "category_depth": 2, |
30 | 38 | "filename": "example.pdf", |
| 39 | + "filetype": "text/html", |
| 40 | + "languages": [ |
| 41 | + "eng" |
| 42 | + ], |
31 | 43 | "page_number": 1, |
32 | 44 | "parent_id": "45b3d0053468484ba1c7b53998115412", |
33 | | - "text_as_html": "<h1 class=\"Title\" id=\"c95473e8a3704fc2b418697f9fddb27b\">Header </h1>" |
| 45 | + "text_as_html": "<h1 class=\"Title\" id=\"c95473e8a3704fc2b418697f9fddb27b\">Header</h1>" |
34 | 46 | }, |
35 | 47 | "text": "Header", |
36 | 48 | "type": "Title" |
|
40 | 52 | "metadata": { |
41 | 53 | "category_depth": 2, |
42 | 54 | "filename": "example.pdf", |
| 55 | + "filetype": "text/html", |
| 56 | + "languages": [ |
| 57 | + "eng" |
| 58 | + ], |
43 | 59 | "page_number": 1, |
44 | 60 | "parent_id": "45b3d0053468484ba1c7b53998115412", |
45 | | - "text_as_html": "<time class=\"CalendarDate\" id=\"379cbfdc16d44bd6a59e6cfabe6438d5\">Date: October 30, 2023 </time>" |
| 61 | + "text_as_html": "<time class=\"CalendarDate\" id=\"379cbfdc16d44bd6a59e6cfabe6438d5\">Date: October 30, 2023</time>" |
46 | 62 | }, |
47 | 63 | "text": "Date: October 30, 2023", |
48 | 64 | "type": "UncategorizedText" |
|
52 | 68 | "metadata": { |
53 | 69 | "category_depth": 1, |
54 | 70 | "filename": "example.pdf", |
| 71 | + "filetype": "text/html", |
| 72 | + "languages": [ |
| 73 | + "eng" |
| 74 | + ], |
55 | 75 | "page_number": 1, |
56 | 76 | "parent_id": "3a6b156a81764e17be128264241f8136", |
57 | | - "text_as_html": "<form class=\"Form\" id=\"637c2f6935fb4353a5f73025ce04619d\"> <label class=\"FormField\" for=\"company-name\" id=\"50027cccbe1948c9853ce0de37b635c2\">From field name </label><input class=\"FormFieldValue\" id=\"0032242af75c4b37984ea7fea9aac74c\" value=\"Example value\" /></form>" |
| 77 | + "text_as_html": "<form class=\"Form\" id=\"637c2f6935fb4353a5f73025ce04619d\"><label class=\"FormField\" for=\"company-name\" id=\"50027cccbe1948c9853ce0de37b635c2\">From field name</label><input class=\"FormFieldValue\" id=\"0032242af75c4b37984ea7fea9aac74c\" value=\"Example value\" /></form>" |
58 | 78 | }, |
59 | 79 | "text": "From field name Example value", |
60 | 80 | "type": "UncategorizedText" |
|
64 | 84 | "metadata": { |
65 | 85 | "category_depth": 1, |
66 | 86 | "filename": "example.pdf", |
| 87 | + "filetype": "text/html", |
| 88 | + "languages": [ |
| 89 | + "eng" |
| 90 | + ], |
67 | 91 | "page_number": 1, |
68 | 92 | "parent_id": "3a6b156a81764e17be128264241f8136", |
69 | 93 | "text_as_html": "<section class=\"Section\" id=\"592422373ed741b68a077e2003f8ed81\" />" |
|
76 | 100 | "metadata": { |
77 | 101 | "category_depth": 2, |
78 | 102 | "filename": "example.pdf", |
| 103 | + "filetype": "text/html", |
| 104 | + "languages": [ |
| 105 | + "eng" |
| 106 | + ], |
79 | 107 | "page_number": 1, |
80 | 108 | "parent_id": "592422373ed741b68a077e2003f8ed81", |
81 | | - "text_as_html": "<table class=\"Table\" id=\"dc3792d4422e444f90876b56d0cfb20d\"> <thead> <tr> <th>Description</th><th>Row header</th></tr></thead><tbody> <tr> <td>Value description</td><td>50 $ (1.32 %)</td></tr></tbody></table>" |
| 109 | + "text_as_html": "<table class=\"Table\" id=\"dc3792d4422e444f90876b56d0cfb20d\"><thead><tr><th>Description</th><th>Row header</th></tr></thead><tbody><tr><td>Value description</td><td><span>50 $</span><span>(1.32 %)</span></td></tr></tbody></table>" |
82 | 110 | }, |
83 | 111 | "text": "Description Row header Value description 50 $ (1.32 %)", |
84 | 112 | "type": "Table" |
|
88 | 116 | "metadata": { |
89 | 117 | "category_depth": 1, |
90 | 118 | "filename": "example.pdf", |
| 119 | + "filetype": "text/html", |
| 120 | + "languages": [ |
| 121 | + "eng" |
| 122 | + ], |
91 | 123 | "page_number": 1, |
92 | 124 | "parent_id": "3a6b156a81764e17be128264241f8136", |
93 | 125 | "text_as_html": "<section class=\"Section\" id=\"1032242af75c4b37984ea7fea9aac74c\" />" |
|
100 | 132 | "metadata": { |
101 | 133 | "category_depth": 2, |
102 | 134 | "filename": "example.pdf", |
| 135 | + "filetype": "text/html", |
| 136 | + "languages": [ |
| 137 | + "eng" |
| 138 | + ], |
103 | 139 | "page_number": 1, |
104 | 140 | "parent_id": "1032242af75c4b37984ea7fea9aac74c", |
105 | | - "text_as_html": "<h2 class=\"Subtitle\" id=\"2a4e2c4a689f4f9a8c180b6b521e45c3\">2. Subtitle </h2>" |
| 141 | + "text_as_html": "<h2 class=\"Subtitle\" id=\"2a4e2c4a689f4f9a8c180b6b521e45c3\">2. Subtitle</h2>" |
106 | 142 | }, |
107 | 143 | "text": "2. Subtitle", |
108 | 144 | "type": "Title" |
|
112 | 148 | "metadata": { |
113 | 149 | "category_depth": 2, |
114 | 150 | "filename": "example.pdf", |
| 151 | + "filetype": "text/html", |
| 152 | + "languages": [ |
| 153 | + "eng" |
| 154 | + ], |
115 | 155 | "page_number": 1, |
116 | 156 | "parent_id": "1032242af75c4b37984ea7fea9aac74c", |
117 | | - "text_as_html": "<p class=\"NarrativeText\" id=\"5591f7a4df01447e82515ce45f686fbe\">Paragraph text </p>" |
| 157 | + "text_as_html": "<p class=\"NarrativeText\" id=\"5591f7a4df01447e82515ce45f686fbe\">Paragraph text</p>" |
118 | 158 | }, |
119 | 159 | "text": "Paragraph text", |
120 | 160 | "type": "NarrativeText" |
|
0 commit comments