|
2 | 2 | "metadata": { |
3 | 3 | "dataset_name": "real_preprocessed_html_test", |
4 | 4 | "extractor_name": "llm-webkit", |
5 | | - "timestamp": "2025-08-08T16:08:47.516977", |
| 5 | + "timestamp": "2025-08-13T14:53:57.558572", |
6 | 6 | "total_samples": 2 |
7 | 7 | }, |
8 | 8 | "overall_metrics": { |
9 | | - "text_edit": 0.5209619232317955, |
| 9 | + "text_edit": 0.045309156685715835, |
10 | 10 | "code_edit": 0.0, |
11 | 11 | "table_edit": 0.0, |
12 | 12 | "table_TEDS": 0.0, |
13 | 13 | "formula_edit": 0.0, |
14 | | - "overall": 0.10419238464635909 |
| 14 | + "overall": 0.009061831337143167 |
15 | 15 | }, |
16 | 16 | "sample_results": [ |
17 | 17 | { |
18 | 18 | "sample_id": "33e291cd-5b26-48b1-977f-3c63b45e6d13", |
19 | 19 | "extraction_success": true, |
20 | | - "extraction_time": 1.0598080158233643, |
| 20 | + "extraction_time": 0.6193361282348633, |
21 | 21 | "metrics": { |
22 | 22 | "code_edit": { |
23 | 23 | "score": 0.0, |
24 | 24 | "success": true, |
25 | 25 | "details": { |
26 | | - "distance": 499, |
27 | | - "predicted_length": 499, |
| 26 | + "distance": 505, |
| 27 | + "predicted_length": 505, |
28 | 28 | "groundtruth_length": 0, |
29 | 29 | "normalized": true, |
30 | | - "predicted_code_length": 499, |
| 30 | + "predicted_code_length": 505, |
31 | 31 | "groundtruth_code_length": 0, |
32 | 32 | "content_type": "code" |
33 | 33 | } |
|
43 | 43 | "error": "Both predicted and groundtruth are empty" |
44 | 44 | }, |
45 | 45 | "text_edit": { |
46 | | - "score": 0.16059957173447537, |
| 46 | + "score": 0.09025270758122739, |
47 | 47 | "success": true, |
48 | 48 | "details": { |
49 | | - "distance": 392, |
50 | | - "predicted_length": 467, |
| 49 | + "distance": 252, |
| 50 | + "predicted_length": 25, |
51 | 51 | "groundtruth_length": 277, |
52 | 52 | "normalized": true, |
53 | | - "predicted_text_length": 467, |
| 53 | + "predicted_text_length": 25, |
54 | 54 | "groundtruth_text_length": 277, |
55 | 55 | "content_type": "text" |
56 | 56 | } |
57 | 57 | }, |
58 | 58 | "table_edit": { |
59 | 59 | "score": 0.0, |
60 | | - "success": true, |
| 60 | + "success": false, |
61 | 61 | "details": { |
62 | | - "distance": 43, |
63 | | - "predicted_length": 43, |
64 | | - "groundtruth_length": 0, |
65 | | - "normalized": true, |
66 | | - "predicted_table_length": 43, |
| 62 | + "predicted_table_length": 0, |
67 | 63 | "groundtruth_table_length": 0, |
68 | 64 | "content_type": "table" |
69 | | - } |
| 65 | + }, |
| 66 | + "error": "Both predicted and groundtruth are empty" |
70 | 67 | }, |
71 | 68 | "table_TEDS": { |
72 | 69 | "score": 0.0, |
73 | | - "success": true, |
| 70 | + "success": false, |
74 | 71 | "details": { |
75 | | - "edit_distance": 4.0, |
76 | | - "predicted_nodes": 4, |
77 | | - "groundtruth_nodes": 3, |
78 | | - "max_nodes": 4, |
79 | | - "structure_only": false, |
80 | | - "algorithm": "TEDS", |
81 | | - "content_type": "table" |
82 | | - } |
| 72 | + "content_type": "table", |
| 73 | + "algorithm": "TEDS" |
| 74 | + }, |
| 75 | + "error": "Skipped due to table_edit failure: unknown reason" |
83 | 76 | }, |
84 | 77 | "overall": { |
85 | | - "score": 0.04014989293361884, |
| 78 | + "score": 0.045126353790613694, |
86 | 79 | "success": true, |
87 | 80 | "details": { |
88 | 81 | "source": "average_of_all_metrics", |
89 | 82 | "description": "Overall score as average of all successful metrics", |
90 | | - "successful_metrics": 4, |
91 | | - "failed_metrics": 1, |
| 83 | + "successful_metrics": 2, |
| 84 | + "failed_metrics": 3, |
92 | 85 | "individual_scores": { |
93 | 86 | "code_edit": 0.0, |
94 | | - "text_edit": 0.16059957173447537, |
95 | | - "table_edit": 0.0, |
96 | | - "table_TEDS": 0.0 |
| 87 | + "text_edit": 0.09025270758122739 |
97 | 88 | } |
98 | 89 | } |
99 | 90 | } |
|
109 | 100 | { |
110 | 101 | "sample_id": "93898d00-0d6c-451d-9f99-4c386c6c2918", |
111 | 102 | "extraction_success": true, |
112 | | - "extraction_time": 0.9913830757141113, |
| 103 | + "extraction_time": 0.0010640621185302734, |
113 | 104 | "metrics": { |
114 | 105 | "code_edit": { |
115 | 106 | "score": 0.0, |
|
132 | 123 | "error": "Both predicted and groundtruth are empty" |
133 | 124 | }, |
134 | 125 | "text_edit": { |
135 | | - "score": 0.8813242747291157, |
| 126 | + "score": 0.00036560579020428197, |
136 | 127 | "success": true, |
137 | 128 | "details": { |
138 | | - "distance": 21730, |
139 | | - "predicted_length": 183104, |
| 129 | + "distance": 161317, |
| 130 | + "predicted_length": 59, |
140 | 131 | "groundtruth_length": 161376, |
141 | 132 | "normalized": true, |
142 | | - "predicted_text_length": 183104, |
| 133 | + "predicted_text_length": 59, |
143 | 134 | "groundtruth_text_length": 161376, |
144 | 135 | "content_type": "text" |
145 | 136 | } |
146 | 137 | }, |
147 | 138 | "table_edit": { |
148 | 139 | "score": 0.0, |
149 | | - "success": true, |
| 140 | + "success": false, |
150 | 141 | "details": { |
151 | | - "distance": 33, |
152 | | - "predicted_length": 33, |
153 | | - "groundtruth_length": 0, |
154 | | - "normalized": true, |
155 | | - "predicted_table_length": 33, |
| 142 | + "predicted_table_length": 0, |
156 | 143 | "groundtruth_table_length": 0, |
157 | 144 | "content_type": "table" |
158 | | - } |
| 145 | + }, |
| 146 | + "error": "Both predicted and groundtruth are empty" |
159 | 147 | }, |
160 | 148 | "table_TEDS": { |
161 | 149 | "score": 0.0, |
162 | | - "success": true, |
| 150 | + "success": false, |
163 | 151 | "details": { |
164 | | - "edit_distance": 8.0, |
165 | | - "predicted_nodes": 8, |
166 | | - "groundtruth_nodes": 3, |
167 | | - "max_nodes": 8, |
168 | | - "structure_only": false, |
169 | | - "algorithm": "TEDS", |
170 | | - "content_type": "table" |
171 | | - } |
| 152 | + "content_type": "table", |
| 153 | + "algorithm": "TEDS" |
| 154 | + }, |
| 155 | + "error": "Skipped due to table_edit failure: unknown reason" |
172 | 156 | }, |
173 | 157 | "overall": { |
174 | | - "score": 0.29377475824303856, |
| 158 | + "score": 0.00036560579020428197, |
175 | 159 | "success": true, |
176 | 160 | "details": { |
177 | 161 | "source": "average_of_all_metrics", |
178 | 162 | "description": "Overall score as average of all successful metrics", |
179 | | - "successful_metrics": 3, |
180 | | - "failed_metrics": 2, |
| 163 | + "successful_metrics": 1, |
| 164 | + "failed_metrics": 4, |
181 | 165 | "individual_scores": { |
182 | | - "text_edit": 0.8813242747291157, |
183 | | - "table_edit": 0.0, |
184 | | - "table_TEDS": 0.0 |
| 166 | + "text_edit": 0.00036560579020428197 |
185 | 167 | } |
186 | 168 | } |
187 | 169 | } |
|
0 commit comments