1+ {
2+ "metadata" : {
3+ "dataset_name" : " llm_webkit_test" ,
4+ "extractor_name" : " llm-webkit" ,
5+ "timestamp" : " 2025-07-31T13:52:12.948959" ,
6+ "total_samples" : 3
7+ },
8+ "overall_metrics" : {
9+ "code_edit" : 0.8293333333333334 ,
10+ "formula_edit" : 0.7076023391812866 ,
11+ "table_edit" : 0.9629629629629629 ,
12+ "table_TEDS" : 1.0 ,
13+ "text_edit" : 0.6105951152390782 ,
14+ "overall" : 0.8220987501433322
15+ },
16+ "sample_results" : [
17+ {
18+ "sample_id" : " text_code_sample" ,
19+ "extraction_success" : true ,
20+ "extraction_time" : 3.6406631469726562 ,
21+ "metrics" : {
22+ "code_edit" : {
23+ "score" : 0.488 ,
24+ "success" : true ,
25+ "details" : {
26+ "distance" : 64 ,
27+ "predicted_length" : 125 ,
28+ "groundtruth_length" : 61 ,
29+ "normalized" : true ,
30+ "predicted_code_length" : 125 ,
31+ "groundtruth_code_length" : 61 ,
32+ "content_type" : " code"
33+ }
34+ },
35+ "formula_edit" : {
36+ "score" : 1.0 ,
37+ "success" : true ,
38+ "details" : {
39+ "distance" : 0 ,
40+ "predicted_length" : 0 ,
41+ "groundtruth_length" : 0 ,
42+ "normalized" : true ,
43+ "predicted_formula_length" : 0 ,
44+ "groundtruth_formula_length" : 0 ,
45+ "content_type" : " formula"
46+ }
47+ },
48+ "table_edit" : {
49+ "score" : 1.0 ,
50+ "success" : true ,
51+ "details" : {
52+ "distance" : 0 ,
53+ "predicted_length" : 0 ,
54+ "groundtruth_length" : 0 ,
55+ "normalized" : true ,
56+ "predicted_table_length" : 0 ,
57+ "groundtruth_table_length" : 0 ,
58+ "content_type" : " table"
59+ }
60+ },
61+ "table_TEDS" : {
62+ "score" : 1.0 ,
63+ "success" : true ,
64+ "details" : {
65+ "edit_distance" : 0.0 ,
66+ "predicted_nodes" : 3 ,
67+ "groundtruth_nodes" : 3 ,
68+ "max_nodes" : 3 ,
69+ "structure_only" : false ,
70+ "algorithm" : " TEDS" ,
71+ "content_type" : " table"
72+ }
73+ },
74+ "text_edit" : {
75+ "score" : 0.9298245614035088 ,
76+ "success" : true ,
77+ "details" : {
78+ "distance" : 4 ,
79+ "predicted_length" : 57 ,
80+ "groundtruth_length" : 53 ,
81+ "normalized" : true ,
82+ "predicted_text_length" : 57 ,
83+ "groundtruth_text_length" : 53 ,
84+ "content_type" : " text"
85+ }
86+ },
87+ "overall" : {
88+ "score" : 0.8835649122807018 ,
89+ "success" : true ,
90+ "details" : {
91+ "source" : " average_of_all_metrics" ,
92+ "description" : " Overall score as average of all successful metrics" ,
93+ "successful_metrics" : 5 ,
94+ "failed_metrics" : 0 ,
95+ "individual_scores" : {
96+ "code_edit" : 0.488 ,
97+ "formula_edit" : 1.0 ,
98+ "table_edit" : 1.0 ,
99+ "table_TEDS" : 1.0 ,
100+ "text_edit" : 0.9298245614035088
101+ }
102+ }
103+ }
104+ },
105+ "sample_metadata" : {
106+ "url" : null ,
107+ "domain" : null ,
108+ "language" : null ,
109+ "content_type" : null ,
110+ "difficulty" : null
111+ }
112+ },
113+ {
114+ "sample_id" : " table_sample" ,
115+ "extraction_success" : true ,
116+ "extraction_time" : 1.6590700149536133 ,
117+ "metrics" : {
118+ "code_edit" : {
119+ "score" : 1.0 ,
120+ "success" : true ,
121+ "details" : {
122+ "distance" : 0 ,
123+ "predicted_length" : 0 ,
124+ "groundtruth_length" : 0 ,
125+ "normalized" : true ,
126+ "predicted_code_length" : 0 ,
127+ "groundtruth_code_length" : 0 ,
128+ "content_type" : " code"
129+ }
130+ },
131+ "formula_edit" : {
132+ "score" : 1.0 ,
133+ "success" : true ,
134+ "details" : {
135+ "distance" : 0 ,
136+ "predicted_length" : 0 ,
137+ "groundtruth_length" : 0 ,
138+ "normalized" : true ,
139+ "predicted_formula_length" : 0 ,
140+ "groundtruth_formula_length" : 0 ,
141+ "content_type" : " formula"
142+ }
143+ },
144+ "table_edit" : {
145+ "score" : 0.8888888888888888 ,
146+ "success" : true ,
147+ "details" : {
148+ "distance" : 9 ,
149+ "predicted_length" : 72 ,
150+ "groundtruth_length" : 81 ,
151+ "normalized" : true ,
152+ "predicted_table_length" : 72 ,
153+ "groundtruth_table_length" : 81 ,
154+ "content_type" : " table"
155+ }
156+ },
157+ "table_TEDS" : {
158+ "score" : 1.0 ,
159+ "success" : true ,
160+ "details" : {
161+ "edit_distance" : 0.0 ,
162+ "predicted_nodes" : 13 ,
163+ "groundtruth_nodes" : 13 ,
164+ "max_nodes" : 13 ,
165+ "structure_only" : false ,
166+ "algorithm" : " TEDS" ,
167+ "content_type" : " table"
168+ }
169+ },
170+ "text_edit" : {
171+ "score" : 0.6666666666666667 ,
172+ "success" : true ,
173+ "details" : {
174+ "distance" : 3 ,
175+ "predicted_length" : 9 ,
176+ "groundtruth_length" : 6 ,
177+ "normalized" : true ,
178+ "predicted_text_length" : 9 ,
179+ "groundtruth_text_length" : 6 ,
180+ "content_type" : " text"
181+ }
182+ },
183+ "overall" : {
184+ "score" : 0.9111111111111111 ,
185+ "success" : true ,
186+ "details" : {
187+ "source" : " average_of_all_metrics" ,
188+ "description" : " Overall score as average of all successful metrics" ,
189+ "successful_metrics" : 5 ,
190+ "failed_metrics" : 0 ,
191+ "individual_scores" : {
192+ "code_edit" : 1.0 ,
193+ "formula_edit" : 1.0 ,
194+ "table_edit" : 0.8888888888888888 ,
195+ "table_TEDS" : 1.0 ,
196+ "text_edit" : 0.6666666666666667
197+ }
198+ }
199+ }
200+ },
201+ "sample_metadata" : {
202+ "url" : null ,
203+ "domain" : null ,
204+ "language" : null ,
205+ "content_type" : null ,
206+ "difficulty" : null
207+ }
208+ },
209+ {
210+ "sample_id" : " formula_sample" ,
211+ "extraction_success" : true ,
212+ "extraction_time" : 1.5354089736938477 ,
213+ "metrics" : {
214+ "code_edit" : {
215+ "score" : 1.0 ,
216+ "success" : true ,
217+ "details" : {
218+ "distance" : 0 ,
219+ "predicted_length" : 0 ,
220+ "groundtruth_length" : 0 ,
221+ "normalized" : true ,
222+ "predicted_code_length" : 0 ,
223+ "groundtruth_code_length" : 0 ,
224+ "content_type" : " code"
225+ }
226+ },
227+ "formula_edit" : {
228+ "score" : 0.1228070175438597 ,
229+ "success" : true ,
230+ "details" : {
231+ "distance" : 50 ,
232+ "predicted_length" : 9 ,
233+ "groundtruth_length" : 57 ,
234+ "normalized" : true ,
235+ "predicted_formula_length" : 9 ,
236+ "groundtruth_formula_length" : 57 ,
237+ "content_type" : " formula"
238+ }
239+ },
240+ "table_edit" : {
241+ "score" : 1.0 ,
242+ "success" : true ,
243+ "details" : {
244+ "distance" : 0 ,
245+ "predicted_length" : 0 ,
246+ "groundtruth_length" : 0 ,
247+ "normalized" : true ,
248+ "predicted_table_length" : 0 ,
249+ "groundtruth_table_length" : 0 ,
250+ "content_type" : " table"
251+ }
252+ },
253+ "table_TEDS" : {
254+ "score" : 1.0 ,
255+ "success" : true ,
256+ "details" : {
257+ "edit_distance" : 0.0 ,
258+ "predicted_nodes" : 3 ,
259+ "groundtruth_nodes" : 3 ,
260+ "max_nodes" : 3 ,
261+ "structure_only" : false ,
262+ "algorithm" : " TEDS" ,
263+ "content_type" : " table"
264+ }
265+ },
266+ "text_edit" : {
267+ "score" : 0.23529411764705888 ,
268+ "success" : true ,
269+ "details" : {
270+ "distance" : 65 ,
271+ "predicted_length" : 85 ,
272+ "groundtruth_length" : 37 ,
273+ "normalized" : true ,
274+ "predicted_text_length" : 85 ,
275+ "groundtruth_text_length" : 37 ,
276+ "content_type" : " text"
277+ }
278+ },
279+ "overall" : {
280+ "score" : 0.6716202270381837 ,
281+ "success" : true ,
282+ "details" : {
283+ "source" : " average_of_all_metrics" ,
284+ "description" : " Overall score as average of all successful metrics" ,
285+ "successful_metrics" : 5 ,
286+ "failed_metrics" : 0 ,
287+ "individual_scores" : {
288+ "code_edit" : 1.0 ,
289+ "formula_edit" : 0.1228070175438597 ,
290+ "table_edit" : 1.0 ,
291+ "table_TEDS" : 1.0 ,
292+ "text_edit" : 0.23529411764705888
293+ }
294+ }
295+ }
296+ },
297+ "sample_metadata" : {
298+ "url" : null ,
299+ "domain" : null ,
300+ "language" : null ,
301+ "content_type" : null ,
302+ "difficulty" : null
303+ }
304+ }
305+ ],
306+ "category_metrics" : {
307+ "unknown" : {
308+ "code_edit" : 0.8293333333333334 ,
309+ "formula_edit" : 0.7076023391812866 ,
310+ "table_edit" : 0.9629629629629629 ,
311+ "table_TEDS" : 1.0 ,
312+ "text_edit" : 0.6105951152390782 ,
313+ "overall" : 0.8220987501433322
314+ }
315+ },
316+ "error_analysis" : {
317+ "total_samples" : 3 ,
318+ "failed_count" : 0 ,
319+ "success_rate" : 1.0 ,
320+ "common_errors" : {},
321+ "sample_errors" : []
322+ },
323+ "extractor_config" : {
324+ "model_path" : " /Users/chupei/model/checkpoint-3296"
325+ },
326+ "metric_config" : {}
327+ }
0 commit comments