@@ -174,7 +174,7 @@ def handle_pm(value):
                     'confidence', 'parsed', 'struct_model_type', 'struct_dataset']
 
 
-def generate_proposals_for_table(table_ext_id, matrix, structure, desc, taxonomy_linking, datasets):
+def generate_proposals_for_table(table_ext_id, matrix, structure, desc, taxonomy_linking, datasets, topk=1):
     # %%
     # Proposal generation
     def consume_cells(matrix):
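The new `topk` argument is only threaded through to `taxonomy_linking`, so the diff assumes the linker can return more than one candidate match per query. A minimal sketch of that assumed interface, purely for illustration (the stub name and example rows are hypothetical; only the column names `dataset`, `metric`, `task` and `confidence` are taken from the code below):

```python
import pandas as pd

def toy_taxonomy_linking(query, datasets, desc, topk=1, debug_info=None):
    """Hypothetical stand-in for the real linker (e.g. MatchSearch): return up to
    `topk` candidate (dataset, metric, task) rows, ordered by linking confidence."""
    candidates = pd.DataFrame([
        {'dataset': 'CIFAR-10',  'metric': 'Accuracy', 'task': 'Image Classification', 'confidence': 0.92},
        {'dataset': 'CIFAR-100', 'metric': 'Accuracy', 'task': 'Image Classification', 'confidence': 0.41},
    ])
    return candidates.head(topk)
```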
@@ -217,11 +217,6 @@ def annotations(r, c, type='model'):
 
     def linked_proposals(proposals):
         for prop in proposals:
-            df = taxonomy_linking(prop.dataset, datasets, desc, debug_info=prop)
-            assert len(df) == 1
-
-            metric = df['metric'][0]
-
             # heuristic to handle accuracy vs error
             first_num = (list(handle_pm(prop.raw_value)) + [0])[0]
             format = "{x}"
@@ -234,24 +229,27 @@ def linked_proposals(proposals):
             if '%' in prop.raw_value:
                 format += '%'
 
-            # if ("error" in metric or "Error" in metric) and (first_num > 0.5):
-            if (metric.strip().lower() == "error") and (first_num > 0.5):
-                metric = "Accuracy"
-
-            linked = {
-                'dataset': df['dataset'][0],
-                'metric': metric,
-                'task': df['task'][0],
-                'format': format,
-                'raw_value': prop.raw_value,
-                'model': prop.model_name,
-                'model_type': prop.model_type,
-                'cell_ext_id': prop.cell.cell_ext_id,
-                'confidence': df['confidence'][0],
-                'struct_model_type': prop.model_type,
-                'struct_dataset': prop.dataset
-            }
-            yield linked
+            df = taxonomy_linking(prop.dataset, datasets, desc, topk=topk, debug_info=prop)
+            for _, row in df.iterrows():
+                metric = row['metric']
+                # if ("error" in metric or "Error" in metric) and (first_num > 0.5):
+                if (metric.strip().lower() == "error") and (first_num > 0.5):
+                    metric = "Accuracy"
+
+                linked = {
+                    'dataset': row['dataset'],
+                    'metric': metric,
+                    'task': row['task'],
+                    'format': format,
+                    'raw_value': prop.raw_value,
+                    'model': prop.model_name,
+                    'model_type': prop.model_type,
+                    'cell_ext_id': prop.cell.cell_ext_id,
+                    'confidence': row['confidence'],
+                    'struct_model_type': prop.model_type,
+                    'struct_dataset': prop.dataset
+                }
+                yield linked
 
 
     # specify columns in case there's no proposal
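The effect of the hunk above: instead of asserting exactly one match (`assert len(df) == 1`) and reading row 0, the linking call now passes `topk=topk` and one proposal record is yielded per returned row, so a single table cell can produce several candidate records. A small self-contained illustration of that per-row pattern, using made-up values:

```python
import pandas as pd

# Hypothetical top-2 linker result for one table cell.
df = pd.DataFrame([
    {'dataset': 'CIFAR-10',  'metric': 'Error',    'task': 'Image Classification', 'confidence': 0.92},
    {'dataset': 'CIFAR-100', 'metric': 'Accuracy', 'task': 'Image Classification', 'confidence': 0.41},
])

first_num = 94.1  # numeric part of a raw cell value such as "94.1%"
for _, row in df.iterrows():
    metric = row['metric']
    # same heuristic as in the diff: a large value labelled "error" is treated as an accuracy
    if metric.strip().lower() == "error" and first_num > 0.5:
        metric = "Accuracy"
    print(row['dataset'], metric, row['confidence'])
# CIFAR-10 Accuracy 0.92
# CIFAR-100 Accuracy 0.41
```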
@@ -264,7 +262,7 @@ def linked_proposals(proposals):
 
 
 def linked_proposals(paper_ext_id, paper, annotated_tables, taxonomy_linking=MatchSearch(),
-                     dataset_extractor=None):
+                     dataset_extractor=None, topk=1):
                      # dataset_extractor=DatasetExtractor()):
     proposals = []
     datasets = dataset_extractor.from_paper(paper)
@@ -277,7 +275,11 @@ def linked_proposals(paper_ext_id, paper, annotated_tables, taxonomy_linking=Mat
         table_ext_id = f"{paper_ext_id}/{table.name}"
 
         if 'sota' in tags and 'no_sota_records' not in tags:  # only parse tables that are marked as sota
-            proposals.append(generate_proposals_for_table(table_ext_id, matrix, structure, desc, taxonomy_linking, datasets))
+            proposals.append(
+                generate_proposals_for_table(
+                    table_ext_id, matrix, structure, desc, taxonomy_linking, datasets, topk=topk
+                )
+            )
     if len(proposals):
         return pd.concat(proposals)
     return pd.DataFrame(columns=proposal_columns)
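From the caller's side the new keyword simply threads through the two functions above. A sketch of how it might be used, where the paper and extractor objects are placeholders from the surrounding pipeline and only the keyword arguments shown come from this diff; with `topk > 1` the returned DataFrame can hold several candidate records per cell, which a downstream step can filter or re-rank by `confidence`:

```python
# Placeholders: paper_ext_id, paper, annotated_tables and dataset_extractor are
# assumed to be produced earlier in the pipeline.
proposals = linked_proposals(
    paper_ext_id, paper, annotated_tables,
    taxonomy_linking=MatchSearch(),
    dataset_extractor=dataset_extractor,
    topk=5,
)

# Keep only the highest-confidence candidate per cell when single answers are needed.
best = (proposals.sort_values('confidence', ascending=False)
                 .groupby('cell_ext_id')
                 .head(1))
```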