@@ -87,8 +87,8 @@ def get_tables(tables_dir):
87
87
def normalize_float_value(s):
    """Pull the first metric-like value out of *s*.

    Returns a pair ``(normalized, matched)``:

    * ``normalized`` -- capture group 1 of the first ``metric_value_re``
      match, with everything ``whitespace_re`` matches removed and all
      commas dropped.
    * ``matched`` -- the full matched text (group 0) with surrounding
      whitespace stripped.

    When ``metric_value_re`` finds nothing in *s*, the pair is
    ``('-', None)``.
    """
    found = metric_value_re.search(s)
    if not found:
        # No match: placeholder text and no source string.
        return '-', None
    compact = whitespace_re.sub("", found.group(1))
    return compact.replace(",", ""), found.group(0).strip()
92
92
93
93
94
94
def test_near (x , precise ):
@@ -102,7 +102,7 @@ def test_near(x, precise):
102
102
103
103
104
104
def fuzzy_match (metric , metric_value , target_value ):
105
- metric_value = normalize_float_value (str (metric_value ))
105
+ metric_value , _ = normalize_float_value (str (metric_value ))
106
106
if metric_value in metric_na :
107
107
return False
108
108
metric_value = Decimal (metric_value )
@@ -164,7 +164,7 @@ def mark_with_comparator(task_name, dataset_name, metric_name, arxiv_id, table,
164
164
cell_tags = empty_celltags_like (table )
165
165
for col in range (cols ):
166
166
for row in range (rows ):
167
- for val in table .iloc [row , col ]:
167
+ for val , val_str in table .iloc [row , col ]:
168
168
for record in values :
169
169
if comparator (record .normalized , val ):
170
170
hits += 1
@@ -177,7 +177,8 @@ def mark_with_comparator(task_name, dataset_name, metric_name, arxiv_id, table,
177
177
if arxiv_id == record .arxiv_id :
178
178
tags += "<this_paper/>"
179
179
tags += f"<comparator>{ comp_name } </comparator>" + \
180
- f"<matched_cell>{ val } </matched_cell></hit>"
180
+ f"<matched_cell>{ val } </matched_cell>" + \
181
+ f"<matched_str>{ val_str } </matched_str></hit>"
181
182
cell_tags .iloc [row , col ] += tags
182
183
return cell_tags , hits
183
184
@@ -238,18 +239,16 @@ def match_many(output_dir, task_name, dataset_name, metric_name, tables, values)
238
239
239
240
240
241
def normalize_metric(value):
    """Convert *value* to a ``Decimal``.

    *value* is stringified and passed through ``normalize_float_value``;
    only the normalized text of the returned pair is used.  If that text
    is listed in ``metric_na`` the result is ``Decimal("NaN")``,
    otherwise it is ``Decimal`` of the normalized text.
    """
    number_text, _ = normalize_float_value(str(value))
    return Decimal("NaN") if number_text in metric_na else Decimal(number_text)
245
246
246
247
247
248
def normalize_cell(cell):
    """Extract every metric value found in *cell*.

    Each ``metric_value_re.findall`` result has its first element passed
    through ``normalize_float_value``, which yields a
    ``(normalized_text, matched_str)`` pair.

    Returns a list of ``(Decimal(normalized_text), matched_str)`` tuples,
    one per match, skipping entries whose normalized text is listed in
    ``metric_na``.
    """
    found = metric_value_re.findall(cell)
    normalized = [normalize_float_value(match[0]) for match in found]
    # normalize_float_value returns a pair, so the "not available" check
    # must look at the text component.  Testing the whole pair against
    # metric_na would let the ('-', None) placeholder through
    # (presumably metric_na holds plain strings -- confirm at its
    # definition) and Decimal('-') would then raise.
    return [
        (Decimal(text), matched_str)
        for text, matched_str in normalized
        if text not in metric_na
    ]
254
253
255
254
0 commit comments