Skip to content

Commit c960077

Browse files
committed
Add support for numbers with std
1 parent 1093a50 commit c960077

File tree

2 files changed

+14
-6
lines changed

2 files changed

+14
-6
lines changed

flatten_evaltab.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
#!/usr/bin/env bash
2+
# Flatten the JSON file given as $1: for every value that has a `datasets`
# field, emit one compact JSON object per (dataset, sota row, metric key)
# carrying the dataset, metric name/value and the row's paper_url,
# paper_title and model_name. Keep only the lines containing "arxiv.org",
# then slurp the surviving objects into a single JSON array.
# The grep pattern is quoted so the backslash reaches grep and the dot is
# matched literally (unquoted, the shell strips the backslash first).
jq -c '.. | select(.datasets?).datasets | .[] | .dataset as $dataset | .sota.rows[] | {paper_url, paper_title, model_name} as $paper | .metrics | . as $metrics | keys[] | {dataset: $dataset, metric_name: ., metric_value: $metrics[.], paper_url: $paper.paper_url, paper_title: $paper.paper_title, model_name: $paper.model_name }' "$1" | grep 'arxiv\.org' | jq -s '.'

label_tables.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -76,14 +76,18 @@ def get_tables(tables_dir):
7676

7777

7878

79-
float_value_re = re.compile(r"([+-]?\s*(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?)")
79+
# Matches one number: an optional sign and optional whitespace, then either
# digits (optionally written with comma-grouped thousands such as "12,345")
# with an optional fractional part, or a bare fraction such as ".5", then an
# optional exponent. Group 1 spans the whole number.
# NOTE(review): the comma-grouped branch allows only 1-2 leading digits, so
# "123,456" is matched as "123" and "456" separately - confirm this is intended.
float_value_re = re.compile(r"([+-]?\s*((\d{1,2}(,\d{3})+|\d+)(\.\d*)?|\.\d+)([eE][+-]?\d+)?)")
80+
# Matches a single Unicode letter: a word character that is neither a digit
# nor an underscore. Written as a raw string so "\W" and "\d" reach the regex
# engine directly instead of relying on Python keeping invalid string escapes.
letters_re = re.compile(r"[^\W\d_]", re.UNICODE)
81+
82+
# float value, optionally followed by "±" and a second float (its std)
83+
metric_value_re = re.compile(float_value_re.pattern + r"(\s*±\s*" + float_value_re.pattern + ")?")
8084
whitespace_re = re.compile(r"\s+")
8185

8286

8387
def normalize_float_value(s):
84-
match = float_value_re.search(s)
88+
match = metric_value_re.search(s)
8589
if match:
86-
return whitespace_re.sub("", match.group(0))
90+
return whitespace_re.sub("", match.group(1)).replace(",", "")
8791
return '-'
8892

8993

@@ -103,7 +107,7 @@ def fuzzy_match(metric, metric_value, target_value):
103107
return False
104108
metric_value = Decimal(metric_value)
105109

106-
for match in float_value_re.findall(target_value):
110+
for match in metric_value_re.findall(target_value):
107111
value = whitespace_re.sub("", match[0])
108112
value = Decimal(value)
109113

@@ -200,8 +204,10 @@ def normalize_metric(value):
200204

201205

202206
def normalize_cell(cell):
203-
matches = float_value_re.findall(cell)
204-
matches = [whitespace_re.sub("", match[0]) for match in matches]
207+
if len(letters_re.findall(cell)) > 2:
208+
return []
209+
matches = metric_value_re.findall(cell)
210+
matches = [normalize_float_value(match[0]) for match in matches]
205211
values = [Decimal(value) for value in matches]
206212
return values
207213

0 commit comments

Comments
 (0)