Skip to content

Commit a0aea55

Browse files
committed
Include tasks in flattened json
1 parent c960077 commit a0aea55

File tree

2 files changed

+28
-3
lines changed

2 files changed

+28
-3
lines changed

flatten_evaltab.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
#!/usr/bin/env bash
2-
jq -c '.. | select(.datasets?).datasets | .[] | .dataset as $dataset | .sota.rows[] | {paper_url, paper_title, model_name} as $paper | .metrics | . as $metrics | keys[] | {dataset: $dataset, metric_name: ., metric_value: $metrics[.], paper_url: $paper.paper_url, paper_title: $paper.paper_title, model_name: $paper.model_name }' "$1" | grep arxiv\.org | jq -s '.'
2+
# Flatten the evaluation-tables JSON file given as $1: emit one compact record
# per (task, dataset, paper row, metric), keep only the records whose line
# contains the literal text "arxiv.org", and slurp the survivors into a single
# JSON array on stdout.
# The grep pattern is quoted so the backslash reaches grep and the dot is
# matched literally (unquoted, the shell strips the backslash first).
jq -c '.. | select(.datasets?) | .task as $task | .datasets | .[] | .dataset as $dataset | .sota.rows[] | {paper_url, paper_title, model_name} as $paper | .metrics | . as $metrics | keys[] | {dataset: $dataset, metric_name: ., metric_value: $metrics[.], paper_url: $paper.paper_url, paper_title: $paper.paper_title, model_name: $paper.model_name, task: $task }' "$1" | grep 'arxiv\.org' | jq -s '.'

label_tables.py

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,14 +152,18 @@ def match_metric(metric, tables, value):
152152
]
153153

154154

155+
def empty_celltags_like(table):
    """Return a DataFrame of empty strings labelled like ``table``.

    The result has the same index and columns as ``table`` and every cell
    holds ``''``, so tag strings can be appended to individual cells.

    Args:
        table: DataFrame whose index and columns are reused.

    Returns:
        A new DataFrame with ``table``'s shape and labels, filled with ``''``.
    """
    # Build the string frame directly instead of creating a NaN frame and
    # filling it, which avoids a float -> str upcast.
    return pd.DataFrame('', index=table.index, columns=table.columns)
157+
158+
155159
def mark_with_best_comparator(task_name, dataset_name, metric_name, arxiv_id, table, values):
156160
max_hits = 0
157161
best_tags = None
158162
rows, cols = table.shape
159163

160164
for comparator in comparators:
161165
hits = 0
162-
cell_tags = pd.DataFrame().reindex_like(table).fillna('')
166+
cell_tags = empty_celltags_like(table)
163167
for col in range(cols):
164168
for row in range(rows):
165169
for val in table.iloc[row, col]:
@@ -180,15 +184,36 @@ def mark_with_best_comparator(task_name, dataset_name, metric_name, arxiv_id, ta
180184
best_tags = cell_tags
181185

182186
return best_tags
187+
188+
189+
def normalize_string(s):
    """Return ``s`` lower-cased with leading and trailing whitespace removed.

    Args:
        s: the string to normalize.

    Returns:
        The normalized string.
    """
    # ``lower`` must be called; ``s.lower.strip`` would look up ``strip`` on
    # the bound method and raise AttributeError.
    return s.lower().strip()
191+
192+
193+
def match_str(a, b):
    """Return whether ``a`` and ``b`` are equal once both are normalized.

    Both arguments are passed through ``normalize_string`` before being
    compared with ``==``.
    """
    left = normalize_string(a)
    right = normalize_string(b)
    return left == right
195+
196+
197+
def mark_strings(table, tags, values):
    """Tag each cell of ``table`` whose content matches one of ``values``.

    Args:
        table: DataFrame whose cells are compared against ``values``.
        tags: pair ``(beg, end)`` of strings placed around each matched value.
        values: iterable of strings to look for.

    Returns:
        A DataFrame produced by ``empty_celltags_like(table)`` in which every
        matching cell holds ``f"{beg}{value}{end}"`` for each value that
        matched it, concatenated in the order of ``values``; non-matching
        cells stay empty.
    """
    # NOTE(review): assumes every cell is a string accepted by match_str --
    # confirm against the callers.
    cell_tags = empty_celltags_like(table)
    beg, end = tags
    rows, cols = table.shape
    for col in range(cols):
        for row in range(rows):
            # The cell content does not depend on the value being tested, so
            # read it once per cell.
            real = table.iloc[row, col]
            for s in values:
                if match_str(real, s):
                    # Append to the matching cell only; ``cell_tags += ...``
                    # would append the tag to every cell of the frame.
                    cell_tags.iloc[row, col] += f"{beg}{s}{end}"
    return cell_tags
183208

184209

185210
# Module-level store of cell tags keyed by (arxiv_id, table); match_many
# accumulates the best tags for each key into it.
metatables = {}
186211
def match_many(output_dir, task_name, dataset_name, metric_name, tables, values):
187212
for arxiv_id in tables:
188213
for table in tables[arxiv_id]:
189214
best = mark_with_best_comparator(task_name, dataset_name, metric_name, arxiv_id, tables[arxiv_id][table], values)
215+
global metatables
190216
if best is not None:
191-
global metatables
192217
key = (arxiv_id, table)
193218
if key in metatables:
194219
metatables[key] += best

0 commit comments

Comments
 (0)