Skip to content

Commit 30f5435

Browse files
committed
Convert tables to json for visualization
1 parent 0286cca commit 30f5435

File tree

1 file changed

+56
-0
lines changed

1 file changed

+56
-0
lines changed

tables2json.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
#!/usr/bin/env python
2+
3+
import fire
4+
from sota_extractor.taskdb import TaskDB
5+
from pathlib import Path
6+
import json
7+
import re
8+
import pandas as pd
9+
10+
from label_tables import get_table, get_metadata
11+
12+
def get_celltags(filename):
    """Load a grid of cell tags from a headerless CSV file.

    Every cell is read as a string and missing cells are replaced by ''.

    Args:
        filename: Path (``str`` or ``Path``) of the CSV file to read.

    Returns:
        A ``pandas.DataFrame`` of strings. An empty ``DataFrame`` is returned
        when the file does not exist or contains no data at all.
    """
    try:
        celltags = pd.read_csv(Path(filename), header=None, dtype=str)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # A missing file and a zero-byte/blank file both mean "no tags";
        # read_csv raises EmptyDataError for the latter, which would
        # otherwise abort the whole export.
        return pd.DataFrame()
    return celltags.fillna('')
19+
20+
21+
def get_tables(tables_dir, pattern="1509*"):
    """Collect metadata, tables and cell tags for the papers under a directory.

    Each sub-directory of ``tables_dir`` whose name matches ``pattern`` and
    that contains a ``metadata.json`` file is treated as one paper; the
    sub-directory name is used as its arXiv id.

    Args:
        tables_dir: Directory (``str`` or ``Path``) holding one
            sub-directory per paper.
        pattern: Glob pattern selecting the paper sub-directories. Defaults
            to ``"1509*"``, the selection that was previously hard-coded.

    Returns:
        A tuple ``(all_metadata, all_tables, all_celltags)`` of dicts keyed
        by arXiv id. ``all_metadata`` maps to whatever ``get_metadata``
        returns; the other two map each item of that metadata to the result
        of ``get_table`` / ``get_celltags`` respectively.
    """
    tables_dir = Path(tables_dir)
    all_metadata = {}
    all_tables = {}
    all_celltags = {}
    # Path.glob yields entries in an unspecified order; sorting keeps the
    # resulting dicts (and any JSON built from them) stable between runs.
    for metadata_filename in sorted(tables_dir.glob(pattern + "/metadata.json")):
        metadata = get_metadata(metadata_filename)
        basedir = metadata_filename.parent
        arxiv_id = basedir.name
        all_metadata[arxiv_id] = metadata
        # Iterating the metadata yields names that are resolved relative to
        # the paper directory (presumably table file names -- confirm
        # against label_tables.get_metadata).
        all_tables[arxiv_id] = {t: get_table(basedir / t) for t in metadata}
        # The cell-tag file sits next to its table, with "table" replaced by
        # "celltags" in the name.
        all_celltags[arxiv_id] = {
            t: get_celltags(basedir / t.replace("table", "celltags"))
            for t in metadata
        }
    return all_metadata, all_tables, all_celltags
34+
35+
def t2j(df):
    """Convert a DataFrame into a JSON-serializable list of rows.

    Args:
        df: The ``pandas.DataFrame`` to convert.

    Returns:
        A list with one inner list per row, holding that row's cell values
        in column order. An empty frame yields ``[]``.
    """
    # Casting to object first turns numpy scalars (np.int64, np.float64, ...)
    # into native Python objects, which json.dumps can serialize, and keeps
    # each column's own type instead of upcasting mixed numeric columns.
    return df.astype(object).values.tolist()
38+
39+
40+
def tables2json(tables_dir):
    """Print the tables found under ``tables_dir`` as one JSON document.

    The output is a JSON object keyed by arXiv id. Each value holds the
    paper's ``metadata``, a ``tables`` list and a ``celltags`` list, where
    every table / cell-tag grid is encoded as a list of rows.

    Args:
        tables_dir: Directory passed on to ``get_tables``.
    """
    metadata, tables, celltags = get_tables(tables_dir)
    all_data = {
        arxiv_id: {
            'metadata': paper_metadata,
            'tables': [t2j(frame) for frame in tables[arxiv_id].values()],
            'celltags': [t2j(frame) for frame in celltags[arxiv_id].values()],
        }
        for arxiv_id, paper_metadata in metadata.items()
    }
    print(json.dumps(all_data))
55+
56+
if __name__ == '__main__':
    # Expose tables2json as the command-line entry point via python-fire.
    fire.Fire(tables2json)

0 commit comments

Comments
 (0)