-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathrender.py
More file actions
79 lines (65 loc) · 1.54 KB
/
render.py
File metadata and controls
79 lines (65 loc) · 1.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import re
import yaml
from yaml.loader import SafeLoader
from tabulate import tabulate
# Display order of dataset domains. A dataset's position in this list is the
# primary sort key of the rendered table (see the sorting step further down).
DOMAIN_ORDER = [
    "Wikipedia",
    "News",
    "Web",
    "Mixed",
    "Books",
    "Education",
    "Medical",
    "Clinical",
    "Talks",
]

# Column titles handed to tabulate, one per cell produced by record_to_row.
HEADER = [
    "Dataset",
    "Lang",
    "Domain",
    "Kind",
    "Level",
    "Instances",
    "Refs.",
    "Link",
]
# Abbreviated labels for the "Level" column, keeping the table more compact.
LEVEL_MAP = {
    'Sentence': 'Sent',
    'Paragraph': 'Par',
    'Document': 'Doc',
    'Lexical': 'Lex',
}


def record_to_row(d: dict) -> list:
    """Convert one dataset record into a row of Markdown table cells.

    Args:
        d: Mapping describing a single dataset. The keys ``name``,
            ``author``, ``year``, ``language``, ``domain``, ``kind``,
            ``level``, ``instances`` and ``references`` are required.
            ``link``, ``linkNote`` and ``paper`` are optional and may be
            missing or empty.

    Returns:
        A list of eight cells: dataset name with citation, language, domain,
        kind, abbreviated level, instances, references and download link.

    Raises:
        KeyError: If one of the required keys is missing from ``d``.
    """
    # Download link cell: fall back to "n/a" when no URL is given.
    link_url = d.get("link")
    link = f"[Link]({link_url})" if link_url else "n/a"
    link_note = d.get("linkNote")
    if link_note:
        link += f" ({link_note})"

    # Citation: only wrap it in a Markdown link when a paper URL exists,
    # otherwise the output would contain a dead "(None)" link target.
    author_year = f"({d['author']}, {d['year']})"
    paper = d.get("paper")
    if paper:
        author_year = f"[{author_year}]({paper})"
    name = f"**{d['name']}** {author_year}"

    # Levels without an abbreviation are shown verbatim rather than being
    # turned into None (which would render as an empty cell).
    level = d["level"]

    return [
        name,
        d["language"],
        d["domain"],
        d["kind"],
        LEVEL_MAP.get(level, level),
        d["instances"],
        d["references"],
        link,
    ]
# Load the dataset records. The encoding is stated explicitly so the result
# does not depend on the platform's default locale encoding.
with open("data.yml", encoding="utf-8") as f:
    datasets = yaml.load(f, Loader=SafeLoader)

datasets = datasets["datasets"].values()
# Sort by domain (in DOMAIN_ORDER order), then by year. list.index raises
# ValueError for a domain that is not listed in DOMAIN_ORDER.
datasets = sorted(datasets, key=lambda d: (DOMAIN_ORDER.index(d["domain"]), d["year"]))

rows = map(record_to_row, datasets)
table = tabulate(rows, headers=HEADER, tablefmt="github")
table = re.sub(" +", " ", table)  # more compact
# Shrink the dash runs of the header separator row only. Applying this to the
# whole table would also rewrite consecutive hyphens inside cell content
# (for example in URLs), silently corrupting them.
table = re.sub(
    r"^[|:\- ]+$",
    lambda m: re.sub("-+", "-", m.group(0)),
    table,
    flags=re.MULTILINE,
)

# Fill the template placeholder and write the final README.
with open("README.template.md", encoding="utf-8") as fin:
    text = fin.read()

text = text.replace("{{datasets}}", table)

with open("README.md", "w", encoding="utf-8") as fout:
    fout.write(text)