Skip to content

Commit ff48090

Browse files
committed
Style tables
* match annotations with or without arxiv version * display table and text styles in jupyter * update inline stylesheet
1 parent de158a6 commit ff48090

File tree

6 files changed

+28
-11
lines changed

6 files changed

+28
-11
lines changed

sota_extractor2/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
# otherwise use these files
1212
data = Path("/mnt/efs/pwc/data")
13-
goldtags_dump = data / "dumps" / "goldtags-2019.07.31_1454-htlatex-latexml.json.gz"
13+
goldtags_dump = data / "dumps" / "goldtags-2019.08.06_0835.json.gz"
1414

1515

1616
elastic = dict(hosts=['localhost'], timeout=20)

sota_extractor2/data/paper_collection.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@ def _load_tables(path, annotations, jobs, migrate):
4444

4545
def _load_annotated_papers(path):
4646
dump = load_gql_dump(path, compressed=path.suffix == ".gz")["allPapers"]
47-
annotations = {a.arxiv_id: a for a in dump}
47+
annotations = {remove_arxiv_version(a.arxiv_id): a for a in dump}
48+
annotations.update({a.arxiv_id: a for a in dump})
4849
return annotations
4950

5051

sota_extractor2/data/table.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,22 @@ def extract_references(s):
3636
return text, refs
3737

3838

39+
style_tags_re = re.compile(r"</?(bold|italic|red|green|blue)>")
40+
def remove_text_styles(s):
41+
return style_tags_re.sub("", s)
42+
43+
44+
reference_id_re = re.compile(r"<ref id='([^']*)'>")
45+
def raw_value_to_html(s):
46+
s = style_tags_re.sub(lambda x: "</span>" if x[0].startswith("</") else f'<span class="text-{x[1]}">', s)
47+
s = s.replace("</ref>", "</a>")
48+
s = reference_id_re.sub(r'<a title="\1">', s)
49+
return s
50+
51+
3952
def str2cell(s):
4053
value, refs = extract_references(s)
54+
value = remove_text_styles(value)
4155
return Cell(value=value, raw_value=s, refs=refs)
4256

4357
def read_str_csv(filename):
@@ -122,7 +136,7 @@ def from_file(cls, path, metadata, annotations=None, migrate=False, match_name=N
122136
return cls(metadata['filename'], df, layout, metadata.get('caption'), metadata.get('figure_id'), table_ann, migrate, match_name, guessed_tags)
123137

124138
def display(self):
125-
display_table(self.df.applymap(lambda x: x.value).values, self.df.applymap(lambda x: x.gold_tags).values)
139+
display_table(self.df.applymap(lambda x: raw_value_to_html(x.raw_value)).values, self.df.applymap(lambda x: x.gold_tags).values, self.df.applymap(lambda x:x.layout).values)
126140

127141
#####
128142
# this code is used to migrate table annotations from

sota_extractor2/helpers/jupyter.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ def display_html(s): return display(HTML(s))
1313

1414

1515

16-
def display_table(table, structure=None):
16+
def display_table(table, structure=None, layout=None):
1717
"""
1818
matrix - 2d ndarray with cell values
1919
structure - 2d ndarray with structure annotation
@@ -23,14 +23,15 @@ def display_table(table, structure=None):
2323
else:
2424
matrix = table
2525
if structure is None: structure = table.matrix_gold_tags
26+
if layout is None: layout = np.zeros_like(matrix, dtype=str)
2627
html = []
2728
html.append(table_style)
2829
html.append('<div class="tableWrapper">')
2930
html.append("<table>")
30-
for row,struc_row in zip(matrix, structure):
31+
for row,struc_row, layout_row in zip(matrix, structure, layout):
3132
html.append("<tr>")
32-
for cell,struct in zip(row,struc_row):
33-
html.append(f'<td class="{struct}">{cell}</td>')
33+
for cell,struct,layout in zip(row,struc_row,layout_row):
34+
html.append(f'<td class="{struct} {layout}">{cell}</td>')
3435
html.append("</tr>")
3536
html.append("</table>")
3637
html.append('</div>')
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1-
table_style="""<style>body{margin:0;padding:0;font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Oxygen,Ubuntu,Cantarell,Fira Sans,Droid Sans,Helvetica Neue,sans-serif;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale}code{font-family:source-code-pro,Menlo,Monaco,Consolas,Courier New,monospace}.tableWrapper{-overflow:auto}.tableWrapper .model-params{background-color:#209cee;color:rgba(0,0,0,.7)}.tableWrapper .table-meta{background-color:#fff3c5;color:rgba(0,0,0,.7)}.tableWrapper .model-best{background-color:#ff3860;color:rgba(0,0,0,.7)}.tableWrapper .model-competing{background-color:#ffdd57;color:rgba(0,0,0,.7)}.tableWrapper .model-paper{background-color:#ff3860;color:#fff}.tableWrapper .dataset-sub{background-color:#23d160;color:#fff}.tableWrapper .dataset-metric{background-color:#209cee;color:#fff}.tableWrapper .dataset{background-color:#02bd43;color:#fff}.tableWrapper .trash{background-color:#363636;color:#f5f5f5}.tableWrapper .wtf{background-color:#f0f;color:#f5f5f5}.tableWrapper .dataset-task{background-color:#77ecdd;color:rgba(0,0,0,.7)}.tableWrapper .dataset-paper{background-color:#e4ffee;color:rgba(0,0,0,.7)}.tableWrapper td.focused-cell{outline:2px solid #9ecaed;border-radius:7px;box-shadow:0 0 10px #9ecaed}div.form-group>input.form-control.input-sm{border-radius:2px;font-size:.75rem;background-color:#fff;color:#363636;box-shadow:inset 0 1px 2px rgba(10,10,10,.1);max-width:100%;width:100%;height:2.25em;padding:calc(.375em - 1px) calc(.625em - 1px);position:relative;border:1px solid #b5b5b5}div.form-group>input.form-control.input-sm:focus{border-color:#3273dc;box-shadow:0 0 0 .125em rgba(50,115,220,.25)}
1+
table_style="""<style>
2+
body{margin:0;padding:0;font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Oxygen,Ubuntu,Cantarell,Fira Sans,Droid Sans,Helvetica Neue,sans-serif;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale}code{font-family:source-code-pro,Menlo,Monaco,Consolas,Courier New,monospace}.tableWrapper{-overflow:auto}.tableWrapper .has-annotations{color:#ff3860}.tableWrapper .model-params{background-color:#209cee;color:rgba(0,0,0,.7)}.tableWrapper .table-meta{background-color:#fff3c5;color:rgba(0,0,0,.7)}.tableWrapper .model-best{background-color:#ff3860;color:rgba(0,0,0,.7)}.tableWrapper .model-competing{background-color:#ffdd57;color:rgba(0,0,0,.7)}.tableWrapper .model-paper{background-color:#ff3860;color:#fff}.tableWrapper .dataset-sub{background-color:#23d160;color:#fff}.tableWrapper .dataset-metric{background-color:#209cee;color:#fff}.tableWrapper .dataset{background-color:#02bd43;color:#fff}.tableWrapper .trash{background-color:#363636;color:#f5f5f5}.tableWrapper .wtf{background-color:#f0f;color:#f5f5f5}.tableWrapper .dataset-task{background-color:#77ecdd;color:rgba(0,0,0,.7)}.tableWrapper .dataset-paper{background-color:#e4ffee;color:rgba(0,0,0,.7)}.tableWrapper td.focused-cell{outline:2px solid #9ecaed;border-radius:7px;box-shadow:0 0 10px #9ecaed}.tableWrapper span.text-bold{font-weight:700}.tableWrapper span.text-italic{font-style:italic}.tableWrapper span.text-red{color:red}.tableWrapper span.text-green{color:green}.tableWrapper span.text-blue{color:#00f}.predict-dataset,.predict-dataset-metric,.predict-model-competing,.predict-model-paper,.predict-model-params,.predict-table-meta{outline:2px solid #9ecaed;border-radius:7px;box-shadow:0 0 10px #9ecaed}.tableWrapper .predict-model-params{background-color:#209cee;color:rgba(0,0,0,.7)}.tableWrapper .predict-table-meta{background-color:#fff3c5;color:rgba(0,0,0,.7)}.tableWrapper .predict-model-competing{background-color:#ffdd57;color:rgba(0,0,0,.7)}.tableWrapper 
.predict-model-paper{background-color:#ff3860;color:#fff}.tableWrapper .predict-dataset-metric{background-color:#209cee;color:#fff}.tableWrapper .predict-dataset{background-color:#02bd43;color:#fff}.tableWrapper td{border:inherit}.tableWrapper table tr td.border-l{border-left:1px solid #000}.tableWrapper table tr td.border-r{border-right:1px solid #000}.tableWrapper table tr td.border-t{border-top:1px solid #000}.tableWrapper table tr td.border-b{border-bottom:1px solid #000}.tableWrapper table tr td.border-ll{border-left:2px solid #000}.tableWrapper table tr td.border-rr{border-right:2px solid #000}.tableWrapper table tr td.border-tt{border-top:2px solid #000}.tableWrapper table tr td.border-bb{border-bottom:2px solid #000}.tableWrapper table tr td.align-left{text-align:left}.tableWrapper table tr td.align-right{text-align:right}.tableWrapper table tr td.align-center{text-align:center}.tableWrapper table tr td.align-justify{text-align:justify}div.form-group>input.form-control.input-sm{border-radius:2px;font-size:.75rem;background-color:#fff;color:#363636;box-shadow:inset 0 1px 2px rgba(10,10,10,.1);max-width:100%;width:100%;height:2.25em;padding:calc(.375em - 1px) calc(.625em - 1px);position:relative;border:1px solid #b5b5b5}div.form-group>input.form-control.input-sm:focus{border-color:#3273dc;box-shadow:0 0 0 .125em rgba(50,115,220,.25)}.ht_clone_top{z-index:20}.evaluation-tables{overflow:scroll;max-height:20vh;border-top:1px solid #a9a9a9}.navbar.is-fixed-bottom,.navbar.is-fixed-top{z-index:200}body{padding-bottom:20vh}
23
</style>
34
"""

sota_extractor2/models/structure/experiment.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ class Labels(Enum):
1717
PAPER_MODEL=2
1818
COMPETING_MODEL=3
1919
METRIC=4
20-
PARAMS=5
20+
# PARAMS=5
2121

2222
label_map = {
2323
"dataset": Labels.DATASET.value,
@@ -26,7 +26,7 @@ class Labels(Enum):
2626
"model-best": Labels.PAPER_MODEL.value,
2727
"model-competing": Labels.COMPETING_MODEL.value,
2828
"dataset-metric": Labels.METRIC.value,
29-
"model-params": Labels.PARAMS.value
29+
# "model-params": Labels.PARAMS.value
3030
}
3131

3232
# put here to avoid recompiling, used only in _limit_context
@@ -252,7 +252,7 @@ def _plot_confusion_matrix(self, cm, normalize, fmt=None):
252252
cm = cm / cm.sum(axis=1)[:, None]
253253
if fmt is None:
254254
fmt = "0.2f" if normalize else "d"
255-
target_names = ["OTHER", "DATASET", "MODEL (paper)", "MODEL (comp.)", "METRIC", "PARAMS"]
255+
target_names = ["OTHER", "DATASET", "MODEL (paper)", "MODEL (comp.)", "METRIC"] #, "PARAMS"]
256256
df_cm = pd.DataFrame(cm, index=[i for i in target_names],
257257
columns=[i for i in target_names])
258258
plt.figure(figsize=(10, 10))

0 commit comments

Comments
 (0)