Skip to content

Commit 5cb2a40

Browse files
author
Marcin Kardas
committed
Fix structure annotation matching
1 parent d32e252 commit 5cb2a40

File tree

2 files changed

+12
-6
lines changed

2 files changed

+12
-6
lines changed

sota_extractor2/data/paper_collection.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,7 @@ def __init__(self, path, load_texts=True, load_tables=True):
3939
outer_join = set(texts).union(set(tables))
4040

4141
self._papers = {k: Paper(texts.get(k), tables.get(k), annotations.get(k)) for k in outer_join}
42+
self.annotations = annotations
4243

4344
def __len__(self):
4445
return len(self._papers)

sota_extractor2/data/table.py

Lines changed: 11 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -20,10 +20,11 @@ def __init__(self, df, caption=None, figure_id=None, annotations=None):
2020
self.df = df.applymap(lambda x: Cell(value=x))
2121
if annotations is not None:
2222
self.gold_tags = annotations.gold_tags.strip()
23-
rows, cols = annotations.matrix_gold_tags.shape
24-
for r in range(rows):
25-
for c in range(cols):
26-
self.df.iloc[r,c].gold_tags = annotations.matrix_gold_tags.iloc[r,c].strip()
23+
tags = annotations.matrix_gold_tags
24+
if self.df.shape != (0,0):
25+
for r, row in enumerate(tags):
26+
for c, cell in enumerate(row):
27+
self.df.iloc[r,c].gold_tags = cell.strip()
2728
else:
2829
self.gold_tags = ''
2930

@@ -33,10 +34,14 @@ def from_file(cls, path, metadata, annotations=None):
3334
df = pd.read_csv(path, header=None, dtype=str).fillna('')
3435
except pd.errors.EmptyDataError:
3536
df = pd.DataFrame()
36-
return cls(df, metadata.get('caption'), metadata.get('figure_id'), annotations)
37+
if annotations is not None:
38+
table_ann = annotations.table_set.filter(name=metadata['filename']) + [None]
39+
table_ann = table_ann[0]
40+
else:
41+
table_ann = None
42+
return cls(df, metadata.get('caption'), metadata.get('figure_id'), table_ann)
3743

3844
def display(self):
39-
4045
display_table(self.df.applymap(lambda x: x.value).values, self.df.applymap(lambda x: x.gold_tags).values)
4146

4247
def read_tables(path, annotations):

0 commit comments

Comments
 (0)