Skip to content

Commit 292a037

Browse files
author
Marcin Kardas
committed
Load tables from files
1 parent d164644 commit 292a037

File tree

2 files changed

+13
-7
lines changed

2 files changed

+13
-7
lines changed

extract_tables.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -276,6 +276,16 @@ def save_tables(data, outdir):
276 276
json.dump(metadata, f)
277 277

278 278

279+
def load_tables(path):
280+
path = Path(path)
281+
with open(path / "metadata.json", "r") as f:
282+
metadata = json.load(f)
283+
284+
return [Table.from_file(
285+
path,
286+
table_metadata) for table_metadata in metadata]
287+
288+
279 289
def set_ids_by_labels(soup):
280 290
captions = soup.select(".ltx_caption")
281 291
for caption in captions:

sota_extractor2/models/linking/linker.py

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -17,14 +17,10 @@ def __call__(self, paper, tables, topk=1):
17 17
dataset_extractor=self.dataset_extractor,
18 18
topk=topk)
19 19

20-
if topk == 1:
21-
proposals = proposals.set_index('cell_ext_id')
22-
best = proposals
23-
else:
24-
best = self.get_best_proposals(proposals)
20+
proposals = proposals.set_index('cell_ext_id')
25 21

26-
pipeline_logger(f"{Linker.step}::linked", paper=paper, tables=tables, proposals=best)
22+
pipeline_logger(f"{Linker.step}::linked", paper=paper, tables=tables, proposals=proposals)
27 23
return proposals
28 24

29 25
def get_best_proposals(self, proposals):
30-
return proposals.groupby('cell_ext_id').head(1).set_index('cell_ext_id')
26+
return proposals.groupby('cell_ext_id').head(1)

0 commit comments

Comments
 (0)