Skip to content

Commit 1d9fcff

Browse files
author
Marcin Kardas
committed
Add pipeline caching utils
1 parent f931cf6 commit 1d9fcff

File tree

1 file changed

+40
-0
lines changed

1 file changed

+40
-0
lines changed

sota_extractor2/helpers/cache.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import pandas as pd
2+
import json
3+
4+
5+
# These functions cache the results of the corresponding pipeline steps,
# to make it faster to rerun the pipeline or to run it on a batch of papers
# with various steps on different machines. The exchange formats are ad hoc
# and can be changed.
10+
11+
12+
def load_tags(path):
    """Load cached tags from a JSON file.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The object decoded from the JSON document stored at ``path``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON is UTF-8 by specification; pin the encoding so the result does
    # not depend on the platform's locale default.
    with open(path, 'rt', encoding='utf-8') as f:
        return json.load(f)
16+
17+
18+
def save_tags(tags, path):
    """Write tags to a JSON file, replacing any existing content.

    Args:
        tags: JSON-serializable object to store.
        path: Location of the JSON file to write.

    Raises:
        OSError: If the file cannot be opened for writing.
        TypeError: If ``tags`` contains values ``json`` cannot serialize.
    """
    # Pin the encoding so the written file does not depend on the
    # platform's locale default.
    with open(path, 'wt', encoding='utf-8') as f:
        json.dump(tags, f)
21+
22+
23+
def load_structure(path):
    """Load a cached structure from a JSON file.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The object decoded from the JSON document stored at ``path``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON is UTF-8 by specification; pin the encoding so the result does
    # not depend on the platform's locale default.
    with open(path, 'rt', encoding='utf-8') as f:
        return json.load(f)
27+
28+
29+
def save_structure(structure, path):
    """Write a structure to a JSON file, replacing any existing content.

    Args:
        structure: JSON-serializable object to store.
        path: Location of the JSON file to write.

    Raises:
        OSError: If the file cannot be opened for writing.
        TypeError: If ``structure`` contains values ``json`` cannot serialize.
    """
    # Pin the encoding so the written file does not depend on the
    # platform's locale default.
    with open(path, 'wt', encoding='utf-8') as f:
        json.dump(structure, f)
32+
33+
34+
def load_proposals(path):
    """Load cached proposals from a CSV file.

    Args:
        path: Location of the CSV file to read.

    Returns:
        A pandas DataFrame whose index is taken from the first CSV column.
    """
    # index_col=0 restores the index that to_csv writes as the first column.
    return pd.read_csv(path, index_col=0)
37+
38+
39+
def save_proposals(proposals, path):
    """Write proposals to a CSV file, including the index column.

    Args:
        proposals: Object exposing a pandas-style ``to_csv`` method
            (presumably a DataFrame; verify against callers).
        path: Location of the CSV file to write.
    """
    proposals.to_csv(path)

0 commit comments

Comments
 (0)