Skip to content

Commit 87897f1

Browse files
committed
Remove parallel loading
1 parent 917f1b3 commit 87897f1

File tree

1 file changed

+2 -5 lines changed

1 file changed

+2 -5 lines changed

sota_extractor2/data/paper_collection.py

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,6 @@
33
from .json import load_gql_dump
44
from pathlib import Path
55
import re
6-
from tqdm import tqdm
7-
from joblib import Parallel, delayed
86

97
class Paper:
108
def __init__(self, text, tables, annotations):
@@ -53,14 +51,13 @@ def __iter__(self):
5351

5452
def _load_texts(self):
5553
files = list((self.path / "texts").glob("**/*.json"))
56-
texts = Parallel(n_jobs=-1, prefer="processes")(delayed(PaperText.from_file)(f) for f in files)
54+
texts = [PaperText.from_file(f) for f in files]
5755
return {clear_arxiv_version(text.meta.id): text for text in texts}
5856

5957

6058
def _load_tables(self, annotations):
6159
files = list((self.path / "tables").glob("**/metadata.json"))
62-
tables = Parallel(n_jobs=-1, prefer="processes")(delayed(read_tables)(f.parent, annotations) for f in files)
63-
return {clear_arxiv_version(f.parent.name): tbls for f, tbls in zip(files, tables)}
60+
return {clear_arxiv_version(f.parent.name): read_tables(f.parent, annotations) for f in files}
6461

6562
def _load_annotated_papers(self):
6663
dump = load_gql_dump(self.path / "structure-annotations.json.gz", compressed=True)["allPapers"]

0 commit comments

Comments (0)