Skip to content

Commit 407eb08

Browse files
author
Marcin Kardas
committed
Migrate extraction tests into py.test
1 parent 281c907 commit 407eb08

File tree

10 files changed

+41
-103
lines changed

10 files changed

+41
-103
lines changed

Makefile

Lines changed: 0 additions & 83 deletions
This file was deleted.

pytest.ini

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[pytest]
2+
filterwarnings =
3+
ignore::DeprecationWarning

test/expected/paper/layout_01.csv

Lines changed: 0 additions & 5 deletions
This file was deleted.

test/expected/paper/layout_02.csv

Lines changed: 0 additions & 4 deletions
This file was deleted.

test/expected/paper/metadata.json

Lines changed: 0 additions & 1 deletion
This file was deleted.

test/expected/paper/table_01.csv

Lines changed: 0 additions & 5 deletions
This file was deleted.

test/expected/paper/table_02.csv

Lines changed: 0 additions & 4 deletions
This file was deleted.

test/expected/paper/text.json

Lines changed: 0 additions & 1 deletion
This file was deleted.
File renamed without changes.

tests/test_extraction.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2+
3+
import pytest
4+
from pathlib import Path
5+
from axcell.helpers.paper_extractor import PaperExtractor
6+
from axcell.data.paper_collection import PaperCollection
7+
from shutil import copyfileobj
8+
import gzip
9+
10+
11+
def test_extraction(tmpdir):
    """Run PaperExtractor on a gzipped ``main.tex`` and check the result.

    The ``data/main.tex`` file located next to this test module is
    gzip-compressed into ``<tmpdir>/sources/<paper_id>``. The extractor is
    then invoked on that archive, and the papers found under
    ``<tmpdir>/papers`` are loaded and verified: exactly one paper with the
    expected id, title, table captions and table shapes.
    """
    # pack main.tex to an archive
    workdir = Path(tmpdir)
    paper_id = "1234.56789"
    tex_file = Path(__file__).resolve().parent / "data" / "main.tex"
    archive = workdir / "sources" / paper_id
    archive.parent.mkdir()
    with tex_file.open("rb") as raw:
        with gzip.open(archive, "wb") as packed:
            copyfileobj(raw, packed)

    # run the extraction pipeline on the freshly created archive
    extractor = PaperExtractor(workdir)
    assert extractor(archive) == "success"

    # load whatever the extractor wrote out
    collection = PaperCollection.from_files(workdir / "papers")
    extracted = len(collection)
    assert extracted == 1, f"Expected to extract exactly one paper, found {extracted}"

    paper = collection[0]
    assert paper.paper_id == paper_id
    assert paper.text.title == "DILBERT: Distilling Inner Latent BERT variables"
    assert len(paper.tables) == 2

    # captions are checked for every table before any shape is checked
    expected_captions = ("Table 1: A table.", "Table 2: A table.")
    for index, caption in enumerate(expected_captions):
        assert paper.tables[index].caption == caption

    expected_shapes = ((5, 3), (4, 3))
    for index, shape in enumerate(expected_shapes):
        assert paper.tables[index].shape == shape

0 commit comments

Comments
 (0)