Skip to content

Commit 136df23

Browse files
new: Add new tests for prodigy_to_tsv
1 parent 6875446 commit 136df23

File tree

2 files changed

+45
-3
lines changed

2 files changed

+45
-3
lines changed

tests/prodigy/test_prodigy_entrypoints.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,13 @@ def test_prodigy_to_tsv(tmpdir):
3535
respect_docs=True,
3636
)
3737

38+
def test_prodigy_to_tsv_multiple_inputs(tmpdir):
    """Smoke-test ``prodigy_to_tsv`` with the same input given twice.

    The two inputs are joined into one comma-separated string, and the
    output is written to ``tokens.tsv`` inside pytest's ``tmpdir``. The test
    makes no assertion on the output; it only checks the call completes.
    """
    # A single string holding both input paths, separated by a comma.
    input_files = TEST_TOKEN_LABELLED + "," + TEST_TOKEN_LABELLED
    output_file = os.path.join(tmpdir, "tokens.tsv")

    prodigy_to_tsv(
        input_files, output_file, respect_lines=False, respect_docs=True
    )
3845

3946
def test_reach_to_prodigy(tmpdir):
4047
reach_to_prodigy(TEST_REACH, os.path.join(tmpdir, "prodigy.jsonl"))

tests/prodigy/test_prodigy_to_tsv.py

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import pytest
88

99
from deep_reference_parser.io import read_jsonl
10-
from deep_reference_parser.prodigy.prodigy_to_tsv import TokenLabelPairs, prodigy_to_tsv
10+
from deep_reference_parser.prodigy.prodigy_to_tsv import TokenLabelPairs, prodigy_to_tsv, check_inputs
1111

1212
from .common import TEST_SPANS, TEST_TOKENS
1313

@@ -738,6 +738,41 @@ def test_reference_spans_real_example(doc):
738738
tlp = TokenLabelPairs(respect_line_endings=False)
739739
actual = tlp.run([doc])
740740

741-
import pprint
742-
743741
assert actual == expected
742+
743+
def test_check_input_exist_on_doc_mismatch():
    """``check_inputs`` must raise ``SystemExit`` for disjoint ``_input_hash`` sets.

    The first dataset carries hashes ``a1``/``a2`` and the second
    ``b1``/``b2``, so no document in one dataset appears in the other.
    """
    first = [{"_input_hash": doc_hash} for doc_hash in ("a1", "a2")]
    second = [{"_input_hash": doc_hash} for doc_hash in ("b1", "b2")]
    datasets = [first, second]

    with pytest.raises(SystemExit):
        check_inputs(datasets)
750+
751+
def test_check_input_exist_on_tokens_mismatch():
    """``check_inputs`` must raise ``SystemExit`` when token texts differ.

    Every document shares the ``_input_hash`` ``"a"``, but the first document
    has token text ``"a"`` in one dataset and ``"b"`` in the other.
    """
    # Token texts per document, in order, for each dataset.
    first = [
        {"_input_hash": "a", "tokens": [{"text": token_text}]}
        for token_text in ("a", "b")
    ]
    second = [
        {"_input_hash": "a", "tokens": [{"text": token_text}]}
        for token_text in ("b", "b")
    ]
    datasets = [first, second]

    with pytest.raises(SystemExit):
        check_inputs(datasets)
765+
766+
def test_check_input():
    """``check_inputs`` must return a truthy value for matching datasets.

    Both datasets hold the same two documents (hash ``"a"``, token texts
    ``"a"`` then ``"b"``), built as separate objects rather than aliases.
    """
    token_texts = ("a", "b")

    # Two independent comprehensions keep the datasets equal but distinct.
    first = [
        {"_input_hash": "a", "tokens": [{"text": token_text}]}
        for token_text in token_texts
    ]
    second = [
        {"_input_hash": "a", "tokens": [{"text": token_text}]}
        for token_text in token_texts
    ]

    outcome = check_inputs([first, second])
    assert outcome

0 commit comments

Comments
 (0)