|
7 | 7 | import pytest |
8 | 8 |
|
9 | 9 | from deep_reference_parser.io import read_jsonl |
10 | | -from deep_reference_parser.prodigy.prodigy_to_tsv import TokenLabelPairs, prodigy_to_tsv |
| 10 | +from deep_reference_parser.prodigy.prodigy_to_tsv import TokenLabelPairs, prodigy_to_tsv, check_inputs |
11 | 11 |
|
12 | 12 | from .common import TEST_SPANS, TEST_TOKENS |
13 | 13 |
|
@@ -738,6 +738,41 @@ def test_reference_spans_real_example(doc): |
738 | 738 | tlp = TokenLabelPairs(respect_line_endings=False) |
739 | 739 | actual = tlp.run([doc]) |
740 | 740 |
|
741 | | - import pprint |
742 | | - |
743 | 741 | assert actual == expected |
| 742 | + |
def test_check_input_exist_on_doc_mismatch():
    """check_inputs should abort (SystemExit) when the two annotation
    datasets do not share the same document ``_input_hash`` values."""
    first = [{"_input_hash": "a1"}, {"_input_hash": "a2"}]
    second = [{"_input_hash": "b1"}, {"_input_hash": "b2"}]

    with pytest.raises(SystemExit):
        check_inputs([first, second])
| 750 | + |
def test_check_input_exist_on_tokens_mismatch():
    """check_inputs should abort (SystemExit) when document hashes match
    but the token texts differ between the two datasets."""
    left = [
        {"_input_hash": "a", "tokens": [{"text": "a"}]},
        {"_input_hash": "a", "tokens": [{"text": "b"}]},
    ]
    right = [
        {"_input_hash": "a", "tokens": [{"text": "b"}]},
        {"_input_hash": "a", "tokens": [{"text": "b"}]},
    ]

    with pytest.raises(SystemExit):
        check_inputs([left, right])
| 765 | + |
def test_check_input():
    """check_inputs returns a truthy value when both datasets agree on
    document hashes and token texts."""
    left = [
        {"_input_hash": "a", "tokens": [{"text": "a"}]},
        {"_input_hash": "a", "tokens": [{"text": "b"}]},
    ]
    right = [
        {"_input_hash": "a", "tokens": [{"text": "a"}]},
        {"_input_hash": "a", "tokens": [{"text": "b"}]},
    ]

    assert check_inputs([left, right])
0 commit comments