Skip to content

Commit 8d9c680

Browse files
new: Add tests for split_long_span
1 parent 0b309e6 commit 8d9c680

File tree

1 file changed

+52
-4
lines changed

1 file changed

+52
-4
lines changed

tests/prodigy/test_reference_to_token_annotations.py

Lines changed: 52 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -83,7 +83,7 @@ def test_create_span(splitter):
83 83
assert out == after
84 84

85 85

86-
def test_split_long_span(splitter):
86+
def test_split_long_span_three_token_span(splitter):
87 87

88 88
tokens = [
89 89
{"start": 0, "end": 0, "id": 0},
@@ -97,18 +97,66 @@ def test_split_long_span(splitter):
97 97

98 98
span = {"start": 2, "end": 4, "token_start": 2, "token_end": 4, "label": "BE"}
99 99

100-
after = [
100+
expected = [
101 101
{"start": 2, "end": 2, "token_start": 2, "token_end": 2, "label": "b-r"},
102 102
{"start": 3, "end": 3, "token_start": 3, "token_end": 3, "label": "i-r"},
103 103
{"start": 4, "end": 4, "token_start": 4, "token_end": 4, "label": "e-r"},
104 104
]
105 105

106-
out = splitter.split_long_span(
106+
actual = splitter.split_long_span(
107 107
tokens, span, start_label="b-r", end_label="e-r", inside_label="i-r"
108 108
)
109 109

110-
assert out == after
110+
assert expected == actual
111+
112+
def test_split_long_span_two_token_span(splitter):
113+
114+
tokens = [
115+
{"start": 0, "end": 0, "id": 0},
116+
{"start": 1, "end": 1, "id": 1},
117+
{"start": 2, "end": 2, "id": 2},
118+
{"start": 3, "end": 3, "id": 3},
119+
{"start": 4, "end": 4, "id": 4},
120+
{"start": 5, "end": 5, "id": 5},
121+
{"start": 6, "end": 6, "id": 6},
122+
]
123+
124+
span = {"start": 2, "end": 3, "token_start": 2, "token_end": 3, "label": "BE"}
125+
126+
expected = [
127+
{"start": 2, "end": 2, "token_start": 2, "token_end": 2, "label": "b-r"},
128+
{"start": 3, "end": 3, "token_start": 3, "token_end": 3, "label": "e-r"},
129+
]
130+
131+
actual = splitter.split_long_span(
132+
tokens, span, start_label="b-r", end_label="e-r", inside_label="i-r"
133+
)
134+
135+
assert expected == actual
136+
137+
def test_split_long_span_one_token_span(splitter):
138+
139+
tokens = [
140+
{"start": 0, "end": 0, "id": 0},
141+
{"start": 1, "end": 1, "id": 1},
142+
{"start": 2, "end": 2, "id": 2},
143+
{"start": 3, "end": 3, "id": 3},
144+
{"start": 4, "end": 4, "id": 4},
145+
{"start": 5, "end": 5, "id": 5},
146+
{"start": 6, "end": 6, "id": 6},
147+
]
148+
149+
span = {"start": 2, "end": 2, "token_start": 2, "token_end": 2, "label": "BE"}
150+
151+
expected = [
152+
{"start": 2, "end": 2, "token_start": 2, "token_end": 2, "label": "b-r"},
153+
]
154+
155+
actual = splitter.split_long_span(
156+
tokens, span, start_label="b-r", end_label="e-r", inside_label="i-r"
157+
)
111 158

159+
assert expected == actual
112 160

113 161
def test_reference_spans_be(splitter):
114 162

0 commit comments

Comments (0)