@@ -83,7 +83,7 @@ def test_create_span(splitter):
8383 assert out == after
8484
8585
86- def test_split_long_span (splitter ):
86+ def test_split_long_span_three_token_span (splitter ):
8787
8888 tokens = [
8989 {"start" : 0 , "end" : 0 , "id" : 0 },
@@ -97,18 +97,66 @@ def test_split_long_span(splitter):
9797
9898 span = {"start" : 2 , "end" : 4 , "token_start" : 2 , "token_end" : 4 , "label" : "BE" }
9999
100- after = [
100+ expected = [
101101 {"start" : 2 , "end" : 2 , "token_start" : 2 , "token_end" : 2 , "label" : "b-r" },
102102 {"start" : 3 , "end" : 3 , "token_start" : 3 , "token_end" : 3 , "label" : "i-r" },
103103 {"start" : 4 , "end" : 4 , "token_start" : 4 , "token_end" : 4 , "label" : "e-r" },
104104 ]
105105
106- out = splitter .split_long_span (
106+ actual = splitter .split_long_span (
107107 tokens , span , start_label = "b-r" , end_label = "e-r" , inside_label = "i-r"
108108 )
109109
110- assert out == after
110+ assert expected == actual
111+
def test_split_long_span_two_token_span(splitter):
    """Check split_long_span on a span that covers exactly two tokens.

    The expected output has one single-token span per covered token: the
    first carries the start label and the second the end label, with no
    inside-labelled span in between.
    """

    tokens = [
        {"start": 0, "end": 0, "id": 0},
        {"start": 1, "end": 1, "id": 1},
        {"start": 2, "end": 2, "id": 2},
        {"start": 3, "end": 3, "id": 3},
        {"start": 4, "end": 4, "id": 4},
        {"start": 5, "end": 5, "id": 5},
        {"start": 6, "end": 6, "id": 6},
    ]

    # Span over tokens 2..3 (both bounds inclusive, as in the expected output).
    span = {"start": 2, "end": 3, "token_start": 2, "token_end": 3, "label": "BE"}

    expected = [
        {"start": 2, "end": 2, "token_start": 2, "token_end": 2, "label": "b-r"},
        {"start": 3, "end": 3, "token_start": 3, "token_end": 3, "label": "e-r"},
    ]

    actual = splitter.split_long_span(
        tokens, span, start_label="b-r", end_label="e-r", inside_label="i-r"
    )

    assert expected == actual
136+
def test_split_long_span_one_token_span(splitter):
    """Check split_long_span on a span that covers a single token.

    The expected output is one single-token span carrying the start label
    only; no end-labelled or inside-labelled span is expected.
    """

    tokens = [
        {"start": 0, "end": 0, "id": 0},
        {"start": 1, "end": 1, "id": 1},
        {"start": 2, "end": 2, "id": 2},
        {"start": 3, "end": 3, "id": 3},
        {"start": 4, "end": 4, "id": 4},
        {"start": 5, "end": 5, "id": 5},
        {"start": 6, "end": 6, "id": 6},
    ]

    # Span whose start and end both point at token 2.
    span = {"start": 2, "end": 2, "token_start": 2, "token_end": 2, "label": "BE"}

    expected = [
        {"start": 2, "end": 2, "token_start": 2, "token_end": 2, "label": "b-r"},
    ]

    actual = splitter.split_long_span(
        tokens, span, start_label="b-r", end_label="e-r", inside_label="i-r"
    )

    assert expected == actual
112160
113161def test_reference_spans_be (splitter ):
114162
0 commit comments