Skip to content

Commit f69dabc

Browse files
authored
Merge pull request #15 from thomashacker/bugifx/fix-span-suggester-tests
Fix tests for span suggesters
2 parents 9b13f45 + 18e1c1e commit f69dabc

File tree

2 files changed

+207
-15
lines changed

2 files changed

+207
-15
lines changed

azure-pipelines.yml

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,6 @@ jobs:
5050

5151
- script: |
5252
pip install -r requirements.txt
53-
python -m spacy download en_core_web_sm
5453
pip install "torch==1.9.0+cpu" -f https://download.pytorch.org/whl/torch_stable.html
5554
python -m pytest --pyargs spacy_experimental
5655
displayName: 'Run tests'

spacy_experimental/span_suggesters/tests/test_suggesters.py

Lines changed: 207 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,59 +1,252 @@
11
from spacy.util import registry
22
import spacy
3+
from spacy.tokens import Doc
34

45

56
def test_ngram_subtree_suggester():
6-
nlp = spacy.load("en_core_web_sm")
7-
doc = nlp("I decided to go for a little run.")
7+
8+
nlp = spacy.blank("en")
9+
text = "I decided to go for a little run."
10+
heads = [1, 1, 3, 1, 3, 7, 7, 4, 1]
11+
deps = ["nsubj", "ROOT", "aux", "xcomp", "prep", "det", "amod", "pobj", "punct"]
12+
13+
tokenized = nlp(text)
14+
spaces = [bool(t.whitespace_) for t in tokenized]
15+
doc = Doc(
16+
tokenized.vocab,
17+
words=[t.text for t in tokenized],
18+
spaces=spaces,
19+
heads=heads,
20+
deps=deps,
21+
)
22+
823
suggester = registry.misc.get("spacy-experimental.ngram_subtree_suggester.v1")([1])
924
candidates = suggester([doc])
1025

1126
assert len(candidates.data) == 17
1227

1328

1429
def test_subtree_suggester():
15-
nlp = spacy.load("en_core_web_sm")
16-
doc = nlp("I decided to go for a little run.")
30+
31+
nlp = spacy.blank("en")
32+
text = "I decided to go for a little run."
33+
heads = [1, 1, 3, 1, 3, 7, 7, 4, 1]
34+
deps = ["nsubj", "ROOT", "aux", "xcomp", "prep", "det", "amod", "pobj", "punct"]
35+
36+
tokenized = nlp(text)
37+
spaces = [bool(t.whitespace_) for t in tokenized]
38+
doc = Doc(
39+
tokenized.vocab,
40+
words=[t.text for t in tokenized],
41+
spaces=spaces,
42+
heads=heads,
43+
deps=deps,
44+
)
45+
1746
suggester = registry.misc.get("spacy-experimental.subtree_suggester.v1")()
1847
candidates = suggester([doc])
1948

2049
assert len(candidates.data) == 15
2150

2251

2352
def test_ngram_chunk_suggester():
24-
nlp = spacy.load("en_core_web_sm")
25-
doc = nlp(
26-
"The best thing about visiting the President is the food! I must've drank me fifteen Dr.Peppers."
53+
54+
nlp = spacy.blank("en")
55+
text = "The best thing about visiting the President is the food! I must've drank me fifteen Dr.Peppers."
56+
heads = [2, 2, 7, 2, 3, 6, 4, 7, 9, 7, 7, 14, 14, 14, 14, 14, 18, 18, 14, 14]
57+
deps = [
58+
"det",
59+
"amod",
60+
"nsubj",
61+
"prep",
62+
"pcomp",
63+
"det",
64+
"dobj",
65+
"ROOT",
66+
"det",
67+
"attr",
68+
"punct",
69+
"nsubj",
70+
"aux",
71+
"aux",
72+
"ROOT",
73+
"dative",
74+
"nummod",
75+
"compound",
76+
"dobj",
77+
"punct",
78+
]
79+
pos = [
80+
"DET",
81+
"ADJ",
82+
"NOUN",
83+
"ADP",
84+
"VERB",
85+
"DET",
86+
"PROPN",
87+
"AUX",
88+
"DET",
89+
"NOUN",
90+
"PUNCT",
91+
"PRON",
92+
"AUX",
93+
"AUX",
94+
"VERB",
95+
"PRON",
96+
"NUM",
97+
"PROPN",
98+
"PROPN",
99+
"PUNCT",
100+
]
101+
102+
tokenized = nlp(text)
103+
spaces = [bool(t.whitespace_) for t in tokenized]
104+
doc = Doc(
105+
tokenized.vocab,
106+
words=[t.text for t in tokenized],
107+
spaces=spaces,
108+
heads=heads,
109+
deps=deps,
110+
pos=pos,
27111
)
112+
28113
suggester = registry.misc.get("spacy-experimental.ngram_chunk_suggester.v1")([1])
29114
candidates = suggester([doc])
30115

31116
assert len(candidates.data) == 24
32117

33118

34119
def test_chunk_suggester():
35-
nlp = spacy.load("en_core_web_sm")
36-
doc = nlp(
37-
"The best thing about visiting the President is the food! I must've drank me fifteen Dr.Peppers."
120+
121+
nlp = spacy.blank("en")
122+
text = "The best thing about visiting the President is the food! I must've drank me fifteen Dr.Peppers."
123+
heads = [2, 2, 7, 2, 3, 6, 4, 7, 9, 7, 7, 14, 14, 14, 14, 14, 18, 18, 14, 14]
124+
deps = [
125+
"det",
126+
"amod",
127+
"nsubj",
128+
"prep",
129+
"pcomp",
130+
"det",
131+
"dobj",
132+
"ROOT",
133+
"det",
134+
"attr",
135+
"punct",
136+
"nsubj",
137+
"aux",
138+
"aux",
139+
"ROOT",
140+
"dative",
141+
"nummod",
142+
"compound",
143+
"dobj",
144+
"punct",
145+
]
146+
pos = [
147+
"DET",
148+
"ADJ",
149+
"NOUN",
150+
"ADP",
151+
"VERB",
152+
"DET",
153+
"PROPN",
154+
"AUX",
155+
"DET",
156+
"NOUN",
157+
"PUNCT",
158+
"PRON",
159+
"AUX",
160+
"AUX",
161+
"VERB",
162+
"PRON",
163+
"NUM",
164+
"PROPN",
165+
"PROPN",
166+
"PUNCT",
167+
]
168+
169+
tokenized = nlp(text)
170+
spaces = [bool(t.whitespace_) for t in tokenized]
171+
doc = Doc(
172+
tokenized.vocab,
173+
words=[t.text for t in tokenized],
174+
spaces=spaces,
175+
heads=heads,
176+
deps=deps,
177+
pos=pos,
38178
)
179+
39180
suggester = registry.misc.get("spacy-experimental.chunk_suggester.v1")()
40181
candidates = suggester([doc])
41182

42183
assert len(candidates.data) == 6
43184

44185

45186
def test_ngram_sentence_suggester():
46-
nlp = spacy.load("en_core_web_sm")
47-
doc = nlp("The first sentence. The second sentence. And the third sentence.")
187+
188+
nlp = spacy.blank("en")
189+
text = "The first sentence. The second sentence. And the third sentence."
190+
sents = [
191+
True,
192+
False,
193+
False,
194+
False,
195+
True,
196+
False,
197+
False,
198+
False,
199+
True,
200+
False,
201+
False,
202+
False,
203+
False,
204+
]
205+
206+
tokenized = nlp(text)
207+
spaces = [bool(t.whitespace_) for t in tokenized]
208+
doc = Doc(
209+
tokenized.vocab,
210+
words=[t.text for t in tokenized],
211+
spaces=spaces,
212+
sent_starts=sents,
213+
)
214+
48215
suggester = registry.misc.get("spacy-experimental.ngram_sentence_suggester.v1")([1])
49216
candidates = suggester([doc])
50217

51218
assert len(candidates.data) == 16
52219

53220

54221
def test_sentence_suggester():
55-
nlp = spacy.load("en_core_web_sm")
56-
doc = nlp("The first sentence. The second sentence. And the third sentence.")
222+
223+
nlp = spacy.blank("en")
224+
text = "The first sentence. The second sentence. And the third sentence."
225+
sents = [
226+
True,
227+
False,
228+
False,
229+
False,
230+
True,
231+
False,
232+
False,
233+
False,
234+
True,
235+
False,
236+
False,
237+
False,
238+
False,
239+
]
240+
241+
tokenized = nlp(text)
242+
spaces = [bool(t.whitespace_) for t in tokenized]
243+
doc = Doc(
244+
tokenized.vocab,
245+
words=[t.text for t in tokenized],
246+
spaces=spaces,
247+
sent_starts=sents,
248+
)
249+
57250
suggester = registry.misc.get("spacy-experimental.sentence_suggester.v1")()
58251
candidates = suggester([doc])
59252

0 commit comments

Comments
 (0)