forked from explosion/spaCy
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_issue3009.py
More file actions
67 lines (56 loc) · 1.55 KB
/
test_issue3009.py
File metadata and controls
67 lines (56 loc) · 1.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# coding: utf-8
from __future__ import unicode_literals
import pytest
from spacy.matcher import Matcher
from spacy.tokens import Doc
def _have_to_do(wildcard_op=None):
    """Build one token pattern for "<have> ... to do <ADP>".

    wildcard_op: when given, a wildcard token (ASCII, non-punctuation) carrying
        this quantifier operator is inserted right after the "have" token.

    Returns a fresh list of fresh dicts on every call, so no pattern shares
    mutable state with another.
    """
    tokens = [{"LEMMA": "have"}]
    if wildcard_op is not None:
        tokens.append({"IS_ASCII": True, "IS_PUNCT": False, "OP": wildcard_op})
    tokens.extend([{"LOWER": "to"}, {"LOWER": "do"}, {"POS": "ADP"}])
    return tokens


# (match key, list of token patterns) pairs fed to the parametrized test:
# "1" has no wildcard token, "2" uses the "*" operator, "3" uses "?".
PATTERNS = [
    ("1", [_have_to_do()]),
    ("2", [_have_to_do("*")]),
    ("3", [_have_to_do("?")]),
]
@pytest.fixture
def doc(en_tokenizer):
    """Tokenize "also has to do with" and assign a fine-grained tag per token.

    NOTE(review): PATTERNS matches on LEMMA and POS while only ``tag_`` is set
    here; presumably the tag setter derives those attributes -- confirm
    against the spaCy version under test.
    """
    tokens = en_tokenizer("also has to do with")
    # Indexing (rather than zipping) keeps the IndexError if the tokenizer
    # ever yields fewer tokens than there are tags.
    for position, tag in enumerate(("RB", "VBZ", "TO", "VB", "IN")):
        tokens[position].tag_ = tag
    return tokens
@pytest.fixture
def matcher(en_tokenizer):
    """Return a new Matcher built on the vocab of the ``en_tokenizer`` fixture."""
    vocab = en_tokenizer.vocab
    return Matcher(vocab)
@pytest.mark.parametrize("pattern", PATTERNS)
def test_issue3009(doc, matcher, pattern):
    """Each quantifier variant in PATTERNS must yield at least one match."""
    key, token_patterns = pattern
    # Second positional argument is the on-match callback slot; none is used.
    matcher.add(key, None, *token_patterns)
    found = matcher(doc)
    assert found
def test_issue2464(matcher):
    """Two consecutive "?" tokens over a two-word doc must give three matches.

    Kept in this module because it exercises the same quantifier bug as
    test_issue3009.
    """
    two_words = Doc(matcher.vocab, words=["a", "b"])
    optional_pair = [{"OP": "?"}, {"OP": "?"}]
    matcher.add("4", None, optional_pair)
    found = matcher(two_words)
    assert len(found) == 3