Skip to content
This repository was archived by the owner on Sep 13, 2022. It is now read-only.

Commit 792ab4a

Browse files
committed
Add unit tests for the ZPar all-caps single-word bug.
1 parent 68a479e commit 792ab4a

File tree

3 files changed

+68
-0
lines changed

3 files changed

+68
-0
lines changed

tests/test_depparser.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,29 @@ def test_dep_parse_sentence():
3737
yield check_dep_parse_sentence, True
3838

3939

40+
def test_zpar_bugfix_depparse():
    """
    Check that an all-caps single-word sentence gets the same dependency
    parse every time, even when other sentences are parsed in between
    """
    # NOTE(review): imported inside the test as in the original; presumably
    # `tests.depparser` only exists once the package-level setup has run
    # -- confirm before moving this to module level.
    from tests import depparser

    all_caps_sentence = 'REBELLION'
    sentences = [all_caps_sentence,
                 'I am going away .',
                 'The rebellion is just another word for change and change is necessary to live .',
                 all_caps_sentence,
                 all_caps_sentence,
                 'The rebellion is just another word for change and change is necessary to live .',
                 all_caps_sentence,
                 'This is just another sentence .',
                 all_caps_sentence]

    # parse ALL of the above sentences, in order; the regular sentences
    # are deliberately interleaved with the all-caps ones
    parsed_sentences = [depparser.dep_parse_sentence(s) for s in sentences]

    # get the parses for all of the all-caps single-word sentences
    # (positions 0, 3, 4, 6 and 8) and make sure they are all the same;
    # the positions are derived from the list so they cannot drift
    parses_to_check = [parse
                       for sentence, parse in zip(sentences, parsed_sentences)
                       if sentence == all_caps_sentence]
    assert_equal(set(parses_to_check), {'REBELLION\tNNP\t-1\tROOT\n'})
61+
62+
4063
def check_dep_parse_file(tokenize=False):
4164
"""
4265
Check parse_file method with and without tokenization

tests/test_parser.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,29 @@ def test_parse_sentence():
3838
yield check_parse_sentence, True
3939

4040

41+
def test_zpar_bugfix_parse():
    """
    Check that an all-caps single-word sentence gets the same constituency
    parse every time, even when other sentences are parsed in between
    """
    # NOTE(review): imported inside the test as in the original; presumably
    # `tests.parser` only exists once the package-level setup has run
    # -- confirm before moving this to module level.
    from tests import parser

    all_caps_sentence = 'REBELLION'
    sentences = [all_caps_sentence,
                 'I am going away .',
                 'The rebellion is just another word for change and change is necessary to live .',
                 all_caps_sentence,
                 all_caps_sentence,
                 'The rebellion is just another word for change and change is necessary to live .',
                 all_caps_sentence,
                 'This is just another sentence .',
                 all_caps_sentence]

    # parse ALL of the above sentences, in order; the regular sentences
    # are deliberately interleaved with the all-caps ones
    parsed_sentences = [parser.parse_sentence(s) for s in sentences]

    # get the parses for all of the all-caps single-word sentences
    # (positions 0, 3, 4, 6 and 8) and make sure they are all the same;
    # the positions are derived from the list so they cannot drift
    parses_to_check = [parse
                       for sentence, parse in zip(sentences, parsed_sentences)
                       if sentence == all_caps_sentence]
    assert_equal(set(parses_to_check), {'(NP (NNP REBELLION))'})
62+
63+
4164
def check_parse_file(tokenize=False):
4265
"""
4366
Check parse_file method with and without tokenization

tests/test_tagger.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,28 @@ def test_tag_sentence():
3838
yield check_tag_sentence, True
3939

4040

41+
def test_zpar_bugfix_tags():
    """
    Check that the all-caps single-word sentence is tagged NNP every time
    it is seen, even when other sentences are tagged in between
    """
    from tests import tagger

    long_sentence = 'The rebellion is just another word for change and change is necessary to live .'
    sentences = [
        'REBELLION',
        'I am going away .',
        long_sentence,
        'REBELLION',
        'REBELLION',
        long_sentence,
        'REBELLION',
        'This is just another sentence .',
        'REBELLION',
    ]

    # run the tagger over every sentence, in the order given
    tagged_sentences = []
    for sentence in sentences:
        tagged_sentences.append(tagger.tag_sentence(s=sentence) if False else tagger.tag_sentence(sentence))

    # collect the tag (the text after the first '/') assigned at each
    # position holding the all-caps single-word sentence
    observed_tags = set()
    for position in (0, 3, 4, 6, 8):
        word_and_tag = tagged_sentences[position].split('/')
        observed_tags.add(word_and_tag[1])

    # every occurrence must have been tagged as NNP
    assert_equal(observed_tags, {'NNP'})
62+
4163
def check_tag_file(tokenize=False):
4264
"""
4365
Check tag_file method with and without tokenization

0 commit comments

Comments
 (0)