Add unit tests for the ZPar all-caps single-word bug.

desilinguist · desilinguist · commit 792ab4a675cf · 2015-05-28T11:18:33.000-04:00
diff --git a/tests/test_depparser.py b/tests/test_depparser.py
@@ -37,6 +37,29 @@ def test_dep_parse_sentence():
     yield check_dep_parse_sentence, True
 
 
+def test_zpar_bugfix_depparse():
+    from tests import depparser
+
+    sentences = ['REBELLION',
+                 'I am going away .',
+                 'The rebellion is just another word for change and change is necessary to live .',
+                 'REBELLION',
+                 'REBELLION',
+                 'The rebellion is just another word for change and change is necessary to live .',
+                 'REBELLION',
+                 'This is just another sentence .',
+                 'REBELLION']
+
+    # dependency-parse each of the above sentences, in list order
+    parsed_sentences = [depparser.dep_parse_sentence(s) for s in sentences]
+
+    # pick out the parses of the all-caps single-word sentences and make
+    # sure every one of them is the same expected NNP/ROOT parse
+    indices_to_check = [0, 3, 4, 6, 8]
+    parses_to_check = [parsed_sentences[i] for i in indices_to_check]
+    assert_equal(set(parses_to_check), {'REBELLION\tNNP\t-1\tROOT\n'})
+
+
 def check_dep_parse_file(tokenize=False):
     """
     Check parse_file method with and without tokenization
diff --git a/tests/test_parser.py b/tests/test_parser.py
@@ -38,6 +38,29 @@ def test_parse_sentence():
     yield check_parse_sentence, True
 
 
+def test_zpar_bugfix_parse():
+    from tests import parser
+
+    sentences = ['REBELLION',
+                 'I am going away .',
+                 'The rebellion is just another word for change and change is necessary to live .',
+                 'REBELLION',
+                 'REBELLION',
+                 'The rebellion is just another word for change and change is necessary to live .',
+                 'REBELLION',
+                 'This is just another sentence .',
+                 'REBELLION']
+
+    # parse each of the above sentences, in list order
+    parsed_sentences = [parser.parse_sentence(s) for s in sentences]
+
+    # pick out the parses of the all-caps single-word sentences and make
+    # sure every one of them is the same expected (NP (NNP ...)) parse
+    indices_to_check = [0, 3, 4, 6, 8]
+    parses_to_check = [parsed_sentences[i] for i in indices_to_check]
+    assert_equal(set(parses_to_check), {'(NP (NNP REBELLION))'})
+
+
 def check_parse_file(tokenize=False):
     """
     Check parse_file method with and without tokenization
diff --git a/tests/test_tagger.py b/tests/test_tagger.py
@@ -38,6 +38,28 @@ def test_tag_sentence():
     yield check_tag_sentence, True
 
 
+def test_zpar_bugfix_tags():
+    from tests import tagger
+
+    sentences = ['REBELLION',
+                 'I am going away .',
+                 'The rebellion is just another word for change and change is necessary to live .',
+                 'REBELLION',
+                 'REBELLION',
+                 'The rebellion is just another word for change and change is necessary to live .',
+                 'REBELLION',
+                 'This is just another sentence .',
+                 'REBELLION']
+
+    # tag each of the above sentences, in list order
+    tagged_sentences = [tagger.tag_sentence(s) for s in sentences]
+
+    # for each all-caps single-word sentence, take the piece after the
+    # first '/' in the tagger output (the tag) and make sure it is NNP
+    indices_to_check = [0, 3, 4, 6, 8]
+    tags_to_check = [tagged_sentences[i].split('/')[1] for i in indices_to_check]
+    assert_equal(set(tags_to_check), {'NNP'})
+
 def check_tag_file(tokenize=False):
     """
     Check tag_file method with and without tokenization