
Commit 2388603 (2 parents: 2f3f0de + 6b054e8)

Merge pull request #11 from EducationalTestingService/feature/add_unit_tests

Unit Tests

File tree

4 files changed: +273, -0 lines

tests/__init__.py

Lines changed: 64 additions & 0 deletions
@@ -0,0 +1,64 @@
"""
Module for running a bunch of simple unit tests. Should be expanded more in
the future.

:author: Nitin Madnani ([email protected])
"""

from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

import glob
import itertools
import os

from io import open
from os.path import abspath, dirname, exists, join

from zpar import ZPar

_my_dir = abspath(dirname(__file__))

z = None
tagger = None
parser = None
depparser = None


def setUp():
    """
    set up things we need for the tests
    """
    global z, tagger, parser, depparser

    assert 'ZPAR_MODEL_DIR' in os.environ

    model_dir = os.environ['ZPAR_MODEL_DIR']

    z = ZPar(model_dir)
    tagger = z.get_tagger()
    parser = z.get_parser()
    depparser = z.get_depparser()


def tearDown():
    """
    Clean up after the tests
    """
    global z, tagger, parser, depparser

    if z:
        z.close()
        del tagger
        del parser
        del depparser
        del z

    # delete all the files we may have created
    data_dir = abspath(join(_my_dir, '..', 'examples'))
    for f in glob.glob(join(data_dir, 'test*.tag')):
        os.unlink(f)
    for f in glob.glob(join(data_dir, 'test*.parse')):
        os.unlink(f)
    for f in glob.glob(join(data_dir, 'test*.dep')):
        os.unlink(f)
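These module-level setUp() and tearDown() fixtures, together with the generator-style tests in the files below, follow the nose conventions (the test modules import from nose.tools), and they require the ZPAR_MODEL_DIR environment variable to point at a directory containing the English models. A minimal standalone sketch of the same flow outside the test harness, using only the calls the fixtures themselves make:

# Minimal sketch of the fixture flow above, run outside the test harness.
# Assumes ZPAR_MODEL_DIR points at a directory with the English ZPar models.
import os
from zpar import ZPar

model_dir = os.environ['ZPAR_MODEL_DIR']
z = ZPar(model_dir)
tagger = z.get_tagger()
parser = z.get_parser()
depparser = z.get_depparser()

# ... call tagger/parser/depparser here, as the tests below do ...

# mirrors tearDown(): release the loaded models
z.close()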

tests/test_depparser.py

Lines changed: 73 additions & 0 deletions
@@ -0,0 +1,73 @@
"""
Module for running a bunch of simple unit tests. Should be expanded more in
the future.

:author: Nitin Madnani ([email protected])
"""

from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

import itertools
import os

from io import open
from os.path import abspath, dirname, exists, join

import numpy as np
from nose.tools import eq_, raises, assert_equal, assert_not_equal

_my_dir = abspath(dirname(__file__))


def check_dep_parse_sentence(tokenize=False):
    """
    Check dep_parse_sentence method with and without tokenization
    """
    from tests import depparser

    sentence = "I'm going to the market." if tokenize else "I 'm going to the market ."
    correct_output = "I\tPRP\t1\tSUB\n'm\tVBP\t-1\tROOT\ngoing\tVBG\t1\tVC\nto\tTO\t2\tVMOD\nthe\tDT\t5\tNMOD\nmarket\tNN\t3\tPMOD\n.\t.\t1\tP\n"
    parsed_sentence = depparser.dep_parse_sentence(sentence, tokenize=tokenize)
    assert_equal(parsed_sentence, correct_output)


def test_dep_parse_sentence():
    yield check_dep_parse_sentence, False
    yield check_dep_parse_sentence, True


def check_dep_parse_file(tokenize=False):
    """
    Check dep_parse_file method with and without tokenization
    """
    from tests import depparser

    prefix = 'test' if tokenize else 'test_tokenized'

    correct_output = ['I\tPRP\t1\tSUB', 'am\tVBP\t-1\tROOT',
                      'going\tVBG\t1\tVC', 'to\tTO\t2\tVMOD',
                      'the\tDT\t5\tNMOD', 'market\tNN\t3\tPMOD',
                      '.\t.\t1\tP', '', 'Are\tVBP\t-1\tROOT',
                      'you\tPRP\t0\tSUB', 'going\tVBG\t0\tVMOD',
                      'to\tTO\t4\tVMOD', 'come\tVB\t2\tVMOD',
                      'with\tIN\t4\tVMOD', 'me\tPRP\t5\tPMOD',
                      '?\t.\t0\tP', '']

    input_file = abspath(join(_my_dir, '..', 'examples', '{}.txt'.format(prefix)))
    output_file = abspath(join(_my_dir, '..', 'examples', '{}.dep'.format(prefix)))

    # dependency parse the file
    depparser.dep_parse_file(input_file, output_file, tokenize=tokenize)

    # read the output file and make sure we have the expected output
    with open(output_file, 'r') as outf:
        output = [l.strip() for l in outf.readlines()]

    assert_equal(output, correct_output)


def test_dep_parse_file():
    yield check_dep_parse_file, False
    yield check_dep_parse_file, True
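As the expected outputs above show, dep_parse_sentence() returns one token per line in a tab-separated word / POS tag / head index / dependency label format, with head index -1 marking the root. A small illustrative helper (not part of the package) for splitting that string into tuples:

# Illustrative helper only: split the dep_parse_sentence() output shown in
# check_dep_parse_sentence() into (word, pos, head_index, label) tuples.
def split_dep_output(parsed_sentence):
    rows = []
    for line in parsed_sentence.strip().split('\n'):
        word, pos, head, label = line.split('\t')
        rows.append((word, pos, int(head), label))
    return rows

# split_dep_output("I\tPRP\t1\tSUB\n'm\tVBP\t-1\tROOT\n...")
# -> [('I', 'PRP', 1, 'SUB'), ("'m", 'VBP', -1, 'ROOT'), ...]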

tests/test_parser.py

Lines changed: 68 additions & 0 deletions
@@ -0,0 +1,68 @@
"""
Module for running a bunch of simple unit tests. Should be expanded more in
the future.

:author: Nitin Madnani ([email protected])
"""

from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

import itertools
import os

from io import open
from os.path import abspath, dirname, exists, join

import numpy as np
from nose.tools import eq_, raises, assert_equal, assert_not_equal

_my_dir = abspath(dirname(__file__))


def check_parse_sentence(tokenize=False):
    """
    Check parse_sentence method with and without tokenization
    """
    from tests import parser

    sentence = "I'm going to the market." if tokenize else "I 'm going to the market ."
    correct_output = "(S (NP (PRP I)) (VP (VBP 'm) (VP (VBG going) (PP (TO to) (NP (DT the) (NN market))))) (. .))"
    parsed_sentence = parser.parse_sentence(sentence, tokenize=tokenize)

    assert_equal(parsed_sentence, correct_output)


def test_parse_sentence():
    yield check_parse_sentence, False
    yield check_parse_sentence, True


def check_parse_file(tokenize=False):
    """
    Check parse_file method with and without tokenization
    """
    from tests import parser

    prefix = 'test' if tokenize else 'test_tokenized'

    correct_output = ["(S (NP (PRP I)) (VP (VBP am) (VP (VBG going) (PP (TO to) (NP (DT the) (NN market))))) (. .))",
                      "(SQ (VBP Are) (NP (PRP you)) (VP (VBG going) (S (VP (TO to) (VP (VB come) (PP (IN with) (NP (PRP me))))))) (. ?))"]

    input_file = abspath(join(_my_dir, '..', 'examples', '{}.txt'.format(prefix)))
    output_file = abspath(join(_my_dir, '..', 'examples', '{}.parse'.format(prefix)))

    # parse the file
    parser.parse_file(input_file, output_file, tokenize=tokenize)

    # read the output file and make sure we have the expected output
    with open(output_file, 'r') as outf:
        output = [l.strip() for l in outf.readlines()]

    assert_equal(output, correct_output)


def test_parse_file():
    yield check_parse_file, False
    yield check_parse_file, True
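parse_sentence() returns a single bracketed constituency tree per sentence, and parse_file() writes one such tree per input line, as the expected outputs above show. A small illustrative sketch (not part of the package) for pulling the part-of-speech leaves out of that bracketed string:

# Illustrative only: extract (tag, word) leaves from the bracketed parse
# string returned by parse_sentence(); leaves look like "(TAG word)".
import re

def parse_leaves(parse_string):
    return re.findall(r'\(([^()\s]+) ([^()\s]+)\)', parse_string)

# parse_leaves("(S (NP (PRP I)) (VP (VBP am) ...) (. .))")
# -> [('PRP', 'I'), ('VBP', 'am'), ..., ('.', '.')]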

tests/test_tagger.py

Lines changed: 68 additions & 0 deletions
@@ -0,0 +1,68 @@
"""
Module for running a bunch of simple unit tests. Should be expanded more in
the future.

:author: Nitin Madnani ([email protected])
"""

from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

import itertools
import os

from io import open
from os.path import abspath, dirname, exists, join

import numpy as np
from nose.tools import eq_, raises, assert_equal, assert_not_equal

_my_dir = abspath(dirname(__file__))


def check_tag_sentence(tokenize=False):
    """
    Check tag_sentence method with and without tokenization
    """
    from tests import tagger

    sentence = "I'm going to the market." if tokenize else "I 'm going to the market ."
    correct_output = "I/PRP 'm/VBP going/VBG to/TO the/DT market/NN ./."
    tagged_sentence = tagger.tag_sentence(sentence, tokenize=tokenize)

    assert_equal(tagged_sentence, correct_output)


def test_tag_sentence():
    yield check_tag_sentence, False
    yield check_tag_sentence, True


def check_tag_file(tokenize=False):
    """
    Check tag_file method with and without tokenization
    """
    from tests import tagger

    prefix = 'test' if tokenize else 'test_tokenized'

    correct_output = ['I/PRP am/VBP going/VBG to/TO the/DT market/NN ./.',
                      'Are/VBP you/PRP going/VBG to/TO come/VB with/IN me/PRP ?/.']

    input_file = abspath(join(_my_dir, '..', 'examples', '{}.txt'.format(prefix)))
    output_file = abspath(join(_my_dir, '..', 'examples', '{}.tag'.format(prefix)))

    # tag the file
    tagger.tag_file(input_file, output_file, tokenize=tokenize)

    # read the output file and make sure we have the expected output
    with open(output_file, 'r') as outf:
        output = [l.strip() for l in outf.readlines()]

    assert_equal(output, correct_output)


def test_tag_file():
    yield check_tag_file, False
    yield check_tag_file, True
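tag_sentence() returns the sentence as space-separated word/TAG tokens, as in the expected outputs above. A small illustrative sketch (not part of the package) for splitting that string back into (word, tag) pairs; splitting on the last '/' keeps words that themselves contain a slash intact:

# Illustrative only: turn the "word/TAG word/TAG ..." string returned by
# tag_sentence() into (word, tag) pairs.
def split_tagged_sentence(tagged_sentence):
    return [tuple(token.rsplit('/', 1)) for token in tagged_sentence.split()]

# split_tagged_sentence("I/PRP 'm/VBP going/VBG to/TO the/DT market/NN ./.")
# -> [('I', 'PRP'), ("'m", 'VBP'), ..., ('.', '.')]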
