Skip to content

Commit 1d8eb8c

Browse files
danieldk authored and Daniël de Kok committed
Cleanup unit tests
- Use fixtures to remove repeated code.
- Replace some incorrect try-except blocks by pytest.raises.
- Miscellaneous cleaning.
1 parent 3869c64 commit 1d8eb8c

File tree

5 files changed

+158
-128
lines changed

5 files changed

+158
-128
lines changed

tests/conftest.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import os
2+
3+
import finalfusion
4+
import numpy
5+
import pytest
6+
7+
8+
@pytest.fixture
9+
def analogy_fifu(tests_root):
10+
yield finalfusion.Embeddings(os.path.join(tests_root, "analogy.fifu"))
11+
12+
13+
@pytest.fixture
14+
def embeddings_fifu(tests_root):
15+
yield finalfusion.Embeddings(os.path.join(tests_root, "embeddings.fifu"))
16+
17+
18+
@pytest.fixture
19+
def embeddings_text(tests_root):
20+
embeds = dict()
21+
22+
with open(os.path.join(tests_root, "embeddings.txt"), "r", encoding="utf8") as lines:
23+
for line in lines:
24+
line_list = line.split(' ')
25+
embeds[line_list[0]] = numpy.array(
26+
[float(c) for c in line_list[1:]])
27+
28+
yield embeds
29+
30+
31+
@pytest.fixture
32+
def similarity_fifu(tests_root):
33+
yield finalfusion.Embeddings(os.path.join(tests_root, "similarity.fifu"))
34+
35+
36+
@pytest.fixture
37+
def tests_root():
38+
yield os.path.dirname(__file__)

tests/test_analogy.py

Lines changed: 44 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import finalfusion
1+
import pytest
22

33
ANALOGY_ORDER = [
44
"Deutschland",
@@ -43,22 +43,48 @@
4343
"Westfalen",
4444
]
4545

46-
def test_analogies():
47-
embeds = finalfusion.Embeddings('tests/analogy.fifu')
48-
for idx, analogy in enumerate(embeds.analogy("Paris", "Frankreich", "Berlin", 40)):
46+
47+
def test_analogies(analogy_fifu):
48+
for idx, analogy in enumerate(
49+
analogy_fifu.analogy(
50+
"Paris", "Frankreich", "Berlin", 40)):
4951
assert ANALOGY_ORDER[idx] == analogy.word
5052

51-
assert embeds.analogy("Paris", "Frankreich", "Paris", 1, (True, False, True))[0].word == "Frankreich"
52-
assert embeds.analogy("Paris", "Frankreich", "Paris", 1, (True, True, True))[0].word != "Frankreich"
53-
assert embeds.analogy("Frankreich", "Frankreich", "Frankreich", 1, (False, False, False))[0].word == "Frankreich"
54-
assert embeds.analogy("Frankreich", "Frankreich", "Frankreich", 1, (False, False, True))[0].word != "Frankreich"
55-
try:
56-
embeds.analogy("Paris", "Frankreich", "Paris", 1, (True, True))
57-
assert True == False
58-
except:
59-
()
60-
try:
61-
embeds.analogy("Paris", "Frankreich", "Paris", 1, (True, True, True, True))
62-
assert True == False
63-
except:
64-
()
53+
assert analogy_fifu.analogy(
54+
"Paris",
55+
"Frankreich",
56+
"Paris",
57+
1,
58+
(True,
59+
False,
60+
True))[0].word == "Frankreich"
61+
assert analogy_fifu.analogy(
62+
"Paris",
63+
"Frankreich",
64+
"Paris",
65+
1,
66+
(True,
67+
True,
68+
True))[0].word != "Frankreich"
69+
assert analogy_fifu.analogy(
70+
"Frankreich",
71+
"Frankreich",
72+
"Frankreich",
73+
1,
74+
(False,
75+
False,
76+
False))[0].word == "Frankreich"
77+
assert analogy_fifu.analogy(
78+
"Frankreich",
79+
"Frankreich",
80+
"Frankreich",
81+
1,
82+
(False,
83+
False,
84+
True))[0].word != "Frankreich"
85+
86+
with pytest.raises(ValueError):
87+
analogy_fifu.analogy("Paris", "Frankreich", "Paris", 1, (True, True))
88+
with pytest.raises(ValueError):
89+
analogy_fifu.analogy("Paris", "Frankreich", "Paris",
90+
1, (True, True, True, True))

tests/test_embedding.py

Lines changed: 16 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import finalfusion
21
import pytest
32
import numpy
43

@@ -13,63 +12,38 @@
1312
]
1413

1514

16-
def test_embeddings_with_norms():
17-
embeds = finalfusion.Embeddings(
18-
"tests/embeddings.fifu")
19-
embeds_dict = dict()
20-
with open("tests/embeddings.txt", "r", encoding="utf8") as lines:
21-
for line in lines:
22-
line_list = line.split(' ')
23-
embeds_dict[line_list[0]] = [float(val) for val in line_list[1:]]
24-
25-
for embedding_with_norm, norm in zip(embeds.iter_with_norm(), TEST_NORMS):
15+
def test_embeddings_with_norms(embeddings_fifu, embeddings_text):
16+
for embedding_with_norm, norm in zip(
17+
embeddings_fifu.iter_with_norm(), TEST_NORMS):
2618
unnormed_embed = embedding_with_norm[1] * norm
27-
test_embed = embeds_dict[embedding_with_norm[0]]
19+
test_embed = embeddings_text[embedding_with_norm[0]]
2820
assert numpy.allclose(
2921
unnormed_embed, test_embed), "Embedding from 'iter_with_norm()' fails to match!"
3022
assert len(
3123
embedding_with_norm) == 3, "The number of values returned by 'iter_with_norm()' does not match!"
3224

3325

34-
def test_embeddings_with_norms_oov():
35-
embeds = finalfusion.Embeddings(
36-
"tests/embeddings.fifu")
37-
assert embeds.embedding_with_norm("Something out of vocabulary") is None
38-
26+
def test_embeddings_with_norms_oov(embeddings_fifu):
27+
assert embeddings_fifu.embedding_with_norm(
28+
"Something out of vocabulary") is None
3929

40-
def test_embeddings():
41-
embeds = finalfusion.Embeddings(
42-
"tests/embeddings.fifu")
43-
embeds_dict = dict()
44-
with open("tests/embeddings.txt", "r", encoding="utf8") as lines:
45-
for line in lines:
46-
line_list = line.split(' ')
47-
embeds_dict[line_list[0]] = [float(i) for i in line_list[1:]]
4830

49-
for embedding_with_norm, norm in zip(embeds, TEST_NORMS):
31+
def test_embeddings(embeddings_fifu, embeddings_text):
32+
for embedding_with_norm, norm in zip(embeddings_fifu, TEST_NORMS):
5033
unnormed_embed = embedding_with_norm[1] * norm
51-
test_embed = embeds_dict[embedding_with_norm[0]]
34+
test_embed = embeddings_text[embedding_with_norm[0]]
5235
assert numpy.allclose(
5336
unnormed_embed, test_embed), "Embedding from normal iterator fails to match!"
5437
assert len(
5538
embedding_with_norm) == 2, "The number of values returned by normal iterator does not match!"
5639

5740

58-
def test_embeddings_oov():
59-
embeds = finalfusion.Embeddings(
60-
"tests/embeddings.fifu")
61-
assert embeds.embedding("Something out of vocabulary") is None
62-
41+
def test_embeddings_oov(embeddings_fifu):
42+
assert embeddings_fifu.embedding("Something out of vocabulary") is None
6343

64-
def test_norms():
65-
embeds = finalfusion.Embeddings(
66-
"tests/embeddings.fifu")
67-
embeds_dict = dict()
68-
with open("tests/embeddings.txt", "r", encoding="utf8") as lines:
69-
for line in lines:
70-
line_list = line.split(' ')
71-
embeds_dict[line_list[0]] = [float(val) for val in line_list[1:]]
7244

73-
for embedding_with_norm, norm in zip(embeds.iter_with_norm(), TEST_NORMS):
45+
def test_norms(embeddings_fifu):
46+
for embedding_with_norm, norm in zip(
47+
embeddings_fifu.iter_with_norm(), TEST_NORMS):
7448
assert pytest.approx(
75-
embedding_with_norm[2] == norm), "Norm fails to match!"
49+
embedding_with_norm[2]) == norm, "Norm fails to match!"

tests/test_similarity.py

Lines changed: 58 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1,68 +1,66 @@
1-
import finalfusion
2-
31
SIMILARITY_ORDER_STUTTGART_10 = [
4-
"Karlsruhe",
5-
"Mannheim",
6-
"München",
7-
"Darmstadt",
8-
"Heidelberg",
9-
"Wiesbaden",
10-
"Kassel",
11-
"Düsseldorf",
12-
"Leipzig",
13-
"Berlin",
14-
];
2+
"Karlsruhe",
3+
"Mannheim",
4+
"München",
5+
"Darmstadt",
6+
"Heidelberg",
7+
"Wiesbaden",
8+
"Kassel",
9+
"Düsseldorf",
10+
"Leipzig",
11+
"Berlin",
12+
]
1513

1614

1715
SIMILARITY_ORDER = [
18-
"Potsdam",
19-
"Hamburg",
20-
"Leipzig",
21-
"Dresden",
22-
"München",
23-
"Düsseldorf",
24-
"Bonn",
25-
"Stuttgart",
26-
"Weimar",
27-
"Berlin-Charlottenburg",
28-
"Rostock",
29-
"Karlsruhe",
30-
"Chemnitz",
31-
"Breslau",
32-
"Wiesbaden",
33-
"Hannover",
34-
"Mannheim",
35-
"Kassel",
36-
"Köln",
37-
"Danzig",
38-
"Erfurt",
39-
"Dessau",
40-
"Bremen",
41-
"Charlottenburg",
42-
"Magdeburg",
43-
"Neuruppin",
44-
"Darmstadt",
45-
"Jena",
46-
"Wien",
47-
"Heidelberg",
48-
"Dortmund",
49-
"Stettin",
50-
"Schwerin",
51-
"Neubrandenburg",
52-
"Greifswald",
53-
"Göttingen",
54-
"Braunschweig",
55-
"Berliner",
56-
"Warschau",
57-
"Berlin-Spandau",
58-
];
16+
"Potsdam",
17+
"Hamburg",
18+
"Leipzig",
19+
"Dresden",
20+
"München",
21+
"Düsseldorf",
22+
"Bonn",
23+
"Stuttgart",
24+
"Weimar",
25+
"Berlin-Charlottenburg",
26+
"Rostock",
27+
"Karlsruhe",
28+
"Chemnitz",
29+
"Breslau",
30+
"Wiesbaden",
31+
"Hannover",
32+
"Mannheim",
33+
"Kassel",
34+
"Köln",
35+
"Danzig",
36+
"Erfurt",
37+
"Dessau",
38+
"Bremen",
39+
"Charlottenburg",
40+
"Magdeburg",
41+
"Neuruppin",
42+
"Darmstadt",
43+
"Jena",
44+
"Wien",
45+
"Heidelberg",
46+
"Dortmund",
47+
"Stettin",
48+
"Schwerin",
49+
"Neubrandenburg",
50+
"Greifswald",
51+
"Göttingen",
52+
"Braunschweig",
53+
"Berliner",
54+
"Warschau",
55+
"Berlin-Spandau",
56+
]
57+
5958

60-
def test_similarity_berlin_40():
61-
embeds = finalfusion.Embeddings('tests/similarity.fifu')
62-
for idx, sim in enumerate(embeds.similarity("Berlin", 40)):
59+
def test_similarity_berlin_40(similarity_fifu):
60+
for idx, sim in enumerate(similarity_fifu.similarity("Berlin", 40)):
6361
assert SIMILARITY_ORDER[idx] == sim.word
6462

65-
def test_similarity_stuttgart_10():
66-
embeds = finalfusion.Embeddings('tests/similarity.fifu')
67-
for idx, sim in enumerate(embeds.similarity("Stuttgart", 10)):
63+
64+
def test_similarity_stuttgart_10(similarity_fifu):
65+
for idx, sim in enumerate(similarity_fifu.similarity("Stuttgart", 10)):
6866
assert SIMILARITY_ORDER_STUTTGART_10[idx] == sim.word

tests/test_vocab.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,3 @@
1-
import finalfusion
2-
import pytest
3-
4-
5-
def test_embeddings_with_norms_oov():
6-
embeds = finalfusion.Embeddings(
7-
"tests/embeddings.fifu")
8-
vocab = embeds.vocab()
1+
def test_embeddings_with_norms_oov(embeddings_fifu):
2+
vocab = embeddings_fifu.vocab()
93
assert vocab.item_to_indices("Something out of vocabulary") is None

0 commit comments

Comments
 (0)