Skip to content
This repository was archived by the owner on Mar 9, 2023. It is now read-only.

Commit d4699a3

Browse files
Merge pull request #164 from WorksApplications/feature/kazuma-t/build_test_dict
Build test dictionaries on the fly
2 parents b445b4d + c054fce commit d4699a3

File tree

11 files changed

+184
-43
lines changed

11 files changed

+184
-43
lines changed

.github/workflows/build.yml

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,8 @@ jobs:
3333
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
3434
- name: Check license header
3535
run: |
36-
HEADER=`cat scripts/license-header.txt`
37-
for FILE in `find setup.py sudachipy tests -name '*.py'`; do FILECONTENTS=`cat "$FILE"`; if [[ "$FILECONTENTS" != "$HEADER"* ]]; then >&2 echo "invalid license header on $FILE"; fi; done
36+
scripts/checkheader.sh
3837
- name: Test with unittest
3938
run: |
40-
cp .travis/system.dic.test tests/resources/system.dic
41-
cp .travis/user.dic.test tests/resources/user.dic
4239
python setup.py build_ext --inplace
43-
python -m unittest discover tests
40+
scripts/test.sh

.travis/system.dic.test

-6.86 KB
Binary file not shown.

.travis/user.dic.test

-1.52 KB
Binary file not shown.

scripts/checkheader.sh

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#!/usr/bin/env bash
#
# Verify that every Python source file starts with the license header.
#
# Usage: scripts/checkheader.sh      (run from the repository root)
#
# Checks setup.py and every *.py file under sudachipy/ and tests/ against
# scripts/license-header.txt by comparing the first SIZE bytes of each file
# with the header, where SIZE is the byte length of the header file.
#
# Output: "invalid license header on <file>" on stderr for each offender.
# Exit status:
#   0  every file starts with the header
#   1  at least one file does not
#   2  the header file itself cannot be read

HEADER=scripts/license-header.txt

# Without the header there is nothing to compare against; fail loudly
# instead of reporting success.
if [[ ! -r "$HEADER" ]]; then
  echo "cannot read license header file: $HEADER" >&2
  exit 2
fi

SIZE=$(wc -c < "$HEADER")
# Some wc implementations pad the count with blanks; keep only the digits.
SIZE=${SIZE//[[:space:]]/}

STATUS=0
# NUL-delimited iteration keeps paths containing whitespace intact.
while IFS= read -r -d '' FILE; do
  # Rely on cmp's exit status rather than its printed message: a file that is
  # shorter than the header (e.g. an empty file) makes cmp report EOF on
  # stderr only, which a check of its stdout would silently accept.
  if ! cmp -s -n "$SIZE" "$HEADER" "$FILE"; then
    echo "invalid license header on $FILE" >&2
    STATUS=1
  fi
done < <(find setup.py sudachipy tests -type f -name '*.py' -print0)

exit "$STATUS"

scripts/format.sh

Lines changed: 1 addition & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -5,25 +5,5 @@ cd $(dirname $0)
55
flake8 --show --config=flake8.cfg ../sudachipy
66
flake8 --show --config=flake8.cfg ../tests
77

8-
HEADER=`cat license-header.txt`
9-
108
cd ..
11-
12-
array=()
13-
14-
for FILE in `find ./sudachipy -type f -name "*.py"`; do
15-
array+=( ${FILE} )
16-
done
17-
18-
for FILE in `find ./tests -type f -name "*.py"`; do
19-
array+=( ${FILE} )
20-
done
21-
22-
array+=( ./setup.py )
23-
24-
for FILE in ${array[@]}; do
25-
FILECONTENTS=`cat ${FILE}`
26-
if [[ ${FILECONTENTS} != ${HEADER}* ]]; then
27-
>&2 echo "invalid license header on ${FILE}"
28-
fi
29-
done
9+
scripts/checkheader.sh

scripts/test.sh

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,29 +5,23 @@
55
# You need to prepare system.dic in resources; system.dic and user.dic in tests/resources are built by this script
66
# see README
77

8-
cd $(dirname $0)
8+
set -e
99

10-
# check system.dic
11-
if [[ ! -f "../tests/resources/system.dic" ]]; then
12-
cp ../.travis/system.dic.test ../tests/resources/system.dic
13-
fi
14-
DIFF=$(diff ../.travis/system.dic.test ../tests/resources/system.dic)
15-
if [[ "$DIFF" != "" ]]; then
16-
cp ../.travis/system.dic.test ../tests/resources/system.dic
10+
# build dictionaries
11+
if !(type sudachipy > /dev/null 2>&1); then
12+
python setup.py develop
1713
fi
14+
sudachipy build -o tests/resources/system.dic -d "the system dictionary for the unit tests" -m tests/resources/dict/matrix.def tests/resources/dict/lex.csv
15+
sudachipy ubuild -o tests/resources/user.dic -s tests/resources/system.dic tests/resources/dict/user.csv
1816

19-
# check user.dic
20-
if [[ ! -f "../tests/resources/user.dic" ]]; then
21-
cp ../.travis/user.dic.test ../tests/resources/user.dic
22-
fi
23-
DIFF=$(diff ../.travis/user.dic.test ../tests/resources/user.dic)
24-
if [[ "$DIFF" != "" ]]; then
25-
cp ../.travis/user.dic.test ../tests/resources/user.dic
26-
fi
17+
set +e
2718

2819
# unittest
29-
RES=`cd ..; python -m unittest discover tests -p '*test*.py' 2>&1`
20+
RES=`python -m unittest discover tests -p '*test*.py' 2>&1`
21+
STATUS=$?
3022
RES_TAIL=`echo "$RES" | tail -1`
3123
if [[ $RES_TAIL != "OK" ]]; then
3224
>&2 echo "$RES"
3325
fi
26+
27+
exit $STATUS

tests/resources/dict/lex.csv

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
た,1,1,8729,た,助動詞,*,*,*,助動詞-タ,終止形-一般,タ,た,*,A,*,*,*,*
2+
に,2,2,11406,に,助詞,接続助詞,*,*,*,*,ニ,に,*,A,*,*,*,*
3+
に,3,3,4481,に,助詞,格助詞,*,*,*,*,ニ,に,*,A,*,*,*,*
4+
京都,6,6,5293,京都,名詞,固有名詞,地名,一般,*,*,キョウト,京都,*,A,*,*,*,1/5
5+
東,7,7,4675,東,名詞,普通名詞,一般,*,*,*,ヒガシ,東,*,A,*,*,*,*
6+
東京,6,6,2816,東京,名詞,固有名詞,地名,一般,*,*,トウキョウ,東京,*,A,*,*,*,*
7+
東京都,6,8,5320,東京都,名詞,固有名詞,地名,一般,*,*,トウキョウト,東京都,*,B,5/9,*,5/9,*
8+
行く,4,4,5105,行く,動詞,非自立可能,*,*,五段-カ行,終止形-一般,イク,行く,*,A,*,*,*,*
9+
行っ,5,5,5122,行っ,動詞,非自立可能,*,*,五段-カ行,連用形-促音便,イッ,行く,7,A,*,*,*,*
10+
都,8,8,2914,都,名詞,普通名詞,一般,*,*,*,ト,都,*,A,*,*,*,*
11+
アイ,7,7,4675,アイ,名詞,普通名詞,一般,*,*,*,アイ,アイ,*,A,*,*,*,*
12+
アイウ,7,7,4675,アイウ,名詞,普通名詞,一般,*,*,*,アイウ,アイウ,*,A,*,*,*,*
13+
アイアイウ,6,6,32766,アイウ,名詞,固有名詞,地名,一般,*,*,アイアイウ,アイアイウ,*,A,*,*,*,*
14+
0,9,9,2478,0,名詞,数詞,*,*,*,*,ゼロ,0,*,A,*,*,*,*
15+
1,9,9,2478,1,名詞,数詞,*,*,*,*,イチ,1,*,A,*,*,*,*
16+
2,9,9,2478,2,名詞,数詞,*,*,*,*,ニ,2,*,A,*,*,*,*
17+
3,9,9,2478,3,名詞,数詞,*,*,*,*,サン,3,*,A,*,*,*,*
18+
4,9,9,2478,4,名詞,数詞,*,*,*,*,ヨン,4,*,A,*,*,*,*
19+
5,9,9,2478,5,名詞,数詞,*,*,*,*,ゴ,5,*,A,*,*,*,*
20+
6,9,9,2478,6,名詞,数詞,*,*,*,*,ロク,6,*,A,*,*,*,*
21+
7,9,9,2478,7,名詞,数詞,*,*,*,*,ナナ,7,*,A,*,*,*,*
22+
8,9,9,2478,8,名詞,数詞,*,*,*,*,ハチ,8,*,A,*,*,*,*
23+
9,9,9,2478,9,名詞,数詞,*,*,*,*,キュウ,9,*,A,*,*,*,*
24+
〇,9,9,2478,〇,名詞,数詞,*,*,*,*,ゼロ,〇,*,A,*,*,*,*
25+
一,9,9,2478,一,名詞,数詞,*,*,*,*,イチ,一,*,A,*,*,*,*
26+
二,9,9,2478,二,名詞,数詞,*,*,*,*,ニ,二,*,A,*,*,*,*
27+
三,9,9,2478,三,名詞,数詞,*,*,*,*,サン,三,*,A,*,*,*,*
28+
四,9,9,2478,四,名詞,数詞,*,*,*,*,ヨン,四,*,A,*,*,*,*
29+
五,9,9,2478,五,名詞,数詞,*,*,*,*,ゴ,五,*,A,*,*,*,*
30+
六,9,9,2478,六,名詞,数詞,*,*,*,*,ロク,六,*,A,*,*,*,*
31+
七,9,9,2478,七,名詞,数詞,*,*,*,*,ナナ,七,*,A,*,*,*,*
32+
八,9,9,2478,八,名詞,数詞,*,*,*,*,ハチ,八,*,A,*,*,*,*
33+
九,9,9,2478,九,名詞,数詞,*,*,*,*,キュウ,九,*,A,*,*,*,*
34+
六三四,6,6,0,六三四,名詞,固有名詞,地名,一般,*,*,ムサシ,六三四,*,A,*,*,*,*
35+
いく,4,4,5105,いく,動詞,非自立可能,*,*,五段-カ行,終止形-一般,イク,行く,*,A,*,*,*,*
36+
いっ,5,5,5122,いっ,動詞,非自立可能,*,*,五段-カ行,連用形-促音便,イッ,行く,34,A,*,*,*,*
37+
012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789,9,9,2478,012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789,名詞,数詞,*,*,*,*,ゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウ,012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789,*,A,*,*,*,*
38+
特a,8,8,2914,特a,名詞,普通名詞,一般,*,*,*,トクエー,特a,*,A,*,*,*,*
39+
な。な,8,8,2914,な。な,名詞,普通名詞,一般,*,*,*,ナナ,な。な,*,A,*,*,*,*

tests/resources/dict/matrix.def

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
10 10
2+
0 0 0
3+
0 1 863
4+
0 2 2124
5+
0 3 1032
6+
0 4 591
7+
0 5 -162
8+
0 6 -79
9+
0 7 887
10+
0 8 447
11+
0 9 -535
12+
1 0 -3689
13+
1 1 -3361
14+
1 2 -7643
15+
1 3 -3267
16+
1 4 809
17+
1 5 -1098
18+
1 6 4606
19+
1 7 4269
20+
1 8 4567
21+
1 9 1635
22+
2 0 -1959
23+
2 1 2457
24+
2 2 811
25+
2 3 840
26+
2 4 903
27+
2 5 -958
28+
2 6 517
29+
2 7 2037
30+
2 8 1392
31+
2 9 -193
32+
3 0 -2288
33+
3 1 1741
34+
3 2 487
35+
3 3 792
36+
3 4 -1474
37+
3 5 -3429
38+
3 6 126
39+
3 7 437
40+
3 8 605
41+
3 9 -547
42+
4 0 -2809
43+
4 1 -3584
44+
4 2 -6743
45+
4 3 -2869
46+
4 4 -2805
47+
4 5 -407
48+
4 6 3422
49+
4 7 5642
50+
4 8 6382
51+
4 9 2165
52+
5 0 -509
53+
5 1 -3665
54+
5 2 -3882
55+
5 3 -572
56+
5 4 -1036
57+
5 5 -54
58+
5 6 2570
59+
5 7 3319
60+
5 8 4059
61+
5 9 882
62+
6 0 101
63+
6 1 2933
64+
6 2 2198
65+
6 3 -2004
66+
6 4 4392
67+
6 5 4017
68+
6 6 569
69+
6 7 475
70+
6 8 -390
71+
6 9 852
72+
7 0 -852
73+
7 1 2079
74+
7 2 1180
75+
7 3 -3084
76+
7 4 2010
77+
7 5 1570
78+
7 6 746
79+
7 7 2341
80+
7 8 2051
81+
7 9 1393
82+
8 0 -522
83+
8 1 3354
84+
8 2 2037
85+
8 3 -2542
86+
8 4 3071
87+
8 5 2631
88+
8 6 -352
89+
8 7 2847
90+
8 8 1134
91+
8 9 1256
92+
9 0 -975
93+
9 1 2498
94+
9 2 1690
95+
9 3 -1523
96+
9 4 3023
97+
9 5 3139
98+
9 6 2562
99+
9 7 3962
100+
9 8 418
101+
9 9 -2490

tests/resources/dict/user.csv

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
ぴらる,8,8,-32768,ぴらる,名詞,普通名詞,一般,*,*,*,ピラル,ぴらる,*,A,*,*,*,*
2+
府,8,8,2914,府,名詞,普通名詞,一般,*,*,*,フ,府,*,A,*,*,*,*
3+
東京府,6,6,2816,東京府,名詞,固有名詞,地名,一般,*,*,トウキョウフ,東京府,*,B,5/U1,*,5/U1,1/3
4+
すだち,6,6,2816,すだち,被子植物門,双子葉植物綱,ムクロジ目,ミカン科,ミカン属,スダチ,スダチ,すだち,*,A,*,*,*,*

tests/resources/dict/user2.csv

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
ぴさる,8,8,-32768,ぴさる,名詞,普通名詞,一般,*,*,*,ピサル,ぴさる,*,A,*,*,*,*
2+
かぼす,6,6,2816,かぼす,被子植物門,双子葉植物綱,ムクロジ目,ミカン科,ミカン属,カボス,カボス,かぼす,*,A,*,*,*,*

0 commit comments

Comments
 (0)