Skip to content
This repository was archived by the owner on Mar 9, 2023. It is now read-only.

Commit 3a2f344

Browse files
committed
add test for specifying system dictionary by the config file
1 parent 8f53044 commit 3a2f344

File tree

1 file changed

+102
-0
lines changed

1 file changed

+102
-0
lines changed

tests/test_switchdictionary.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
import json
2+
import os
3+
import shutil
4+
import tempfile
5+
import time
6+
from logging import getLogger
7+
from unittest import TestCase
8+
9+
from sudachipy.dictionary import Dictionary
10+
from sudachipy.dictionarylib import SYSTEM_DICT_VERSION_2
11+
from sudachipy.dictionarylib.dictionarybuilder import DictionaryBuilder
12+
from sudachipy.dictionarylib.dictionaryheader import DictionaryHeader
13+
14+
15+
class TestSwitchDictionary(TestCase):
    """Check that the system dictionary is selected by the config file.

    Three small system dictionaries ('small.dic', 'core.dic', 'full.dic')
    holding 1, 2 and 3 lexicon entries are built into a temporary resource
    directory.  The test then points the 'systemDict' key of a copied
    'sudachi.json' at each of them and verifies the loaded lexicon size.
    """

    def setUp(self):
        """Create a temporary resource directory and build the dictionaries."""
        # getLogger() without a name returns the root logger; remember its
        # state so that tearDown can restore it instead of leaving logging
        # disabled for every test that runs afterwards.
        self.logger = getLogger()
        self._logger_was_disabled = self.logger.disabled
        self.logger.disabled = True

        self.temp_dir = tempfile.mkdtemp()
        self.resource_dir = os.path.join(self.temp_dir, 'resources')
        os.makedirs(self.resource_dir)

        # Work on copies so the checked-in test resources are never modified.
        test_resource_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'resources')
        self.char_def_path = os.path.join(self.resource_dir, 'char.def')
        shutil.copy(os.path.join(test_resource_dir, 'char.def'), self.char_def_path)

        self.sudachi_json_path = os.path.join(self.resource_dir, 'sudachi.json')
        shutil.copy(os.path.join(test_resource_dir, 'sudachi.json'), self.sudachi_json_path)
        # Clear the user dictionaries so only the system dictionary is loaded.
        self._rewrite_json(self.sudachi_json_path, 'userDict', [])

        # Matrix definition handed to the dictionary builder for every build.
        self.matrix_path = os.path.join(self.resource_dir, 'matrix.txt')
        with open(self.matrix_path, 'w', encoding='utf-8') as wf:
            wf.write('1 1\n0 0 200\n')

        small_lexs = ["島,0,0,0,島,名詞,普通名詞,一般,*,*,*,シマ,島,*,A,*,*,*"]
        core_lexs = ["徳島本町,0,0,0,徳島本町,名詞,固有名詞,地名,一般,*,*,トクシマホンチョウ,徳島本町,*,A,*,*,*,*"]
        notcore_lexs = ["徳島堰,0,0,0,徳島堰,名詞,固有名詞,一般,*,*,*,トクシマセギ,徳島堰,*,A,*,*,*"]

        # Each dictionary is a superset of the previous one: 1, 2 and 3 entries.
        small_lines = small_lexs
        core_lines = small_lexs + core_lexs
        full_lines = small_lexs + core_lexs + notcore_lexs

        self.small_txt_path = os.path.join(self.resource_dir, 'small.csv')
        self.core_txt_path = os.path.join(self.resource_dir, 'core.csv')
        self.full_txt_path = os.path.join(self.resource_dir, 'full.csv')

        self.small_dic_path = self._build_dictionary(self.small_txt_path, small_lines, 'small.dic')
        self.core_dic_path = self._build_dictionary(self.core_txt_path, core_lines, 'core.dic')
        self.full_dic_path = self._build_dictionary(self.full_txt_path, full_lines, 'full.dic')

    def tearDown(self):
        """Restore the logger state and delete the temporary directory."""
        self.logger.disabled = self._logger_was_disabled
        shutil.rmtree(self.temp_dir)

    @staticmethod
    def _rewrite_json(json_file_path, k, v):
        """Set key ``k`` to ``v`` in the JSON file at ``json_file_path``.

        The file is read and rewritten in place.  UTF-8 is used explicitly
        because the output is dumped with ``ensure_ascii=False``, so the
        result must not depend on the platform's default encoding.
        """
        with open(json_file_path, 'r', encoding='utf-8') as f:
            obj = json.load(f)
        obj[k] = v
        with open(json_file_path, 'w', encoding='utf-8') as f:
            json.dump(obj, f, ensure_ascii=False, indent=4)

    def _build_dictionary(self, input_txt_path, lex_lines, dictionary_name):
        """Build a system dictionary from ``lex_lines`` and return its path.

        Args:
            input_txt_path: Path the lexicon lines are written to; this file
                is then given to the builder as its only lexicon source.
            lex_lines: Lexicon entries, one CSV line per list element.
            dictionary_name: File name of the binary dictionary created
                inside ``self.resource_dir``.

        Returns:
            The path of the dictionary file that was written.
        """
        with open(input_txt_path, 'w', encoding='utf-8') as wf:
            wf.write("\n".join(lex_lines))

        out_path = os.path.join(self.resource_dir, dictionary_name)
        lexicon_paths = [input_txt_path]
        header = DictionaryHeader(SYSTEM_DICT_VERSION_2, int(time.time()), 'test')

        # Context managers guarantee both streams are closed even when the
        # build raises, so a failing build cannot leak file handles.
        with open(out_path, 'wb') as out_stream, \
                open(self.matrix_path, 'r', encoding='utf-8') as matrix_input_stream:
            # The header is written first; the builder appends the body.
            out_stream.write(header.to_bytes())
            builder = DictionaryBuilder(logger=self.logger)
            builder.build(lexicon_paths, matrix_input_stream, out_stream)

        return out_path

    def test_switch_dictionary(self):
        """Load each dictionary through the config and check its lexicon size.

        Every dictionary is referenced twice: once by its bare file name
        (a relative path) and once by its absolute path.
        """
        cases = [
            ('small.dic', self.small_dic_path, 1),
            ('core.dic', self.core_dic_path, 2),
            ('full.dic', self.full_dic_path, 3),
        ]
        for relative_path, absolute_path, expected_size in cases:
            for system_dict in (relative_path, absolute_path):
                # subTest reports which path variant failed and lets the
                # remaining variants still run.
                with self.subTest(system_dict=system_dict):
                    self._rewrite_json(self.sudachi_json_path, 'systemDict', system_dict)
                    self.dict = Dictionary(config_path=self.sudachi_json_path, resource_dir=self.resource_dir)
                    self.assertEqual(expected_size, self.dict.lexicon.size())

0 commit comments

Comments
 (0)