|
| 1 | +import json |
| 2 | +import os |
| 3 | +import shutil |
| 4 | +import tempfile |
| 5 | +import time |
| 6 | +from logging import getLogger |
| 7 | +from unittest import TestCase |
| 8 | + |
| 9 | +from sudachipy.dictionary import Dictionary |
| 10 | +from sudachipy.dictionarylib import SYSTEM_DICT_VERSION_2 |
| 11 | +from sudachipy.dictionarylib.dictionarybuilder import DictionaryBuilder |
| 12 | +from sudachipy.dictionarylib.dictionaryheader import DictionaryHeader |
| 13 | + |
| 14 | + |
class TestSwitchDictionary(TestCase):
    """Check that ``Dictionary`` loads whichever system dictionary the config names.

    ``setUp`` builds three small system dictionaries (``small.dic``,
    ``core.dic`` and ``full.dic``, from 1, 2 and 3 lexicon lines) inside a
    temporary resource directory. The test then points the ``systemDict`` key
    of a copied ``sudachi.json`` at each of them in turn and checks the size of
    the loaded lexicon.
    """

    def setUp(self):
        """Create the temporary resource directory and build the dictionaries."""
        # getLogger() without a name returns the root logger, so disabling it
        # is process-wide. Remember the previous state so that tearDown can
        # restore it instead of leaking the change into other tests.
        self.logger = getLogger()
        self._logger_was_disabled = self.logger.disabled
        self.logger.disabled = True

        self.temp_dir = tempfile.mkdtemp()
        self.resource_dir = os.path.join(self.temp_dir, 'resources')
        os.makedirs(self.resource_dir)

        # Work on copies of the shared fixtures: the test rewrites
        # sudachi.json, and the originals next to this file must stay intact.
        test_resource_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'resources')
        self.char_def_path = os.path.join(self.resource_dir, 'char.def')
        shutil.copy(os.path.join(test_resource_dir, 'char.def'), self.char_def_path)

        self.sudachi_json_path = os.path.join(self.resource_dir, 'sudachi.json')
        shutil.copy(os.path.join(test_resource_dir, 'sudachi.json'), self.sudachi_json_path)
        self._rewrite_json(self.sudachi_json_path, 'userDict', [])

        # Matrix definition fed to DictionaryBuilder.build() for every dictionary.
        self.matrix_path = os.path.join(self.resource_dir, 'matrix.txt')
        with open(self.matrix_path, 'w', encoding='utf-8') as wf:
            wf.write('1 1\n0 0 200\n')

        small_lexs = ["島,0,0,0,島,名詞,普通名詞,一般,*,*,*,シマ,島,*,A,*,*,*"]
        core_lexs = ["徳島本町,0,0,0,徳島本町,名詞,固有名詞,地名,一般,*,*,トクシマホンチョウ,徳島本町,*,A,*,*,*,*"]
        notcore_lexs = ["徳島堰,0,0,0,徳島堰,名詞,固有名詞,一般,*,*,*,トクシマセギ,徳島堰,*,A,*,*,*"]

        # Each dictionary is a superset of the previous one, so the lexicon
        # size (1, 2, 3) identifies which dictionary was actually loaded.
        small_lines = small_lexs
        core_lines = small_lexs + core_lexs
        full_lines = small_lexs + core_lexs + notcore_lexs

        self.small_txt_path = os.path.join(self.resource_dir, 'small.csv')
        self.core_txt_path = os.path.join(self.resource_dir, 'core.csv')
        self.full_txt_path = os.path.join(self.resource_dir, 'full.csv')

        self.small_dic_path = self._build_dictionary(self.small_txt_path, small_lines, 'small.dic')
        self.core_dic_path = self._build_dictionary(self.core_txt_path, core_lines, 'core.dic')
        self.full_dic_path = self._build_dictionary(self.full_txt_path, full_lines, 'full.dic')

    def tearDown(self):
        """Remove the temporary directory and restore the root logger state."""
        try:
            shutil.rmtree(self.temp_dir)
        finally:
            # Restore even if the directory could not be removed.
            self.logger.disabled = self._logger_was_disabled

    @staticmethod
    def _rewrite_json(json_file_path, k, v):
        """Set top-level key ``k`` to ``v`` in the JSON file, rewriting it in place.

        The file is read and written as UTF-8 explicitly: the dump uses
        ``ensure_ascii=False``, so relying on the platform default encoding
        would make the result locale-dependent.
        """
        with open(json_file_path, 'r', encoding='utf-8') as f:
            obj = json.load(f)
        obj[k] = v
        with open(json_file_path, 'w', encoding='utf-8') as f:
            json.dump(obj, f, ensure_ascii=False, indent=4)

    def _build_dictionary(self, input_txt_path, lex_lines, dictionary_name):
        """Write ``lex_lines`` to ``input_txt_path`` and build a dictionary from it.

        Args:
            input_txt_path: path of the lexicon CSV file to create.
            lex_lines: lexicon lines, written joined by newlines.
            dictionary_name: file name of the binary dictionary, created
                inside ``self.resource_dir``.

        Returns:
            The path of the dictionary file that was written.
        """
        with open(input_txt_path, 'w', encoding='utf-8') as wf:
            wf.write("\n".join(lex_lines))

        out_path = os.path.join(self.resource_dir, dictionary_name)
        lexicon_paths = [input_txt_path]

        # Context managers close both streams even when the builder raises.
        with open(out_path, 'wb') as out_stream, \
                open(self.matrix_path, 'r', encoding='utf-8') as matrix_input_stream:
            # The header is written first; the builder appends to the same stream.
            header = DictionaryHeader(SYSTEM_DICT_VERSION_2, int(time.time()), 'test')
            out_stream.write(header.to_bytes())
            builder = DictionaryBuilder(logger=self.logger)
            builder.build(lexicon_paths, matrix_input_stream, out_stream)

        return out_path

    def test_switch_dictionary(self):
        """Each dictionary loads when named by relative or by absolute path."""
        # (relative path, absolute path, expected number of lexicon entries)
        cases = (
            ('small.dic', self.small_dic_path, 1),
            ('core.dic', self.core_dic_path, 2),
            ('full.dic', self.full_dic_path, 3),
        )
        # NOTE(review): Dictionary instances are not explicitly released here;
        # confirm whether they hold the .dic file open, which could interfere
        # with removing the temporary directory on some platforms.
        for relative_path, absolute_path, expected_size in cases:
            for system_dict in (relative_path, absolute_path):
                with self.subTest(system_dict=system_dict):
                    self._rewrite_json(self.sudachi_json_path, 'systemDict', system_dict)
                    dictionary = Dictionary(config_path=self.sudachi_json_path,
                                            resource_dir=self.resource_dir)
                    self.assertEqual(expected_size, dictionary.lexicon.size())
0 commit comments