Skip to content

Commit 7532488

Browse files
committed
fix: CI error
1 parent 9efd6e7 commit 7532488

File tree

3 files changed

+12
-12
lines changed

3 files changed

+12
-12
lines changed

pythainlp/tokenize/attacut.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
:See Also:
99
* `GitHub repository <https://github.com/PyThaiNLP/attacut>`_
1010
"""
11-
from typing import List
11+
from typing import Dict, List
1212

1313
from attacut import Tokenizer
1414

@@ -25,7 +25,8 @@ def __init__(self, model="attacut-sc"):
2525
def tokenize(self, text: str) -> List[str]:
2626
return self._tokenizer.tokenize(text)
2727

28-
_tokenizers = {}
28+
29+
_tokenizers: Dict[str, AttacutTokenizer] = {}
2930

3031

3132
def segment(text: str, model: str = "attacut-sc") -> List[str]:
@@ -41,7 +42,7 @@ def segment(text: str, model: str = "attacut-sc") -> List[str]:
4142
"""
4243
if not text or not isinstance(text, str):
4344
return []
44-
45+
4546
global _tokenizers
4647
if model not in _tokenizers:
4748
_tokenizers[model] = AttacutTokenizer(model)

pythainlp/tokenize/longest.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
1313
"""
1414
import re
15-
from typing import List, Union
15+
from typing import Dict, List, Union
1616

1717
from pythainlp import thai_tonemarks
1818
from pythainlp.tokenize import DEFAULT_WORD_DICT_TRIE
@@ -149,11 +149,10 @@ def tokenize(self, text: str) -> List[str]:
149149
return tokens
150150

151151

152-
_tokenizers = {}
152+
_tokenizers: Dict[int, LongestMatchTokenizer] = {}
153153

154-
def segment(
155-
text: str, custom_dict: Trie = DEFAULT_WORD_DICT_TRIE
156-
) -> List[str]:
154+
155+
def segment(text: str, custom_dict: Trie = DEFAULT_WORD_DICT_TRIE) -> List[str]:
157156
"""
158157
Dictionary-based longest matching word segmentation.
159158

tests/core/test_tokenize.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -411,16 +411,16 @@ def test_longest_custom_dict(self):
411411
["ทดสอบ", " ", "ทดสอบ"],
412412
)
413413
self.assertEqual(
414-
word_tokenize("ปวดเฉียบพลัน", engine="longest", custom_dict={'ปวดเฉียบพลัน'}),
414+
word_tokenize("ปวดเฉียบพลัน", engine="longest", custom_dict=dict_trie(["ปวดเฉียบพลัน"])),
415415
["ปวดเฉียบพลัน"],
416416
)
417417
self.assertEqual(
418-
word_tokenize("ทดสอบทดสอบ", engine="longest", custom_dict={'ทดสอบท'}),
419-
['ทดสอบท', 'ดสอบ'],
418+
word_tokenize("ทดสอบทดสอบ", engine="longest", custom_dict=dict_trie(["ทดสอบท"]) ),
419+
["ทดสอบท", "ดสอบ"],
420420
)
421421
self.assertEqual(
422422
word_tokenize("ทดสอบ ทดสอบ", engine="longest"),
423-
["ทดสอบ", " ", "ทดสอบ"],
423+
["ทดสอบ", " ", "ทดสอบ"],
424424
)
425425

426426
def test_mm(self):

0 commit comments

Comments (0)