Skip to content

Commit 7532488

Browse files
committed
fix: CI error
1 parent 9efd6e7 commit 7532488

File tree

3 files changed

+12
-12
lines changed

3 files changed

+12
-12
lines changed

pythainlp/tokenize/attacut.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
:See Also:
99
* `GitHub repository <https://github.com/PyThaiNLP/attacut>`_
1010
"""
11-
from typing import List
11+
from typing import Dict, List
1212

1313
from attacut import Tokenizer
1414

@@ -25,7 +25,8 @@ def __init__(self, model="attacut-sc"):
2525
def tokenize(self, text: str) -> List[str]:
2626
return self._tokenizer.tokenize(text)
2727

28-
_tokenizers = {}
28+
29+
_tokenizers: Dict[str, AttacutTokenizer] = {}
2930

3031

3132
def segment(text: str, model: str = "attacut-sc") -> List[str]:
@@ -41,7 +42,7 @@ def segment(text: str, model: str = "attacut-sc") -> List[str]:
4142
"""
4243
if not text or not isinstance(text, str):
4344
return []
44-
45+
4546
global _tokenizers
4647
if model not in _tokenizers:
4748
_tokenizers[model] = AttacutTokenizer(model)

pythainlp/tokenize/longest.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
1313
"""
1414
import re
15-
from typing import List, Union
15+
from typing import Dict, List, Union
1616

1717
from pythainlp import thai_tonemarks
1818
from pythainlp.tokenize import DEFAULT_WORD_DICT_TRIE
@@ -149,11 +149,10 @@ def tokenize(self, text: str) -> List[str]:
149149
return tokens
150150

151151

152-
_tokenizers = {}
152+
_tokenizers: Dict[int, LongestMatchTokenizer] = {}
153153

154-
def segment(
155-
text: str, custom_dict: Trie = DEFAULT_WORD_DICT_TRIE
156-
) -> List[str]:
154+
155+
def segment(text: str, custom_dict: Trie = DEFAULT_WORD_DICT_TRIE) -> List[str]:
157156
"""
158157
Dictionary-based longest matching word segmentation.
159158

tests/core/test_tokenize.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -411,16 +411,16 @@ def test_longest_custom_dict(self):
411411
["ทดสอบ", " ", "ทดสอบ"],
412412
)
413413
self.assertEqual(
414-
word_tokenize("ปวดเฉียบพลัน", engine="longest", custom_dict={'ปวดเฉียบพลัน'}),
414+
word_tokenize("ปวดเฉียบพลัน", engine="longest", custom_dict=dict_trie(["ปวดเฉียบพลัน"])),
415415
["ปวดเฉียบพลัน"],
416416
)
417417
self.assertEqual(
418-
word_tokenize("ทดสอบทดสอบ", engine="longest", custom_dict={'ทดสอบท'}),
419-
['ทดสอบท', 'ดสอบ'],
418+
word_tokenize("ทดสอบทดสอบ", engine="longest", custom_dict=dict_trie(["ทดสอบท"]) ),
419+
["ทดสอบท", "ดสอบ"],
420420
)
421421
self.assertEqual(
422422
word_tokenize("ทดสอบ ทดสอบ", engine="longest"),
423-
["ทดสอบ", " ", "ทดสอบ"],
423+
["ทดสอบ", " ", "ทดสอบ"],
424424
)
425425

426426
def test_mm(self):

0 commit comments

Comments (0)