Skip to content

Commit 12d7768

Browse files
authored
Merge pull request #1231 from PyThaiNLP/copilot/fix-type-hints-inconsistent
Fix type hints for Python 3.9 runtime compatibility
2 parents 59695bd + b93128a commit 12d7768

File tree

11 files changed: 26 additions (+26) and 18 deletions (-18).

pythainlp/benchmarks/word_tokenization.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@ def compute_stats(ref_sample: str, raw_sample: str) -> dict:
196196
}
197197

198198

199-
def _binary_representation(txt: str, verbose: bool = False):
199+
def _binary_representation(txt: str, verbose: bool = False) -> np.ndarray:
200200
"""Transform text into {0, 1} sequence.
201201
202202
where (1) indicates that the corresponding character is the beginning of

pythainlp/coref/core.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
def coreference_resolution(
1010
texts: list[str], model_name: str = "han-coref-v1.0", device: str = "cpu"
11-
):
11+
) -> list[dict]:
1212
"""Coreference Resolution
1313
1414
:param List[str] texts: list of texts to apply coreference resolution to

pythainlp/spell/core.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,16 @@
88

99
import itertools
1010
from functools import lru_cache
11+
from typing import TYPE_CHECKING
1112

1213
from pythainlp.spell import DEFAULT_SPELL_CHECKER
1314

15+
if TYPE_CHECKING:
16+
from pythainlp.spell.pn import NorvigSpellChecker
17+
1418

1519
@lru_cache
16-
def default_spell_checker():
20+
def default_spell_checker() -> "NorvigSpellChecker":
1721
"""Lazy load default spell checker with cache"""
1822
return DEFAULT_SPELL_CHECKER()
1923

pythainlp/spell/symspellpy.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
_load_lock = threading.Lock() # Thread safety for lazy loading
3434

3535

36-
def _get_sym_spell():
36+
def _get_sym_spell() -> SymSpell:
3737
"""Lazy load the symspell instance.
3838
3939
This function uses a lock to ensure thread-safe initialization.

pythainlp/tag/perceptron.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,36 +32,36 @@
3232
_TUD_TAGGER = None
3333

3434

35-
def _orchid_tagger():
35+
def _orchid_tagger() -> PerceptronTagger:
3636
global _ORCHID_TAGGER
3737
if not _ORCHID_TAGGER:
3838
_ORCHID_TAGGER = PerceptronTagger(path=_ORCHID_PATH)
3939
return _ORCHID_TAGGER
4040

4141

42-
def _pud_tagger():
42+
def _pud_tagger() -> PerceptronTagger:
4343
global _PUD_TAGGER
4444
if not _PUD_TAGGER:
4545
_PUD_TAGGER = PerceptronTagger(path=_PUD_PATH)
4646
return _PUD_TAGGER
4747

4848

49-
def _blackboard_tagger():
49+
def _blackboard_tagger() -> PerceptronTagger:
5050
global _BLACKBOARD_TAGGER
5151
if not _BLACKBOARD_TAGGER:
5252
path = get_corpus_path(_BLACKBOARD_NAME)
5353
_BLACKBOARD_TAGGER = PerceptronTagger(path=path)
5454
return _BLACKBOARD_TAGGER
5555

5656

57-
def _tdtb():
57+
def _tdtb() -> PerceptronTagger:
5858
global _TDTB_TAGGER
5959
if not _TDTB_TAGGER:
6060
_TDTB_TAGGER = PerceptronTagger(path=_TDTB_PATH)
6161
return _TDTB_TAGGER
6262

6363

64-
def _tud_tagger():
64+
def _tud_tagger() -> PerceptronTagger:
6565
global _TUD_TAGGER
6666
if not _TUD_TAGGER:
6767
_TUD_TAGGER = PerceptronTagger(path=_TUD_PATH)

pythainlp/tag/thainer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,5 +201,5 @@ def get_ner(
201201
return sent_ner
202202

203203
@staticmethod
204-
def __extract_features(doc: list[str]) -> list[dict[str, str | bool]]:
204+
def __extract_features(doc: list[str]) -> list[dict[str, Union[str, bool]]]:
205205
return [_doc2features(doc, i) for i in range(len(doc))]

pythainlp/tag/unigram.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,23 +33,23 @@
3333
_TUD_TAGGER = None
3434

3535

36-
def _orchid_tagger():
36+
def _orchid_tagger() -> dict:
3737
global _ORCHID_TAGGER
3838
if not _ORCHID_TAGGER:
3939
with open(_ORCHID_PATH, encoding="utf-8-sig") as fh:
4040
_ORCHID_TAGGER = json.load(fh)
4141
return _ORCHID_TAGGER
4242

4343

44-
def _pud_tagger():
44+
def _pud_tagger() -> dict:
4545
global _PUD_TAGGER
4646
if not _PUD_TAGGER:
4747
with open(_PUD_PATH, encoding="utf-8-sig") as fh:
4848
_PUD_TAGGER = json.load(fh)
4949
return _PUD_TAGGER
5050

5151

52-
def _blackboard_tagger():
52+
def _blackboard_tagger() -> dict:
5353
global _BLACKBOARD_TAGGER
5454
if not _BLACKBOARD_TAGGER:
5555
path = get_corpus_path(_BLACKBOARD_NAME)
@@ -58,15 +58,15 @@ def _blackboard_tagger():
5858
return _BLACKBOARD_TAGGER
5959

6060

61-
def _thai_tdtb():
61+
def _thai_tdtb() -> dict:
6262
global _TDTB_TAGGER
6363
if not _TDTB_TAGGER:
6464
with open(_TDTB_PATH, encoding="utf-8-sig") as fh:
6565
_TDTB_TAGGER = json.load(fh)
6666
return _TDTB_TAGGER
6767

6868

69-
def _tud_tagger():
69+
def _tud_tagger() -> dict:
7070
global _TUD_TAGGER
7171
if not _TUD_TAGGER:
7272
with open(_TUD_PATH, encoding="utf-8-sig") as fh:

pythainlp/tokenize/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# SPDX-License-Identifier: Apache-2.0
44
"""Tokenizers at different levels of linguistic analysis.
55
"""
6+
from __future__ import annotations
67

78
__all__ = [
89
"thai2fit_tokenizer",

pythainlp/tokenize/han_solo.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
import threading
1212
from importlib.resources import as_file, files
13+
from typing import Optional
1314

1415
try:
1516
import pycrfsuite
@@ -47,7 +48,7 @@ def _get_tagger() -> pycrfsuite.Tagger:
4748
class Featurizer:
4849
# This class from ssg at https://github.com/ponrawee/ssg.
4950

50-
def __init__(self, N: int = 2, sequence_size: int = 1, delimiter: str | None = None) -> None:
51+
def __init__(self, N: int = 2, sequence_size: int = 1, delimiter: Optional[str] = None) -> None:
5152
self.N = N
5253
self.delimiter = delimiter
5354
self.radius = N + sequence_size

pythainlp/transliterate/core.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
# SPDX-License-Identifier: Apache-2.0
44
from __future__ import annotations
55

6+
from typing import Callable
7+
68
DEFAULT_ROMANIZE_ENGINE = "royin"
79
DEFAULT_TRANSLITERATE_ENGINE = "thaig2p"
810
DEFAULT_PRONUNCIATE_ENGINE = "w2p"
@@ -70,7 +72,7 @@ def romanize(
7072
7173
"""
7274

73-
def select_romanize_engine(engine: str):
75+
def select_romanize_engine(engine: str) -> Callable[[str], str]:
7476
if engine == "thai2rom":
7577
from pythainlp.transliterate.thai2rom import romanize
7678
elif engine == "thai2rom_onnx":

0 commit comments

Comments
 (0)