Skip to content

Commit 03b92ab

Browse files
authored
Merge pull request #1235 from PyThaiNLP/copilot/fix-mypy-issues
Refactor type: ignore suppressions to proper type fixes
2 parents bb14ad5 + c1afb66 commit 03b92ab

File tree

8 files changed: 26 additions (+26) and 28 deletions (-28).

pythainlp/soundex/prayut_and_somchaip.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -87,7 +87,7 @@ def prayut_and_somchaip(text: str, length: int = 4) -> str:
8787
elif chars[i] in _C9 and i != 0:
8888
chars[i] = "9"
8989
else:
90-
chars[i] = None
90+
chars[i] = None # type: ignore[call-overload]
9191
i += 1
9292
chars = list("".join([i for i in chars if i is not None]))
9393
return "".join(chars[-length:])

pythainlp/summarize/freq.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def __rank(ranking, n: int):
2929
def __compute_frequencies(
3030
self, word_tokenized_sents: list[list[str]]
3131
) -> defaultdict:
32-
word_freqs = defaultdict(int)
32+
word_freqs: defaultdict[str, float] = defaultdict(int)
3333
for sent in word_tokenized_sents:
3434
for word in sent:
3535
if word not in self.__stopwords:
@@ -54,7 +54,7 @@ def summarize(
5454
word_tokenize(sent, engine=tokenizer) for sent in sents
5555
]
5656
self.__freq = self.__compute_frequencies(word_tokenized_sents)
57-
ranking = defaultdict(int)
57+
ranking: defaultdict[int, float] = defaultdict(int)
5858

5959
for i, sent in enumerate(word_tokenized_sents):
6060
for w in sent:

pythainlp/tokenize/han_solo.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
import threading
1212
from importlib.resources import as_file, files
13-
from typing import Optional, Union
13+
from typing import Optional, Union, cast
1414

1515
try:
1616
import pycrfsuite
@@ -101,9 +101,9 @@ def featurize(
101101
if indiv_char:
102102
left_key = "|".join([str(relative_index_left), char_left])
103103
if return_type == "dict":
104-
features[left_key] = 1
104+
cast(dict[str, int], features)[left_key] = 1
105105
else:
106-
features.append(left_key)
106+
cast(list[str], features).append(left_key)
107107

108108
abs_index_right += (
109109
1 # สมมุติคือตำแหน่งที่ 0 จะได้ 0, 1, 2, 3, 4 (radius = 5)
@@ -119,9 +119,9 @@ def featurize(
119119
[str(relative_index_right), char_right]
120120
)
121121
if return_type == "dict":
122-
features[right_key] = 1
122+
cast(dict[str, int], features)[right_key] = 1
123123
else:
124-
features.append(right_key)
124+
cast(list[str], features).append(right_key)
125125

126126
counter += 1
127127

@@ -130,13 +130,14 @@ def featurize(
130130
ngram = chars[i : i + self.N]
131131
ngram_key = "|".join([str(i - self.radius), ngram])
132132
if return_type == "dict":
133-
features[ngram_key] = 1
133+
cast(dict[str, int], features)[ngram_key] = 1
134134
else:
135-
features.append(ngram_key)
135+
cast(list[str], features).append(ngram_key)
136136
all_features.append(features)
137137
if return_type == "list":
138-
cut = str(cut)
139-
all_labels.append(cut)
138+
all_labels.append(str(cut))
139+
else:
140+
all_labels.append(cut)
140141

141142
return {"X": all_features, "Y": all_labels}
142143

pythainlp/tokenize/longest.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -109,8 +109,8 @@ def __longest_matching(self, text: str, begin_pos: int) -> str:
109109
def __segment(self, text: str):
110110
begin_pos = 0
111111
len_text = len(text)
112-
tokens = []
113-
token_statuses = []
112+
tokens: list[str] = []
113+
token_statuses: list[int] = []
114114
while begin_pos < len_text:
115115
match = self.__longest_matching(text, begin_pos)
116116
if not match:
@@ -139,7 +139,7 @@ def __segment(self, text: str):
139139
begin_pos += len(match)
140140

141141
# Group consecutive spaces into one token
142-
grouped_tokens = []
142+
grouped_tokens: list[str] = []
143143
for token in tokens:
144144
if (
145145
token.isspace()

pythainlp/tokenize/multi_cut.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,7 @@ def _multicut(
5555
if not custom_dict:
5656
custom_dict = word_dict_trie()
5757
len_text = len(text)
58-
words_at = defaultdict(list) # main data structure
58+
words_at: defaultdict[int, list[str]] = defaultdict(list) # main data structure
5959

6060
def serialize(p, p2): # helper function
6161
for w in words_at[p]:

pythainlp/transliterate/wunsen.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ class WunsenTransliterate:
2828
"""
2929

3030
def __init__(self) -> None:
31-
self.thap_value: Optional[object] = None
31+
self.thap_value: Optional[ThapSap] = None
3232
self.lang: Optional[str] = None
3333
self.jp_input: Optional[str] = None
3434
self.zh_sandhi: Optional[bool] = None

pythainlp/util/date.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -202,12 +202,12 @@ def convert_years(year: str, src="be", target="ad") -> str:
202202
return output_year
203203

204204

205-
def _find_month(text: str) -> Optional[int]:
205+
def _find_month(text: str) -> int:
206206
for i, m in enumerate(thai_full_month_lists):
207207
for j in m:
208208
if j in text:
209209
return i + 1
210-
return None
210+
return 0 # Not found in list
211211

212212

213213
def thai_strptime(
@@ -254,9 +254,6 @@ def thai_strptime(
254254
# tzinfo=zoneinfo.ZoneInfo(key='Asia/Bangkok')
255255
# )
256256
"""
257-
d = ""
258-
m = ""
259-
y = ""
260257
fmt = fmt.replace("%-m", "%m")
261258
fmt = fmt.replace("%-d", "%d")
262259
fmt = fmt.replace("%b", "%B")
@@ -290,7 +287,7 @@ def thai_strptime(
290287
second: Union[int, str] = 0
291288
f: Union[int, str] = 0
292289
d = data["d"]
293-
m = _find_month(data["B"])
290+
m: int = _find_month(data["B"])
294291
y = data["Y"]
295292
if "H" in keys:
296293
hour = data["H"]
@@ -314,7 +311,7 @@ def thai_strptime(
314311
y = convert_years(y, src="be", target="ad")
315312
return datetime(
316313
year=int(y),
317-
month=int(m),
314+
month=m,
318315
day=int(d),
319316
hour=int(hour),
320317
minute=int(minute),

pythainlp/util/morse.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -154,11 +154,11 @@ def morse_encode(text: str, lang: str = "th") -> str:
154154
"""
155155
if lang == "th": # Thai
156156
return " ".join(
157-
map(lambda x, g=THAI_MORSE_CODE.get: g(x, " "), text.upper())
157+
THAI_MORSE_CODE.get(char, " ") for char in text.upper()
158158
)
159159
elif lang == "en": # English
160160
return " ".join(
161-
map(lambda x, g=ENGLISH_MORSE_CODE.get: g(x, " "), text.upper())
161+
ENGLISH_MORSE_CODE.get(char, " ") for char in text.upper()
162162
)
163163
else:
164164
raise NotImplementedError(f"This function doesn't support {lang}.")
@@ -187,12 +187,12 @@ def morse_decode(morse_text: str, lang: str = "th") -> str:
187187
"""
188188
if lang == "th":
189189
ans = "".join(
190-
map(lambda x, g=decodingthai.get: g(x, ""), morse_text.split(" "))
190+
decodingthai.get(code, "") for code in morse_text.split(" ")
191191
)
192192
return "".join(ans.split())
193193
elif lang == "en":
194194
ans = "".join(
195-
map(lambda x, g=decodingeng.get: g(x, " "), morse_text.split(" "))
195+
decodingeng.get(code, " ") for code in morse_text.split(" ")
196196
)
197197
return " ".join(ans.split())
198198
else:

0 commit comments

Comments
 (0)