Merged
2 changes: 1 addition & 1 deletion pythainlp/soundex/prayut_and_somchaip.py
@@ -89,5 +89,5 @@ def prayut_and_somchaip(text: str, length: int = 4) -> str:
         else:
             chars[i] = None  # type: ignore[call-overload]
         i += 1
-    chars = list("".join([i for i in chars if i is not None]))
+    chars = list("".join(filter(None, chars)))
     return "".join(chars[-length:])
2 changes: 1 addition & 1 deletion pythainlp/spell/words_spelling_correction.py
@@ -236,7 +236,7 @@ def __init__(self):
                 self.model_name, "list_word-spelling-correction-char2vec.txt"
             )
         ) as f:
-            self.list_word = [i.strip() for i in f.readlines()]
+            self.list_word = list(map(str.strip, f.readlines()))
         super().__init__(self.model_path, self.model_onnx, self.list_word)
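Note: str.strip with no arguments removes surrounding whitespace, including the trailing newline left by readlines(), so map(str.strip, ...) matches the old comprehension; a quick sketch with hypothetical word-list lines:

    >>> lines = ["กิน\n", "เล่น \n"]  # hypothetical lines from the corpus file
    >>> list(map(str.strip, lines))
    ['กิน', 'เล่น']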


6 changes: 2 additions & 4 deletions pythainlp/tokenize/thaisumcut.py
@@ -77,7 +77,7 @@ def middle_cut(sentences: list[str]) -> list[str]:
     # Split all result parts by <stop> and filter
     all_sentences = (s.strip() for part in result_parts for s in part.split("<stop>"))

-    return [s for s in all_sentences if s]
+    return list(filter(None, all_sentences))


class ThaiSentenceSegmentor:
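Note on the middle_cut change above: filter accepts any iterable, so it consumes the all_sentences generator directly, and with None as the predicate it keeps only non-empty strings. A sketch with made-up result parts:

    >>> result_parts = ["a<stop>b<stop>", "<stop>c"]  # hypothetical input
    >>> gen = (s.strip() for part in result_parts for s in part.split("<stop>"))
    >>> list(filter(None, gen))
    ['a', 'b', 'c']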
@@ -358,9 +358,7 @@ def split_into_sentences(self, text: str, isMiddleCut: bool = False) -> list[str
text = text.replace("!", "!<stop>")
text = text.replace("<prd>", ".")
sentences = text.split("<stop>")
sentences = [s.strip() for s in sentences]
if "" in sentences:
sentences.remove("")
sentences = list(map(str.strip, sentences))
if "nan" in sentences:
sentences.remove("nan")

6 changes: 3 additions & 3 deletions pythainlp/ulmfit/preprocess.py
@@ -142,7 +142,7 @@ def replace_wrep_post(toks: Collection[str]) -> list[str]:
         else:
             res.append(previous_word)
         previous_word = current_word
-    return [x for x in res[1:] if x is not None]
+    return list(filter(None, res[1:]))


def rm_useless_newlines(text: str) -> str:
@@ -201,7 +201,7 @@ def lowercase_all(toks: Collection[str]) -> list[str]:
"""Lowercase all English words;
English words in Thai texts don't usually have nuances of capitalization.
"""
return [tok.lower() for tok in toks]
return list(map(str.lower, toks))


def replace_rep_nonum(text: str) -> str:
@@ -264,7 +264,7 @@ def replace_wrep_post_nonum(toks: Collection[str]) -> list[str]:
         else:
             res.append(previous_word)
         previous_word = current_word
-    return [x for x in res[1:] if x is not None]
+    return list(filter(None, res[1:]))


def remove_space(toks: Collection[str]) -> list[str]:
2 changes: 1 addition & 1 deletion pythainlp/util/date.py
@@ -88,7 +88,7 @@
 dates_list = (
     "("
     + "|".join(
-        [str(i) for i in range(32, 0, -1)]
+        list(map(str, range(32, 0, -1)))
         + ["0" + str(i) for i in range(1, 10)]
     )
     + ")"
2 changes: 1 addition & 1 deletion pythainlp/util/normalize.py
@@ -346,7 +346,7 @@ def expand_maiyamok(sent: Union[str, list[str]]) -> list[str]:
     temp_toks: list[str] = []
     for token in sent:
         toks = re_yamok.split(token)
-        toks = [tok for tok in toks if tok]  # remove empty string ("")
+        toks = list(filter(None, toks))  # remove empty string ("")
         temp_toks.extend(toks)
     sent = temp_toks
     del temp_toks
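Note: re.split with a capturing pattern keeps the delimiter in the output and can emit empty strings when a match falls at the start or end of the token, which is what the filter removes. A sketch assuming re_yamok is a pattern like re.compile("(ๆ)") for mai yamok:

    >>> import re
    >>> re_yamok = re.compile("(ๆ)")  # assumed pattern; the module defines the real one
    >>> re_yamok.split("ดีๆ")
    ['ดี', 'ๆ', '']
    >>> list(filter(None, re_yamok.split("ดีๆ")))
    ['ดี', 'ๆ']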