Skip to content

Commit 6ccc11c

Browse files
committed
Add warn_deprecation
1 parent b3e2d6e commit 6ccc11c

File tree

4 files changed

+65
-11
lines changed

4 files changed

+65
-11
lines changed

pythainlp/tokenize/core.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
"""
55
Generic functions of tokenizers
66
"""
7+
78
import re
8-
import warnings
99
from typing import Iterable, List, Union
1010

1111
from pythainlp.tokenize import (
@@ -21,6 +21,7 @@
2121
rejoin_formatted_num,
2222
strip_whitespace,
2323
)
24+
from pythainlp.tools import warn_deprecation
2425
from pythainlp.util.trie import Trie, dict_trie
2526

2627

@@ -45,13 +46,9 @@ def clause_tokenize(doc: List[str]) -> List[List[str]]:
4546
# ['และ', 'คุณ', 'เล่น', 'มือถือ'],
4647
# ['ส่วน', 'น้อง', 'เขียน', 'โปรแกรม']]
4748
"""
49+
warn_deprecation("pythainlp.util.clause_tokenize", "", "5.0.5", "5.1")
4850
from pythainlp.tokenize.crfcls import segment
4951

50-
warnings.warn(
51-
"""
52-
clause_tokenize is no longer supported \
53-
and will be removed in version 5.1.
54-
""", DeprecationWarning)
5552
return segment(doc)
5653

5754

@@ -71,6 +68,7 @@ def word_detokenize(
7168
::
7269
7370
from pythainlp.tokenize import word_detokenize
71+
7472
print(word_detokenize(["เรา", "เล่น"]))
7573
# output: เราเล่น
7674
"""
@@ -299,18 +297,19 @@ def word_tokenize(
299297
segments = segment(text)
300298
elif engine == "nlpo3":
301299
from pythainlp.tokenize.nlpo3 import segment
300+
302301
# Currently cannot handle custom_dict from inside word_tokenize(),
303302
# due to difference in type.
304-
#if isinstance(custom_dict, str):
303+
# if isinstance(custom_dict, str):
305304
# segments = segment(text, custom_dict=custom_dict)
306-
#elif not isinstance(custom_dict, str) and not custom_dict:
305+
# elif not isinstance(custom_dict, str) and not custom_dict:
307306
# raise ValueError(
308307
# f"""Tokenizer \"{engine}\":
309308
# custom_dict must be a str.
310309
# It is a dictionary name as assigned with load_dict().
311310
# See pythainlp.tokenize.nlpo3.load_dict()"""
312311
# )
313-
#else:
312+
# else:
314313
# segments = segment(text)
315314
segments = segment(text)
316315
else:

pythainlp/tools/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,12 @@
66
"get_full_data_path",
77
"get_pythainlp_data_path",
88
"get_pythainlp_path",
9+
"safe_print",
10+
"warn_deprecation",
911
]
1012

13+
from pythainlp.tools.core import safe_print, warn_deprecation
14+
1115
from pythainlp.tools.path import (
1216
PYTHAINLP_DEFAULT_DATA_DIR,
1317
get_full_data_path,

pythainlp/tools/core.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# -*- coding: utf-8 -*-
2+
# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3+
# SPDX-License-Identifier: Apache-2.0
4+
"""
5+
Generic support functions for PyThaiNLP.
6+
"""
7+
8+
import sys
9+
import warnings
10+
11+
12+
def warn_deprecation(
    deprecated_func: str,
    replacing_func: str = "",
    deprecated_version: str = "",
    removal_version: str = "",
):
    """Warn about the deprecation of a function.

    Builds a message of the form
    "The '<deprecated_func>' function is deprecated[ since <version>] and
    will be removed in <removal_version>.[ Please use '<replacing_func>'
    instead.]" and emits it as a :class:`DeprecationWarning`.

    This helper is meant to be called directly from the body of the
    deprecated function, so that the warning is attributed to the code
    that called the deprecated function.

    :param str deprecated_func: Name of the deprecated function.
    :param str replacing_func: Name of the function to use instead (optional).
    :param str deprecated_version: Version in which the function was
        deprecated (optional).
    :param str removal_version: Version in which the function will be
        removed (optional). "a future release" is reported when omitted.
    """
    message = f"The '{deprecated_func}' function is deprecated"
    if deprecated_version:
        message += f" since {deprecated_version}"
    if not removal_version:
        removal_version = "a future release"
    message += f" and will be removed in {removal_version}."
    if replacing_func:
        message += f" Please use '{replacing_func}' instead."
    # Stack levels: 1 = this helper, 2 = the deprecated function that called
    # it, 3 = the user's code that called the deprecated function. Level 3
    # makes the reported file/line point at the call that needs updating.
    warnings.warn(message, DeprecationWarning, stacklevel=3)
33+
34+
35+
def safe_print(text: str):
    """Print text to console, handling UnicodeEncodeError.

    The text is printed as-is first. If the output stream cannot encode it,
    the text is re-encoded with the stream's encoding using
    ``errors="replace"`` so that unencodable characters are substituted
    instead of aborting the print.

    :param text: Text to print.
    :type text: str
    """
    try:
        print(text)
    except UnicodeEncodeError:
        # A replaced sys.stdout may have no ``encoding`` attribute or have it
        # set to None; passing None to str.encode() would raise TypeError
        # from inside this handler. ASCII is the safe lowest common
        # denominator in that case.
        encoding = getattr(sys.stdout, "encoding", None) or "ascii"
        print(text.encode(encoding, errors="replace").decode(encoding))

pythainlp/util/normalize.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from pythainlp import thai_lead_vowels as lead_v
1515
from pythainlp import thai_tonemarks as tonemarks
1616
from pythainlp.tokenize import word_tokenize
17-
17+
from pythainlp.tools import warn_deprecation
1818

1919
_DANGLING_CHARS = f"{above_v}{below_v}{tonemarks}\u0e3a\u0e4c\u0e4d\u0e4e"
2020
_RE_REMOVE_DANGLINGS = re.compile(f"^[{_DANGLING_CHARS}]+")
@@ -331,6 +331,9 @@ def maiyamok(sent: Union[str, List[str]]) -> List[str]:
331331
# output: ['คน', 'คน', 'นก']
332332
"""
333333
warn_deprecation(
334-
"pythainlp.util.maiyamok", "pythainlp.util.expand_maiyamok", "5.2"
334+
"pythainlp.util.maiyamok",
335+
"pythainlp.util.expand_maiyamok",
336+
"5.0.5",
337+
"5.2",
335338
)
336339
return expand_maiyamok(sent)

0 commit comments

Comments
 (0)