Add warn_deprecation

bact · bact · commit 6ccc11c19625 · 2024-12-13T12:38:40.000Z
diff --git a/pythainlp/tokenize/core.py b/pythainlp/tokenize/core.py
@@ -4,8 +4,8 @@
 """
 Generic functions of tokenizers
 """
+
 import re
-import warnings
 from typing import Iterable, List, Union
 
 from pythainlp.tokenize import (
@@ -21,6 +21,7 @@
     rejoin_formatted_num,
     strip_whitespace,
 )
+from pythainlp.tools import warn_deprecation
 from pythainlp.util.trie import Trie, dict_trie
 
 
@@ -45,13 +46,9 @@ def clause_tokenize(doc: List[str]) -> List[List[str]]:
         # ['และ', 'คุณ', 'เล่น', 'มือถือ'],
         # ['ส่วน', 'น้อง', 'เขียน', 'โปรแกรม']]
     """
+    warn_deprecation("pythainlp.tokenize.clause_tokenize", "", "5.0.5", "5.1")
     from pythainlp.tokenize.crfcls import segment
 
-    warnings.warn(
-        """
-                  clause_tokenize is no longer supported \
-                  and will be removed in version 5.1.
-        """, DeprecationWarning)
     return segment(doc)
 
 
@@ -71,6 +68,7 @@ def word_detokenize(
     ::
 
         from pythainlp.tokenize import word_detokenize
+
         print(word_detokenize(["เรา", "เล่น"]))
         # output: เราเล่น
     """
@@ -299,18 +297,19 @@ def word_tokenize(
         segments = segment(text)
     elif engine == "nlpo3":
         from pythainlp.tokenize.nlpo3 import segment
+
         # Currently cannot handle custom_dict from inside word_tokenize(),
         # due to difference in type.
-        #if isinstance(custom_dict, str):
+        # if isinstance(custom_dict, str):
         #    segments = segment(text, custom_dict=custom_dict)
-        #elif not isinstance(custom_dict, str) and not custom_dict:
+        # elif not isinstance(custom_dict, str) and not custom_dict:
         #    raise ValueError(
         #        f"""Tokenizer \"{engine}\":
         #        custom_dict must be a str.
         #        It is a dictionary name as assigned with load_dict().
         #        See pythainlp.tokenize.nlpo3.load_dict()"""
         #    )
-        #else:
+        # else:
         #    segments = segment(text)
         segments = segment(text)
     else:
diff --git a/pythainlp/tools/__init__.py b/pythainlp/tools/__init__.py
@@ -6,8 +6,12 @@
     "get_full_data_path",
     "get_pythainlp_data_path",
     "get_pythainlp_path",
+    "safe_print",
+    "warn_deprecation",
 ]
 
+from pythainlp.tools.core import safe_print, warn_deprecation
+
 from pythainlp.tools.path import (
     PYTHAINLP_DEFAULT_DATA_DIR,
     get_full_data_path,
diff --git a/pythainlp/tools/core.py b/pythainlp/tools/core.py
@@ -0,0 +1,48 @@
+# -*- coding: utf-8 -*-
+# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
+# SPDX-License-Identifier: Apache-2.0
+"""
+Generic support functions for PyThaiNLP.
+"""
+
+import sys
+import warnings
+
+
+def warn_deprecation(
+    deprecated_func: str,
+    replacing_func: str = "",
+    deprecated_version: str = "",
+    removal_version: str = "",
+):
+    """Warn about the deprecation of a function.
+
+    :param str deprecated_func: Name of the deprecated function.
+    :param str replacing_func: Name of the function to use instead (optional).
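+    :param str deprecated_version: PyThaiNLP version in which the function was deprecated (optional).
+    :param str removal_version: PyThaiNLP version in which the function will be removed (optional).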
+    """
+    message = f"The '{deprecated_func}' function is deprecated"
+    if deprecated_version:
+        message += f" since {deprecated_version}"
+    if not removal_version:
+        removal_version = "a future release"
+    message += f" and will be removed in {removal_version}."
+    if replacing_func:
+        message += f" Please use '{replacing_func}' instead."
+    warnings.warn(message, DeprecationWarning, stacklevel=2)
+
+
+def safe_print(text: str):
+    """Print text to console, handling UnicodeEncodeError.
+
+    :param text: Text to print.
+    :type text: str
+    """
+    try:
+        print(text)
+    except UnicodeEncodeError:
+        print(
+            text.encode(sys.stdout.encoding, errors="replace").decode(
+                sys.stdout.encoding
+            )
+        )
diff --git a/pythainlp/util/normalize.py b/pythainlp/util/normalize.py
@@ -14,7 +14,7 @@
 from pythainlp import thai_lead_vowels as lead_v
 from pythainlp import thai_tonemarks as tonemarks
 from pythainlp.tokenize import word_tokenize
-
+from pythainlp.tools import warn_deprecation
 
 _DANGLING_CHARS = f"{above_v}{below_v}{tonemarks}\u0e3a\u0e4c\u0e4d\u0e4e"
 _RE_REMOVE_DANGLINGS = re.compile(f"^[{_DANGLING_CHARS}]+")
@@ -331,6 +331,9 @@ def maiyamok(sent: Union[str, List[str]]) -> List[str]:
         # output: ['คน', 'คน', 'นก']
     """
     warn_deprecation(
-        "pythainlp.util.maiyamok", "pythainlp.util.expand_maiyamok", "5.2"
+        "pythainlp.util.maiyamok",
+        "pythainlp.util.expand_maiyamok",
+        "5.0.5",
+        "5.2",
     )
     return expand_maiyamok(sent)