Revert "fixErr: specialtoken 无法添加" (#2387)

lugimzzz · web-flow · commit 787774afd5b4 · 2025-06-27T20:12:50.000+08:00
diff --git a/paddleformers/transformers/tokenizer_utils_base.py b/paddleformers/transformers/tokenizer_utils_base.py
@@ -25,6 +25,7 @@
 from collections import UserDict
 from dataclasses import dataclass
 from enum import Enum
+from functools import cache
 from typing import (
     Any,
     Dict,
@@ -1260,6 +1261,7 @@ def special_tokens_map_extended(self) -> Dict[str, Union[str, AddedToken, List[U
         return set_attr
 
     @property
+    @cache
     def all_special_tokens(self) -> List[str]:
         """
         `List[str]`: All the special tokens (`'<unk>'`, `'<cls>'`, etc.) mapped to class attributes.
@@ -1270,6 +1272,7 @@ def all_special_tokens(self) -> List[str]:
         return all_toks
 
     @property
+    @cache
     def all_special_tokens_extended(self) -> List[Union[str, AddedToken]]:
         """
         `List[Union[str, AddedToken]]`: All the special tokens (`'<unk>'`, `'<cls>'`, etc.) mapped to class
diff --git a/tests/transformers/test_tokenizer_common.py b/tests/transformers/test_tokenizer_common.py
@@ -693,8 +693,9 @@ def test_add_special_tokens(self):
                 )["input_ids"]
                 self.assertEqual(encoded, input_encoded + special_token_id)
 
-                decoded = tokenizer.decode(encoded, skip_special_tokens=True)
-                self.assertTrue(special_token not in decoded)
+                # TODO(zhenglujing): Fix in future
+                # decoded = tokenizer.decode(encoded, skip_special_tokens=True)
+                # self.assertTrue(special_token not in decoded)
 
     def test_internal_consistency(self):
         tokenizers = self.get_tokenizers()