Skip to content

Commit 787774a

Browse files
authored
Revert "fixErr: specialtoken 无法添加" (#2387)
1 parent 2ee8a1e commit 787774a

File tree

2 files changed

+6
-2
lines changed

2 files changed

+6
-2
lines changed

paddleformers/transformers/tokenizer_utils_base.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from collections import UserDict
2626
from dataclasses import dataclass
2727
from enum import Enum
28+
from functools import cache
2829
from typing import (
2930
Any,
3031
Dict,
@@ -1260,6 +1261,7 @@ def special_tokens_map_extended(self) -> Dict[str, Union[str, AddedToken, List[U
12601261
return set_attr
12611262

12621263
@property
1264+
@cache
12631265
def all_special_tokens(self) -> List[str]:
12641266
"""
12651267
`List[str]`: All the special tokens (`'<unk>'`, `'<cls>'`, etc.) mapped to class attributes.
@@ -1270,6 +1272,7 @@ def all_special_tokens(self) -> List[str]:
12701272
return all_toks
12711273

12721274
@property
1275+
@cache
12731276
def all_special_tokens_extended(self) -> List[Union[str, AddedToken]]:
12741277
"""
12751278
`List[Union[str, AddedToken]]`: All the special tokens (`'<unk>'`, `'<cls>'`, etc.) mapped to class

tests/transformers/test_tokenizer_common.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -693,8 +693,9 @@ def test_add_special_tokens(self):
693693
)["input_ids"]
694694
self.assertEqual(encoded, input_encoded + special_token_id)
695695

696-
decoded = tokenizer.decode(encoded, skip_special_tokens=True)
697-
self.assertTrue(special_token not in decoded)
696+
# TODO(zhenglujing): Fix in future
697+
# decoded = tokenizer.decode(encoded, skip_special_tokens=True)
698+
# self.assertTrue(special_token not in decoded)
698699

699700
def test_internal_consistency(self):
700701
tokenizers = self.get_tokenizers()

0 commit comments

Comments
 (0)