Skip to content

Commit ec1303c

Browse files
authored
fixErr: special token cannot be added (特殊 token 无法添加) (#2376)
1 parent b7d0975 commit ec1303c

File tree

2 files changed

+2
-6
lines changed

2 files changed

+2
-6
lines changed

paddleformers/transformers/tokenizer_utils_base.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
from collections import UserDict
2626
from dataclasses import dataclass
2727
from enum import Enum
28-
from functools import cache
2928
from typing import (
3029
Any,
3130
Dict,
@@ -1261,7 +1260,6 @@ def special_tokens_map_extended(self) -> Dict[str, Union[str, AddedToken, List[U
12611260
return set_attr
12621261

12631262
@property
1264-
@cache
12651263
def all_special_tokens(self) -> List[str]:
12661264
"""
12671265
`List[str]`: All the special tokens (`'<unk>'`, `'<cls>'`, etc.) mapped to class attributes.
@@ -1272,7 +1270,6 @@ def all_special_tokens(self) -> List[str]:
12721270
return all_toks
12731271

12741272
@property
1275-
@cache
12761273
def all_special_tokens_extended(self) -> List[Union[str, AddedToken]]:
12771274
"""
12781275
`List[Union[str, AddedToken]]`: All the special tokens (`'<unk>'`, `'<cls>'`, etc.) mapped to class

tests/transformers/test_tokenizer_common.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -693,9 +693,8 @@ def test_add_special_tokens(self):
693693
)["input_ids"]
694694
self.assertEqual(encoded, input_encoded + special_token_id)
695695

696-
# TODO(zhenglujing): Fix in future
697-
# decoded = tokenizer.decode(encoded, skip_special_tokens=True)
698-
# self.assertTrue(special_token not in decoded)
696+
decoded = tokenizer.decode(encoded, skip_special_tokens=True)
697+
self.assertTrue(special_token not in decoded)
699698

700699
def test_internal_consistency(self):
701700
tokenizers = self.get_tokenizers()

0 commit comments

Comments (0)