Commit c239ca8

[fix] ADDTOKEN (#2545)
1 parent 73442e1 commit c239ca8

File tree

3 files changed: +48 -1 lines changed


paddleformers/transformers/__init__.py

Lines changed: 4 additions & 1 deletion

@@ -18,6 +18,7 @@
 from typing import TYPE_CHECKING
 from ..utils.lazy_import import _LazyModule
 
+
 # from .auto.modeling import AutoModelForCausalLM
 import_structure = {
     "kto_criterion": [
@@ -35,10 +36,10 @@
         "BPETokenizer",
         "tokenize_chinese_chars",
         "is_chinese_char",
-        "AddedToken",
         "normalize_chars",
         "tokenize_special_chars",
         "convert_to_unicode",
+        "AddedToken",
     ],
     "attention_utils": ["create_bigbird_rand_mask_idx_list"],
     "tensor_parallel_utils": [],
@@ -334,6 +335,8 @@
     "qwen2_moe": [],
     "qwen3_moe": [],
     "auto": ["AutoModelForCausalLM"],
+    "legacy.tokenizer_utils_base": ["EncodingFast"],
+    "legacy": [],
 }
 
 if TYPE_CHECKING:
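
Note on the change above: each key in import_structure names a submodule and each value lists the public names it provides, so the lazy loader can resolve, e.g., "AddedToken" to tokenizer_utils on first access. The reorder plus the new "legacy" and "legacy.tokenizer_utils_base" keys should make imports like the following resolve lazily (a sketch, assuming paddleformers' _LazyModule, like the transformers one it mirrors, lifts these names to the package level):

    # Illustrative usage, not part of the diff; each line triggers the
    # deferred import of the submodule that defines the name.
    from paddleformers.transformers import AddedToken, EncodingFast
    from paddleformers.transformers.legacy import PretrainedTokenizerBase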

paddleformers/transformers/legacy/__init__.py

Lines changed: 43 additions & 0 deletions

@@ -11,3 +11,46 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+import sys
+from typing import TYPE_CHECKING
+
+from ...utils.lazy_import import _LazyModule
+
+import_structure = {
+    "tokenizer_utils": [
+        "PretrainedTokenizer",
+        "BPETokenizer",
+        "tokenize_chinese_chars",
+        "is_chinese_char",
+        "normalize_chars",
+        "tokenize_special_chars",
+        "convert_to_unicode",
+    ],
+    "tokenizer_utils_base": [
+        "import_protobuf_decode_error",
+        "ExplicitEnum",
+        "PaddingStrategy",
+        "TensorType",
+        "to_py_obj",
+        "_is_numpy",
+        "TruncationStrategy",
+        "CharSpan",
+        "TokenSpan",
+        "BatchEncoding",
+        "SpecialTokensMixin",
+        "PretrainedTokenizerBase",
+        "EncodingFast",
+    ],
+}
+
+if TYPE_CHECKING:
+    from .tokenizer_utils import *
+    from .tokenizer_utils_base import *
+else:
+    sys.modules[__name__] = _LazyModule(
+        __name__,
+        globals()["__file__"],
+        import_structure,
+        module_spec=__spec__,
+    )
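
This new package __init__ follows the lazy-module pattern popularized by huggingface/transformers: the module object in sys.modules is replaced by a proxy that imports a submodule only when one of its exported names is first accessed. A minimal sketch of how such a proxy can be built on importlib (paddleformers' actual _LazyModule may differ in details):

    import importlib
    import types

    class LazyModule(types.ModuleType):
        # Sketch of a lazy module proxy; not paddleformers' real _LazyModule.
        def __init__(self, name, module_file, import_structure, module_spec=None):
            super().__init__(name)
            self.__file__ = module_file
            self.__spec__ = module_spec
            # Map each exported name back to the submodule that defines it.
            self._name_to_module = {
                attr: mod
                for mod, attrs in import_structure.items()
                for attr in attrs
            }
            self.__all__ = list(self._name_to_module)

        def __getattr__(self, name):
            # Called only when normal attribute lookup fails, i.e. on
            # first access to a lazily exported name.
            if name in self._name_to_module:
                submodule = importlib.import_module(
                    "." + self._name_to_module[name], self.__name__
                )
                value = getattr(submodule, name)
                setattr(self, name, value)  # cache for subsequent lookups
                return value
            raise AttributeError(
                f"module {self.__name__!r} has no attribute {name!r}"
            )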

paddleformers/transformers/tokenizer_utils.py

Lines changed: 1 addition & 0 deletions

@@ -21,6 +21,7 @@
 from functools import wraps
 from typing import Any, Dict, List, Optional, Tuple, Union
 
+from tokenizers import AddedToken  # noqa: F401
 from transformers import BatchEncoding
 from transformers.tokenization_utils import (
     PreTrainedTokenizer as PreTrainedTokenizer_tf,
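
The # noqa: F401 marks the import as intentionally "unused": it exists only to re-export AddedToken from this module, which is what lets the __init__.py change above list "AddedToken" under "tokenizer_utils". The class itself comes from the HuggingFace tokenizers package; a small standalone illustration:

    from tokenizers import AddedToken

    # AddedToken wraps a token string with matching-behavior flags, e.g.
    # whole-word matching and whitespace stripping on the left/right.
    tok = AddedToken("<custom>", single_word=True, lstrip=True, rstrip=False)
    print(tok.content)  # -> <custom>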
