@@ -21,7 +21,6 @@
 from huggingface_hub import hf_hub_download

 from paddlenlp import __version__
-from paddlenlp.transformers import *
 from paddlenlp.utils.downloader import COMMUNITY_MODEL_PREFIX, get_path_from_url
 from paddlenlp.utils.env import HF_CACHE_HOME, MODEL_HOME
 from paddlenlp.utils.import_utils import is_fast_tokenizer_available
@@ -119,7 +118,7 @@ def get_configurations():
         # So same config would map more than one tokenizer
         if MAPPING_NAMES.get(name, None) is None:
             MAPPING_NAMES[name] = []
-        # (tokenizer_name, is_faster)
+        # (tokenizer_name, is_fast)
         MAPPING_NAMES[name].append((tokenizer_name, fast_name != ""))
     return MAPPING_NAMES

@@ -135,7 +134,7 @@ class AutoTokenizer:
     MAPPING_NAMES = get_configurations()
     _tokenizer_mapping = MAPPING_NAMES
     _name_mapping = TOKENIZER_MAPPING_NAMES
-    _faster_name_mapping = FAST_TOKENIZER_MAPPING_NAMES
+    _fast_name_mapping = FAST_TOKENIZER_MAPPING_NAMES
     tokenizer_config_file = "tokenizer_config.json"

     def __init__(self, *args, **kwargs):
@@ -158,10 +157,10 @@ def _get_tokenizer_class_from_config(cls, pretrained_model_name_or_path, config_
             import_class = importlib.import_module(f"paddlenlp.transformers.{class_name}.tokenizer")
             tokenizer_class = getattr(import_class, init_class)
             if use_fast:
-                for faster_tokenizer_class, name in cls._faster_name_mapping.items():
+                for fast_tokenizer_class, name in cls._fast_name_mapping.items():
                     if name == class_name:
-                        import_class = importlib.import_module(f"paddlenlp.transformers.{class_name}.faster_tokenizer")
-                        tokenizer_class = getattr(import_class, faster_tokenizer_class)
+                        import_class = importlib.import_module(f"paddlenlp.transformers.{class_name}.fast_tokenizer")
+                        tokenizer_class = getattr(import_class, fast_tokenizer_class)
             return tokenizer_class
         # If no `init_class`, we use pattern recognition to recognize the tokenizer class.
         else:
@@ -219,7 +218,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, from_hf_hub=False, *mode
             print(type(tokenizer))
             # <class 'paddlenlp.transformers.bert.tokenizer.BertTokenizer'>
         """
-        # Default not to use faster tokenizer
+        # Default not to use fast tokenizer
         use_fast = kwargs.pop("use_fast", False)
         if "use_fast" in kwargs:
             use_fast = kwargs.pop("use_fast", False)
@@ -267,7 +266,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, from_hf_hub=False, *mode
                 break
         if not is_support_fast_tokenizer:
             logger.warning(
-                f"The tokenizer {actual_tokenizer_class} doesn't have the faster version."
+                f"The tokenizer {actual_tokenizer_class} doesn't have the fast version."
                 " Please check the map `paddlenlp.transformers.auto.tokenizer.FAST_TOKENIZER_MAPPING_NAMES`"
                 " to see which fast tokenizers are currently supported."
             )
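For reference, a minimal usage sketch of the renamed flag. This is not part of the diff; the model name is taken from the docstring example above, and the fast path assumes a PaddleNLP build where `is_fast_tokenizer_available()` returns True.

from paddlenlp.transformers import AutoTokenizer

# Default is use_fast=False, so the pure-Python tokenizer class is returned.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
print(type(tokenizer))
# <class 'paddlenlp.transformers.bert.tokenizer.BertTokenizer'>

# Opt in to the fast tokenizer; per the warning above, models with no entry in
# FAST_TOKENIZER_MAPPING_NAMES fall back to the Python tokenizer with a warning.
fast_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased", use_fast=True)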