Commit d6d181b

[Tokenizer] Support fast tokenizer within AutoTokenizer import (#9466)
* add tokenizerfast import
* add test case
1 parent 6141f80 commit d6d181b

File tree

2 files changed: +10 -1 lines changed


paddlenlp/transformers/auto/tokenizer.py

Lines changed: 6 additions & 1 deletion
@@ -178,7 +178,12 @@ def tokenizer_class_from_name(class_name: str):
 
             return getattr(module, class_name)
         except AttributeError:
-            raise ValueError(f"Tokenizer class {class_name} is not currently imported.")
+            try:
+                module = importlib.import_module(f".{module_name}.tokenizer_fast", "paddlenlp.transformers")
+
+                return getattr(module, class_name)
+            except AttributeError:
+                raise ValueError(f"Tokenizer class {class_name} is not currently imported.")
 
     for config, tokenizers in TOKENIZER_MAPPING._extra_content.items():
         for tokenizer in tokenizers:
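
In effect, class resolution now tries the model's tokenizer module first and, if the class is not found there, retries against the matching tokenizer_fast module before raising. A minimal standalone sketch of that fallback pattern (the resolve_tokenizer_class helper and its arguments are illustrative, not part of the PaddleNLP API):

import importlib


def resolve_tokenizer_class(module_name: str, class_name: str):
    # Mirror the lookup added above: check the regular (slow) tokenizer module first,
    # e.g. paddlenlp.transformers.bert.tokenizer.
    module = importlib.import_module(f".{module_name}.tokenizer", "paddlenlp.transformers")
    try:
        return getattr(module, class_name)
    except AttributeError:
        # Not found there: fall back to the fast-tokenizer module,
        # e.g. paddlenlp.transformers.bert.tokenizer_fast.
        try:
            module = importlib.import_module(f".{module_name}.tokenizer_fast", "paddlenlp.transformers")
            return getattr(module, class_name)
        except AttributeError:
            raise ValueError(f"Tokenizer class {class_name} is not currently imported.")

With PaddleNLP installed, resolve_tokenizer_class("bert", "BertTokenizerFast") would then be expected to return the fast class even though it lives outside the slow tokenizer module.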

tests/transformers/auto/test_tokenizer.py

Lines changed: 4 additions & 0 deletions
@@ -48,6 +48,10 @@ def test_from_pretrained_cache_dir(self):
             # check against double appending model_name in cache_dir
             self.assertFalse(os.path.exists(os.path.join(tempdir, model_name, model_name)))
 
+    def test_from_pretrained_tokenizer_fast(self):
+        tokenizer = AutoTokenizer.from_pretrained("intfloat/e5-base-v2", use_fast=True)
+        self.assertIsInstance(tokenizer, BertTokenizerFast)
+
     def test_new_tokenizer_registration(self):
         try:
             AutoConfig.register("custom", CustomConfig)
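
The new test exercises the path end to end: with use_fast=True, AutoTokenizer should resolve and return the fast tokenizer class for a BERT-based checkpoint. A usage sketch along the same lines (assumes a PaddleNLP build that ships BertTokenizerFast and network access to download intfloat/e5-base-v2):

from paddlenlp.transformers import AutoTokenizer, BertTokenizerFast

# Ask AutoTokenizer for the fast tokenizer implementation explicitly.
tokenizer = AutoTokenizer.from_pretrained("intfloat/e5-base-v2", use_fast=True)
assert isinstance(tokenizer, BertTokenizerFast)

# The fast tokenizer is then used like the slow one.
encoded = tokenizer("query: how are fast tokenizers resolved?")
print(encoded["input_ids"])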
