File tree Expand file tree Collapse file tree 1 file changed +0
-17
lines changed Expand file tree Collapse file tree 1 file changed +0
-17
lines changed Original file line number Diff line number Diff line change 8
8
9
9
logger = init_logger (__name__ )
10
10
11
- # A fast LLaMA tokenizer with the pre-processed `tokenizer.json` file.
12
- _FAST_LLAMA_TOKENIZER = "hf-internal-testing/llama-tokenizer"
13
-
14
11
15
12
def get_tokenizer (
16
13
tokenizer_name : str ,
@@ -27,27 +24,13 @@ def get_tokenizer(
27
24
"Cannot use the fast tokenizer in slow tokenizer mode." )
28
25
kwargs ["use_fast" ] = False
29
26
30
- if ("llama" in tokenizer_name .lower () and kwargs .get ("use_fast" , True )
31
- and tokenizer_name != _FAST_LLAMA_TOKENIZER ):
32
- logger .info (
33
- "For some LLaMA V1 models, initializing the fast tokenizer may "
34
- "take a long time. To reduce the initialization time, consider "
35
- f"using '{ _FAST_LLAMA_TOKENIZER } ' instead of the original "
36
- "tokenizer." )
37
27
try :
38
28
tokenizer = AutoTokenizer .from_pretrained (
39
29
tokenizer_name ,
40
30
* args ,
41
31
trust_remote_code = trust_remote_code ,
42
32
tokenizer_revision = tokenizer_revision ,
43
33
** kwargs )
44
- except TypeError as e :
45
- # The LLaMA tokenizer causes a protobuf error in some environments.
46
- err_msg = (
47
- "Failed to load the tokenizer. If you are using a LLaMA V1 model "
48
- f"consider using '{ _FAST_LLAMA_TOKENIZER } ' instead of the "
49
- "original tokenizer." )
50
- raise RuntimeError (err_msg ) from e
51
34
except ValueError as e :
52
35
# If the error pertains to the tokenizer class not existing or not
53
36
# currently being imported, suggest using the --trust-remote-code flag.
You can’t perform that action at this time.
0 commit comments