1 file changed: +4 -4 lines changed
Original file line number | Diff line number | Diff line change
@@ -73,11 +73,11 @@ def check_if_bos_token_used(self, checkpoint_dir: Path) -> bool:
7373 return False
7474 with open (tokenizer_config_path , encoding = "utf-8" ) as fp :
7575 config = json .load (fp )
76- if any ( config . get ( check , False ) for check in ( "add_bos_token" , "add_prefix_space" )) :
77- return True
78- # for examples that also use the Llama tokenizer , but do not have or set add_bos_token to True.
76+ if "add_bos_token" in config :
77+ return config [ "add_bos_token" ]
78+ # if `add_bos_token` isn't in the config file , but LLaMA tokenizer is used - return True.
7979 # ex: https://huggingface.co/stabilityai/StableBeluga2/blob/main/tokenizer_config.json#L2
80- return config .get ("add_bos_token" ) is None and config . get ( " tokenizer_class" ) == "LlamaTokenizer"
80+ return config .get ("tokenizer_class" ) == "LlamaTokenizer"
8181
8282 def encode (
8383 self ,
You can’t perform that action at this time.
0 commit comments