@@ -5575,6 +5575,9 @@ def set_vocab(self):
         from transformers import AutoTokenizer
         tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
         tokpre = self.get_vocab_base_pre(tokenizer)
+        print("====================================", flush=True)
+        print(f'tokpre = {tokpre}', flush=True)
+        print("====================================", flush=True)
         merges = []
         vocab = {}
         tokens: list[str] = []
@@ -5597,10 +5600,10 @@ def set_vocab(self):

         # 3. Generate the tokens and toktypes lists
         vocab_size = self.hparams["vocab_size"]
-        prit("====================================")
-        print(f'self.hparams["vocab_size"] = {vocab_size}')
-        print(f'tokenizer.vocab_size = {tokenizer.vocab_size}')
-        prit("====================================")
+        print("====================================", flush=True)
+        print(f'self.hparams["vocab_size"] = {vocab_size}', flush=True)
+        print(f'tokenizer.vocab_size = {tokenizer.vocab_size}', flush=True)
+        print("====================================", flush=True)

         assert tokenizer.vocab_size == vocab_size
         special_tokens = tokenizer.special_tokens