@@ -856,16 +856,37 @@ def call(self, inputs):
 
 counter = 0
 for sample in non_instruct_samples:
-    half_sample_len = int(np.ceil(len(sample)))
+    half_sample_len = int(np.ceil(len(sample) / 2))
     half_sample = sample[:half_sample_len]
+
+    # Tokenize the text
     half_sample_tokenized = tokenizer(half_sample)
+
+    # Extract token IDs as a list of integers (not tensors)
+    if isinstance(half_sample_tokenized, dict):
+        # If tokenizer returns a dict, extract the token IDs
+        token_ids = half_sample_tokenized['input_ids']  # or 'token_ids' depending on your tokenizer
+    else:
+        # If tokenizer returns a list directly
+        token_ids = half_sample_tokenized
+
+    # Convert to Python list of integers if it's a tensor
+    if hasattr(token_ids, 'numpy'):
+        token_ids = token_ids.numpy().tolist()
+    if not isinstance(token_ids, list):
+        token_ids = list(token_ids)
+
+    # Now pass the list of integers to your generate method
     generated_tokens = generator.generate(
-        token_ids=half_sample_tokenized,
+        token_ids=token_ids,  # This should now be a list of integers
         do_sample=False,
         max_new_tokens=40
     )
+
+    # Decode the result
     full_generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=False)
     print(f"PROMPT number {counter}: {half_sample}; RESPONSE: {full_generated_text}")
+    counter += 1
 
 
 
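The dict/tensor/list branching this hunk inlines can be factored into a single helper. The sketch below is a minimal, standalone version of that same normalization, assuming the tokenizer returns either a plain list, a dict keyed by 'input_ids' (or 'token_ids'), or a tensor-like object exposing .numpy(); the helper name to_token_id_list is hypothetical, not part of this codebase.

import numpy as np

def to_token_id_list(tokenized):
    """Normalize tokenizer output to a flat Python list of ints.

    Hypothetical helper mirroring the diff's inline logic; the
    'input_ids'/'token_ids' keys are assumptions about the tokenizer.
    """
    # Unwrap dict-style outputs first
    if isinstance(tokenized, dict):
        tokenized = tokenized.get('input_ids', tokenized.get('token_ids'))
    # TF/torch tensors expose .numpy(); convert them to a plain list
    if hasattr(tokenized, 'numpy'):
        tokenized = tokenized.numpy().tolist()
    # Anything else iterable (e.g. a numpy array) becomes a list too
    if not isinstance(tokenized, list):
        tokenized = list(tokenized)
    # Force plain Python ints so generate() never sees numpy scalars
    return [int(t) for t in tokenized]

# Quick sanity checks against the shapes the loop above has to handle
assert to_token_id_list([1, 2, 3]) == [1, 2, 3]
assert to_token_id_list({'input_ids': [4, 5]}) == [4, 5]
assert to_token_id_list(np.array([6, 7])) == [6, 7]

With such a helper, each loop body would reduce to token_ids = to_token_id_list(tokenizer(half_sample)), which would also keep the two copies of this logic from drifting apart.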
@@ -939,16 +960,37 @@ def call(self, inputs):
 
 counter = 0
 for sample in non_instruct_samples:
-    half_sample_len = int(np.ceil(len(sample)))
+    half_sample_len = int(np.ceil(len(sample) / 2))
     half_sample = sample[:half_sample_len]
+
+    # Tokenize the text
     half_sample_tokenized = tokenizer(half_sample)
+
+    # Extract token IDs as a list of integers (not tensors)
+    if isinstance(half_sample_tokenized, dict):
+        # If tokenizer returns a dict, extract the token IDs
+        token_ids = half_sample_tokenized['input_ids']  # or 'token_ids' depending on your tokenizer
+    else:
+        # If tokenizer returns a list directly
+        token_ids = half_sample_tokenized
+
+    # Convert to Python list of integers if it's a tensor
+    if hasattr(token_ids, 'numpy'):
+        token_ids = token_ids.numpy().tolist()
+    if not isinstance(token_ids, list):
+        token_ids = list(token_ids)
+
+    # Now pass the list of integers to your generate method
     generated_tokens = reconstituted_generator.generate(
-        token_ids=half_sample_tokenized,
+        token_ids=token_ids,  # This should now be a list of integers
         do_sample=False,
         max_new_tokens=40
     )
+
+    # Decode the result
     full_generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=False)
     print(f"PROMPT number {counter}: {half_sample}; RESPONSE: {full_generated_text}")
+    counter += 1
 
 # # Test with all original data samples - REAL WORLD DEMO (reconstituted)
 # print("\n" + "="*50)
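To sanity-check this loop's control flow without loading a real model, one can run it against stand-in objects. Everything below (FakeTokenizer, FakeGenerator, the sample strings) is hypothetical scaffolding, not this repo's API; it only mirrors the call signatures the diff relies on: tokenizer(text), generate(token_ids=..., do_sample=..., max_new_tokens=...), and tokenizer.decode(..., skip_special_tokens=...).

import numpy as np

class FakeTokenizer:
    # Returns a dict, like many HF-style tokenizers, to exercise the dict branch
    def __call__(self, text):
        return {'input_ids': [ord(c) for c in text]}

    def decode(self, token_ids, skip_special_tokens=False):
        return ''.join(chr(int(t)) for t in token_ids)

class FakeGenerator:
    # Echoes the prompt and appends a fixed suffix, ignoring the sampling flag
    def generate(self, token_ids, do_sample=False, max_new_tokens=40):
        assert all(isinstance(t, int) for t in token_ids)  # the point of the diff
        return token_ids + [ord(c) for c in ' ...'][:max_new_tokens]

tokenizer = FakeTokenizer()
reconstituted_generator = FakeGenerator()
non_instruct_samples = ["hello world", "the quick brown fox"]

counter = 0
for sample in non_instruct_samples:
    half_sample_len = int(np.ceil(len(sample) / 2))
    half_sample = sample[:half_sample_len]
    tokenized = tokenizer(half_sample)
    token_ids = tokenized['input_ids'] if isinstance(tokenized, dict) else tokenized
    if hasattr(token_ids, 'numpy'):
        token_ids = token_ids.numpy().tolist()
    if not isinstance(token_ids, list):
        token_ids = list(token_ids)
    generated_tokens = reconstituted_generator.generate(
        token_ids=token_ids,
        do_sample=False,
        max_new_tokens=40
    )
    full_generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=False)
    print(f"PROMPT number {counter}: {half_sample}; RESPONSE: {full_generated_text}")
    counter += 1

The assert inside FakeGenerator.generate fails if any tensor or numpy scalar leaks through, which is exactly the failure mode the normalization in both hunks is meant to prevent.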