@@ -856,18 +856,37 @@ def call(self, inputs):
 
 counter = 0
 for sample in non_instruct_samples:
-    half_sample_len = int(np.ceil(len(sample)))
+    half_sample_len = int(np.ceil(len(sample) / 2))
     half_sample = sample[:half_sample_len]
+
+    # Tokenize the text
     half_sample_tokenized = tokenizer(half_sample)
+
+    # Extract token IDs as a list of integers (not tensors)
+    if isinstance(half_sample_tokenized, dict):
+        # If tokenizer returns a dict, extract the token IDs
+        token_ids = half_sample_tokenized['input_ids']  # or 'token_ids' depending on your tokenizer
+    else:
+        # If tokenizer returns a list directly
+        token_ids = half_sample_tokenized
+
+    # Convert to Python list of integers if it's a tensor
+    if hasattr(token_ids, 'numpy'):
+        token_ids = token_ids.numpy().tolist()
+    if not isinstance(token_ids, list):
+        token_ids = list(token_ids)
+
+    # Now pass the list of integers to your generate method
     generated_tokens = generator.generate(
-        token_ids=half_sample_tokenized,
+        token_ids=token_ids,  # This should now be a list of integers
         do_sample=False,
         max_new_tokens=40
     )
+
+    # Decode the result
     full_generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=False)
     print(f"PROMPT number {counter}: {half_sample}; RESPONSE: {full_generated_text}")
-
-
+    counter += 1
 
 
 
@@ -935,20 +954,39 @@ def call(self, inputs):
 reconstituted_generator = tf.keras.models.load_model(model_save_path)
 print("Model reconstituted successfully!")
 
-##### here <--------<<<<<<
-
 counter = 0
 for sample in non_instruct_samples:
-    half_sample_len = int(np.ceil(len(sample)))
+    half_sample_len = int(np.ceil(len(sample) / 2))
     half_sample = sample[:half_sample_len]
+
+    # Tokenize the text
     half_sample_tokenized = tokenizer(half_sample)
+
+    # Extract token IDs as a list of integers (not tensors)
+    if isinstance(half_sample_tokenized, dict):
+        # If tokenizer returns a dict, extract the token IDs
+        token_ids = half_sample_tokenized['input_ids']  # or 'token_ids' depending on your tokenizer
+    else:
+        # If tokenizer returns a list directly
+        token_ids = half_sample_tokenized
+
+    # Convert to Python list of integers if it's a tensor
+    if hasattr(token_ids, 'numpy'):
+        token_ids = token_ids.numpy().tolist()
+    if not isinstance(token_ids, list):
+        token_ids = list(token_ids)
+
+    # Now pass the list of integers to your generate method
     generated_tokens = reconstituted_generator.generate(
-        token_ids=half_sample_tokenized,
+        token_ids=token_ids,  # This should now be a list of integers
         do_sample=False,
         max_new_tokens=40
     )
+
+    # Decode the result
     full_generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=False)
     print(f"PROMPT number {counter}: {half_sample}; RESPONSE: {full_generated_text}")
+    counter += 1
 
 # # Test with all original data samples - REAL WORLD DEMO (reconstituted)
 # print("\n" + "="*50)