@@ -79,6 +79,16 @@ def objective(trial: optuna.Trial) -> float:
7979 moities_to_try = 3 # ++ Accuracy, linear increase in computation time (Raise this before resorting to raising the next one)
8080 tries_per_moity = 1 # ++ Modest ++ Accuracy, quadratic increase in computation time
8181
82+ ## Generation time configurables: ##########
83+
84+ GENERATION_PROMPT_LEN = 25
85+ MAX_NEW_TOKENS = 14
86+ RESULT_CUTOFF = 11 # Only print out verbose text samples when perplexity is < RESULT_CUTOFF
87+
88+ if GENERATION_PROMPT_LEN + MAX_NEW_TOKENS > MAX_SEQ_LENGTH:
89+     raise ValueError("Sequence length overflow: Generated text length (GENERATION_PROMPT_LEN + MAX_NEW_TOKENS) "
90+                      "should be less than or equal to MAX_SEQ_LENGTH.")
91+
8292 ##### HP Tuning Parameters: ######### (Parameters to be optimized by TPE or SOBOL)
8393
8494
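For context, here is a minimal sketch (not part of the commit) of how the generation-time configurables added above could be enforced on an arbitrary prompt: tokenize the prompt the same way the helpers later in this diff do, then truncate it so that prompt plus MAX_NEW_TOKENS always fits in MAX_SEQ_LENGTH. The helper name truncate_prompt and its standalone-function form are assumptions for illustration only.

# Illustrative sketch only, not part of the commit. Assumes a tokenizer where
# tokenizer(text, add_special_tokens=False)["input_ids"] returns a list of ints,
# matching the calls made later in this diff.
def truncate_prompt(tokenizer, text: str, prompt_len: int, max_new_tokens: int, max_seq_length: int) -> list:
    token_ids = tokenizer(text, add_special_tokens=False)["input_ids"]
    # Keep the prompt short enough that prompt + generated tokens fit in the model context.
    budget = min(prompt_len, max_seq_length - max_new_tokens)
    return token_ids[:budget]

# Hypothetical usage with the configurables above:
# prompt_ids = truncate_prompt(tokenizer, "In the beginning God created the ",
#                              GENERATION_PROMPT_LEN, MAX_NEW_TOKENS, MAX_SEQ_LENGTH)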
@@ -1020,8 +1030,9 @@ def call(self, inputs):
10201030
10211031 # mlflow.keras.log_model(generator, artifact_path="generator")
10221032 print("########### BEFORE SERIALIZING THE GENERATIVE MODEL")
1023-
1024- def complete_text (text ):
1033+
1034+ # Utility function to generate text via greedy decoding:
1035+ def complete_text_greedy(text: str, max_new_tokens: int = 10) -> str:
10251036 input_ids = tokenizer (
10261037 text ,
10271038 add_special_tokens = False
@@ -1030,62 +1041,123 @@ def complete_text(text):
10301041 generated_tokens = generator .generate (
10311042 token_ids = input_ids , # Just the actual tokens, no padding
10321043 do_sample = False ,
1033- max_new_tokens = 10
1044+ max_new_tokens = max_new_tokens
10341045 )
10351046 generated_text = \
10361047 tokenizer .decode (generated_tokens ).replace (text , "" )
10371048 return generated_text
1038-
1039- test_text = "I saw the sun and it was as"
1040- response = complete_text (test_text )
1041-
1042- print (f"I ask the generator: { test_text } ... It responds:" )
1043- print (response )
1044-
1045- counter = 0
1046- for sample in non_instruct_samples :
1047-
1048-
1049- # Tokenize the text without padding first to get actual tokens
1050- sample_tokenized = tokenizer (
1051- sample ,
1049+
1050+ # Utility function to generate text via temperature / top-k / top-p sampling (not true beam search, despite the name):
1051+ def complete_text_beam(text: str,
1052+                        max_new_tokens: int = 10,
1053+                        temperature: float = 0.75,
1054+                        top_k: int = 75,
1055+                        top_p: float = 0.98,
1056+                        repetition_penalty: float = None,  # accepted for future use; not currently passed to generate()
1057+                        presence_penalty: float = 1.3,
1058+                        frequency_penalty: float = 1.4) -> str:
1059+
1060+ input_ids = tokenizer (
1061+ text ,
10521062 add_special_tokens = False
10531063 )['input_ids' ]
1054- start_generate_index = int (np .ceil (len (sample_tokenized ) * 0.5 ))
1055- half_sample_tokenized = sample_tokenized [:start_generate_index ]
1056-
1057- # Convert to Python list of integers
1058- if hasattr (half_sample_tokenized , 'numpy' ):
1059- token_ids = half_sample_tokenized .numpy ().tolist ()
1060- else :
1061- token_ids = [int (token_id ) for token_id in half_sample_tokenized ]
1062-
1063- print (f"Actual token count: { len (token_ids )} " )
1064- print (f"First 10 tokens: { token_ids [:10 ]} " )
1065-
1066- # Now pass the list of integers to your generate method
1064+
10671065 generated_tokens = generator .generate (
1068- token_ids = token_ids , # Just the actual tokens, no padding
1066+ token_ids = input_ids , # Just the actual tokens, no padding
10691067 do_sample = True ,
1070- max_new_tokens = 20 ,
1071- temperature = 0.73 ,
1072- # One set of recommendations
1073- top_k = 75 ,
1074- top_p = 0.97 ,
1075- # Previous semi-working values
1076- # top_k=40,
1077- # top_p=0.985,
1068+ max_new_tokens = max_new_tokens ,
1069+ temperature = temperature ,
1070+ top_k = top_k ,
1071+ top_p = top_p ,
10781072 # repetition_penalty=1.2,
1079- presence_penalty = 1.2 ,
1080- frequency_penalty = 1.4
1073+ presence_penalty = presence_penalty ,
1074+ frequency_penalty = frequency_penalty
10811075 )
1076+ generated_text = \
1077+ tokenizer .decode (generated_tokens ).replace (text , "" )
1078+ return generated_text
1079+
1080+ test_text = "I saw the sun and it was as shining on the"
1081+ response = complete_text_greedy(test_text)
1082+ print(f"I ask the generator (greedy): {test_text} ... It responds: '{response}'.")
1083+ response = complete_text_beam(test_text)
1084+ print(f"I ask the generator (beam, default sampling settings): {test_text} ... It responds: '{response}'.")
1085+
1086+ def print_generation_samples(test_prompt: str, max_new_tokens: int, sample_number: int, result: float, result_cutoff: float) -> None:
1087+     """
1088+     If result < result_cutoff, run a small matrix of sampling settings and print the generated text for subjective human evaluation.
1089+     Parameters:
1090+     - test_prompt: a string to prompt generation
1091+     - max_new_tokens: number of tokens to generate unless a stop token is produced first
1092+     - sample_number: index of the prompt, used only to label the printed output
1093+     - result: perplexity score from this trial
1094+     - result_cutoff: perplexity threshold below which the trial is considered worth sampling verbosely
1095+     """
1096+     if result >= result_cutoff:
1097+         return
1098+     response_1 = complete_text_greedy(text=test_prompt, max_new_tokens=max_new_tokens)
1099+     print(f"Sample {sample_number}: I ask the generator (greedy): {test_prompt} ... It responds: '{response_1}'.")
1100+     response_2 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens)
1101+     print(f"Sample {sample_number}: I ask the generator (beam, default sampling settings): {test_prompt} ... It responds: '{response_2}'.")
1102+     response_3 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty=1.3, frequency_penalty=1.4)
1103+     print(f"Sample {sample_number}: I ask the generator (beam, temperature=0.6, top_k=75, top_p=0.98, presence_penalty=1.3, frequency_penalty=1.4): {test_prompt} ... It responds: '{response_3}'.")
1104+     response_4 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty=1.3, frequency_penalty=1.4)
1105+     print(f"Sample {sample_number}: I ask the generator (beam, temperature=0.7, top_k=75, top_p=0.98, presence_penalty=1.3, frequency_penalty=1.4): {test_prompt} ... It responds: '{response_4}'.")
1106+     response_5 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None, presence_penalty=1.3, frequency_penalty=1.4)
1107+     print(f"Sample {sample_number}: I ask the generator (beam, temperature=0.7, top_k=75, top_p=0.97, presence_penalty=1.3, frequency_penalty=1.4): {test_prompt} ... It responds: '{response_5}'.")
1108+
1109+
1110+ prompt_samples = [
1111+ "In the beginning God created the " ,
1112+ "And the earth was without form, and" ,
1113+ "And God said, Let there be light: and there " ,
1114+ "And God said, Let the waters under the heaven be gathered" ]
1115+
1116+
1117+ counter = 0
1118+ for sample in prompt_samples:
1119+     print_generation_samples(test_prompt=sample, max_new_tokens=MAX_NEW_TOKENS, sample_number=counter, result=result, result_cutoff=RESULT_CUTOFF)
1120+
1121+ # # Tokenize the text without padding first to get actual tokens
1122+ # sample_tokenized = tokenizer(
1123+ # sample,
1124+ # add_special_tokens=False
1125+ # )['input_ids']
1126+ # start_generate_index = int(np.ceil(len(sample_tokenized) * 0.5))
1127+ # half_sample_tokenized = sample_tokenized[:start_generate_index]
1128+
1129+ # # Convert to Python list of integers
1130+ # if hasattr(half_sample_tokenized, 'numpy'):
1131+ # token_ids = half_sample_tokenized.numpy().tolist()
1132+ # else:
1133+ # token_ids = [int(token_id) for token_id in half_sample_tokenized]
1134+
1135+ # print(f"Actual token count: {len(token_ids)}")
1136+ # print(f"First 10 tokens: {token_ids[:10]}")
1137+
1138+ # # Now pass the list of integers to your generate method
1139+ # generated_tokens = generator.generate(
1140+ # token_ids=token_ids, # Just the actual tokens, no padding
1141+ # do_sample=True,
1142+ # max_new_tokens=20,
1143+ # temperature=0.73,
1144+ # # One set of recommendations
1145+ # top_k=75,
1146+ # top_p=0.97,
1147+ # # Previous semi-working values
1148+ # # top_k=40,
1149+ # # top_p=0.985,
1150+ # # repetition_penalty=1.2,
1151+ # presence_penalty=1.2,
1152+ # frequency_penalty=1.4
1153+ # )
10821154
1083- # Decode the result
1084- half_sample = tokenizer .decode (half_sample_tokenized )
1085- full_generated_text = tokenizer .decode (generated_tokens , skip_special_tokens = False )\
1086- .replace (half_sample , "" )
1155+ # # Decode the result
1156+ # half_sample = tokenizer.decode(half_sample_tokenized)
1157+ # full_generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=False)\
1158+ # .replace(half_sample, "")
10871159
1088- print (f"PROMPT number { counter } : { half_sample } ; RESPONSE: { full_generated_text } " )
1160+ # print(f"PROMPT number {counter}: {half_sample}; RESPONSE: {full_generated_text}")
10891161 counter += 1
10901162 mlflow .log_metric ("perplexity" , result , step = trial .number )
10911163 del (best_model_found )
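As a closing design note (not part of the commit), the repeated hard-coded beam calls in the sampling helper above could instead be driven by a small table of settings, which keeps the printed labels and the actual arguments from drifting apart. A minimal sketch, assuming the complete_text_beam helper defined in this diff is in scope; the names SAMPLING_SETTINGS and sweep_sampling_settings are hypothetical:

# Illustrative sketch only: sweep a table of sampling settings through complete_text_beam.
SAMPLING_SETTINGS = [
    {"temperature": 0.6, "top_k": 75, "top_p": 0.98},
    {"temperature": 0.7, "top_k": 75, "top_p": 0.98},
    {"temperature": 0.7, "top_k": 75, "top_p": 0.97},
]

def sweep_sampling_settings(prompt: str, max_new_tokens: int, sample_number: int) -> None:
    for settings in SAMPLING_SETTINGS:
        response = complete_text_beam(text=prompt, max_new_tokens=max_new_tokens, **settings)
        print(f"Sample {sample_number}: beam {settings}: {prompt} ... It responds: '{response}'.")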