From 9e740c94e2b91fe4defa6c57f640447e3a8ee7c4 Mon Sep 17 00:00:00 2001 From: David Thrower Date: Sun, 28 Sep 2025 18:28:33 -0400 Subject: [PATCH 1/8] Update generative-proof-of-concept-CPU-preprocessing-in-memory.py Added some tests to print out text of lowest perplexity trials with different permutations of generation params. --- ...-of-concept-CPU-preprocessing-in-memory.py | 164 +++++++++++++----- 1 file changed, 118 insertions(+), 46 deletions(-) diff --git a/generative-proof-of-concept-CPU-preprocessing-in-memory.py b/generative-proof-of-concept-CPU-preprocessing-in-memory.py index 9f56a00..481c6e5 100644 --- a/generative-proof-of-concept-CPU-preprocessing-in-memory.py +++ b/generative-proof-of-concept-CPU-preprocessing-in-memory.py @@ -79,6 +79,16 @@ def objective(trial: optuna.Trial) -> float: moities_to_try = 3 # ++ Accuracy, linear increase in computation time (Raise this before resorting to raising the next one) tries_per_moity = 1 # ++ Modest ++ Accuracy, quadratic increase in computation time + ## Generation time configurables: ########## + + GENERATION_PROMPT_LEN = 25 + MAX_NEW_TOKENS = 14 + RESULT_CUTOFF = 11 # Only print out verbose text samples when perplexity is < RESULT_CUTOFF + + if GENERATION_PROMPT_LEN + MAX_NEW_TOKENS > MAX_SEQ_LENGTH: + raise ValueError("Sequence length overflow: Generated text length (GENERATION_PROMPT_LEN + MAX_NEW_TOKENS) " + "should be less than or equal to MAX_SEQ_LENGTH.") + ##### HP Tuning Parameters: ######### (Parameters to be optimized by TPE or SOBOL) @@ -1020,8 +1030,9 @@ def call(self, inputs): # mlflow.keras.log_model(generator, artifact_path="generator") print("########### BEFORE SEARIALIZING THE GENERATIVE MODEL") - - def complete_text(text): + + # Utility function to generate text from greedy sampling: + def complete_text_greedy(text: str, max_new_tokens:int=10) -> str: input_ids = tokenizer( text, add_special_tokens=False @@ -1030,62 +1041,123 @@ def complete_text(text): generated_tokens = generator.generate( token_ids=input_ids, # Just the actual tokens, no padding do_sample=False, - max_new_tokens=10 + max_new_tokens=max_new_tokens ) generated_text =\ tokenizer.decode(generated_tokens).replace(text, "") return generated_text - - test_text = "I saw the sun and it was as" - response = complete_text(test_text) - - print(f"I ask the generator: {test_text}... 
It responds:") - print(response) - - counter = 0 - for sample in non_instruct_samples: - - - # Tokenize the text without padding first to get actual tokens - sample_tokenized = tokenizer( - sample, + + # Utility function to generate text from beam sampling: + def complete_text_beam(text: str, + max_new_tokens: int=10, + temperature: float=0.75, + top_k: int=75, + top_p: float=0.98, + repetition_penalty: float=None, + presence_penalty: float=1.3, + frequency_penalty: float=1.4) -> str: + + input_ids = tokenizer( + text, add_special_tokens=False )['input_ids'] - start_generate_index = int(np.ceil(len(sample_tokenized) * 0.5)) - half_sample_tokenized = sample_tokenized[:start_generate_index] - - # Convert to Python list of integers - if hasattr(half_sample_tokenized, 'numpy'): - token_ids = half_sample_tokenized.numpy().tolist() - else: - token_ids = [int(token_id) for token_id in half_sample_tokenized] - - print(f"Actual token count: {len(token_ids)}") - print(f"First 10 tokens: {token_ids[:10]}") - - # Now pass the list of integers to your generate method + generated_tokens = generator.generate( - token_ids=token_ids, # Just the actual tokens, no padding + token_ids=input_ids, # Just the actual tokens, no padding do_sample=True, - max_new_tokens=20, - temperature=0.73, - # One set of recommendations - top_k=75, - top_p=0.97, - # Previous semi-working values - # top_k=40, - # top_p=0.985, + max_new_tokens=max_new_tokens, + temperature=temperature, + top_k=top_k, + top_p=top_p, # repetition_penalty=1.2, - presence_penalty=1.2, - frequency_penalty=1.4 + presence_penalty= presence_penalty, + frequency_penalty=frequency_penalty ) + generated_text =\ + tokenizer.decode(generated_tokens).replace(text, "") + return generated_text + + test_text = "I saw the sun and it was as shining on the" + response = complete_text_greedy(test_text) + print(f"I ask the generator (greedy): {test_text}... It responds: '{response}'.") + response = complete_text_beam(test_text) + print(f"I ask the generator (Beam defaults - max_new_tokens: 10, temperature: 0.75, top_k: 75, top_p: 0.98, repetition_penalty: None, presence_penalty: 1.3, frequency_penalty: 1.4): {test_text}... It responds: '{response}'.") + + def test_text(test_prompt: str, max_new_tokens: int, sample_number: int, result: float, result_cutoff) -> None: + """ + If the result < result_cutoff, this will run a matrix of different sampling values and print out the resulting text for human subjective evaluation. + + Parameters: + - test_prompt: a string to prompt generation + - max_new_tokens: int, number of tokens to generate unless we generate a stop token. + - sample_number: Metadata for sample... + - result: Perplexity score from this run + - result cutoff: Perplexity score that would be expected to indicate a trial worth running this pn + + """ + response1 = response = complete_text_greedy(text=test_prompt, max_new_tokens=max_new_tokens) + print(f"Sample {sample_number}: I ask the generator (greedy): {test_prompt}... It responds: '{response1}'.") + response2 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens) + print(f"Sample {sample_number}: I ask the generator (Beam defaults - max_new_tokens: 10, temperature: 0.75, top_k: 75, top_p: 0.98, repetition_penalty: None, presence_penalty: 1.3, frequency_penalty: 1.4): {test_prompt}... 
It responds: '{response2}'.") + response_3 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.6, top_k=75, top_p: 0.98, repetition_penalty=None, presence_penalty: 1.3 = frequency_penalty = 1.4) + print(f"Sample {sample_number}: I ask the generator (Beam: - max_new_tokens: 10, temperature=0.6, top_k=75, top_p: 0.98, repetition_penalty=None, presence_penalty: 1.3 = frequency_penalty = 1.4): {test_prompt}... It responds: '{response2}'.") + response_4 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.7, top_k=75, top_p: 0.98, repetition_penalty=None, presence_penalty: 1.3 = frequency_penalty = 1.4) + print(f"Sample {sample_number}: I ask the generator (Beam: - max_new_tokens: 10, temperature=0.7, top_k=75, top_p: 0.98, repetition_penalty=None, presence_penalty: 1.3 = frequency_penalty = 1.4): {test_prompt}... It responds: '{response3}'.") + response_4 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.7, top_k=75, top_p: 0.97, repetition_penalty=None, presence_penalty: 1.3 = frequency_penalty = 1.4) + print(f"Sample {sample_number}: I ask the generator (Beam: - max_new_tokens: 10, temperature=0.7, top_k=75, top_p: 0.97, repetition_penalty=None, presence_penalty: 1.3 = frequency_penalty = 1.4): {test_prompt}... It responds: '{response_4}'.") + + + prompt_samples = [ + "In the beginning God created the ", + "And the earth was without form, and", + "And God said, Let there be light: and there ", + "And God said, Let the waters under the heaven be gathered"] + + + counter = 0 + for sample in prompt_samples: + test_text(test_prompt=sample, max_new_tokens=MAX_NEW_TOKENS, sample_number= counter, result=result, result cutoff = RESULT_CUTOFF) + + # # Tokenize the text without padding first to get actual tokens + # sample_tokenized = tokenizer( + # sample, + # add_special_tokens=False + # )['input_ids'] + # start_generate_index = int(np.ceil(len(sample_tokenized) * 0.5)) + # half_sample_tokenized = sample_tokenized[:start_generate_index] + + # # Convert to Python list of integers + # if hasattr(half_sample_tokenized, 'numpy'): + # token_ids = half_sample_tokenized.numpy().tolist() + # else: + # token_ids = [int(token_id) for token_id in half_sample_tokenized] + + # print(f"Actual token count: {len(token_ids)}") + # print(f"First 10 tokens: {token_ids[:10]}") + + # # Now pass the list of integers to your generate method + # generated_tokens = generator.generate( + # token_ids=token_ids, # Just the actual tokens, no padding + # do_sample=True, + # max_new_tokens=20, + # temperature=0.73, + # # One set of recommendations + # top_k=75, + # top_p=0.97, + # # Previous semi-working values + # # top_k=40, + # # top_p=0.985, + # # repetition_penalty=1.2, + # presence_penalty=1.2, + # frequency_penalty=1.4 + # ) - # Decode the result - half_sample = tokenizer.decode(half_sample_tokenized) - full_generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=False)\ - .replace(half_sample, "") + # # Decode the result + # half_sample = tokenizer.decode(half_sample_tokenized) + # full_generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=False)\ + # .replace(half_sample, "") - print(f"PROMPT number {counter}: {half_sample}; RESPONSE: {full_generated_text}") + # print(f"PROMPT number {counter}: {half_sample}; RESPONSE: {full_generated_text}") counter += 1 mlflow.log_metric("perplexity", result, step=trial.number) del(best_model_found) From 201014e3802042c69002d5889074d11336a46c6a Mon Sep 17 
00:00:00 2001 From: David Thrower Date: Sun, 28 Sep 2025 18:29:20 -0400 Subject: [PATCH 2/8] Update automerge.yml Trigger tests to run. --- .github/workflows/automerge.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/automerge.yml b/.github/workflows/automerge.yml index ace0e9b..02c267f 100644 --- a/.github/workflows/automerge.yml +++ b/.github/workflows/automerge.yml @@ -5,7 +5,7 @@ name: Python application on: push: - branches: [ "main", "240-branch-to-diverge-cicd-scale-nlp-hpo-from-at-scale-study" ] + branches: [ "main", "249-improve-text-generation-samples-in-cicd-scale-test" ] permissions: contents: read From d4a5ac519e47872fa05d8ec99d27b7750b49f708 Mon Sep 17 00:00:00 2001 From: David Thrower Date: Sun, 28 Sep 2025 18:38:42 -0400 Subject: [PATCH 3/8] Update generative-proof-of-concept-CPU-preprocessing-in-memory.py Syntax correction... --- ...e-proof-of-concept-CPU-preprocessing-in-memory.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/generative-proof-of-concept-CPU-preprocessing-in-memory.py b/generative-proof-of-concept-CPU-preprocessing-in-memory.py index 481c6e5..d2a909c 100644 --- a/generative-proof-of-concept-CPU-preprocessing-in-memory.py +++ b/generative-proof-of-concept-CPU-preprocessing-in-memory.py @@ -1099,12 +1099,12 @@ def test_text(test_prompt: str, max_new_tokens: int, sample_number: int, result: print(f"Sample {sample_number}: I ask the generator (greedy): {test_prompt}... It responds: '{response1}'.") response2 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens) print(f"Sample {sample_number}: I ask the generator (Beam defaults - max_new_tokens: 10, temperature: 0.75, top_k: 75, top_p: 0.98, repetition_penalty: None, presence_penalty: 1.3, frequency_penalty: 1.4): {test_prompt}... It responds: '{response2}'.") - response_3 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.6, top_k=75, top_p: 0.98, repetition_penalty=None, presence_penalty: 1.3 = frequency_penalty = 1.4) - print(f"Sample {sample_number}: I ask the generator (Beam: - max_new_tokens: 10, temperature=0.6, top_k=75, top_p: 0.98, repetition_penalty=None, presence_penalty: 1.3 = frequency_penalty = 1.4): {test_prompt}... It responds: '{response2}'.") - response_4 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.7, top_k=75, top_p: 0.98, repetition_penalty=None, presence_penalty: 1.3 = frequency_penalty = 1.4) - print(f"Sample {sample_number}: I ask the generator (Beam: - max_new_tokens: 10, temperature=0.7, top_k=75, top_p: 0.98, repetition_penalty=None, presence_penalty: 1.3 = frequency_penalty = 1.4): {test_prompt}... It responds: '{response3}'.") - response_4 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.7, top_k=75, top_p: 0.97, repetition_penalty=None, presence_penalty: 1.3 = frequency_penalty = 1.4) - print(f"Sample {sample_number}: I ask the generator (Beam: - max_new_tokens: 10, temperature=0.7, top_k=75, top_p: 0.97, repetition_penalty=None, presence_penalty: 1.3 = frequency_penalty = 1.4): {test_prompt}... 
It responds: '{response_4}'.") + response_3 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty: 1.3 = frequency_penalty = 1.4) + print(f"Sample {sample_number}: I ask the generator (Beam: - max_new_tokens: 10, temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty: 1.3 = frequency_penalty = 1.4): {test_prompt}... It responds: '{response2}'.") + response_4 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty: 1.3 = frequency_penalty = 1.4) + print(f"Sample {sample_number}: I ask the generator (Beam: - max_new_tokens: 10, temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty: 1.3 = frequency_penalty = 1.4): {test_prompt}... It responds: '{response3}'.") + response_4 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None, presence_penalty: 1.3 = frequency_penalty = 1.4) + print(f"Sample {sample_number}: I ask the generator (Beam: - max_new_tokens: 10, temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None, presence_penalty: 1.3 = frequency_penalty = 1.4): {test_prompt}... It responds: '{response_4}'.") prompt_samples = [ From e7c45ebf218049ae64a12c54216bbed3788d1ea5 Mon Sep 17 00:00:00 2001 From: David Thrower Date: Sun, 28 Sep 2025 18:50:31 -0400 Subject: [PATCH 4/8] Update generative-proof-of-concept-CPU-preprocessing-in-memory.py Syntax... --- ...e-proof-of-concept-CPU-preprocessing-in-memory.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/generative-proof-of-concept-CPU-preprocessing-in-memory.py b/generative-proof-of-concept-CPU-preprocessing-in-memory.py index d2a909c..288dc80 100644 --- a/generative-proof-of-concept-CPU-preprocessing-in-memory.py +++ b/generative-proof-of-concept-CPU-preprocessing-in-memory.py @@ -1099,12 +1099,12 @@ def test_text(test_prompt: str, max_new_tokens: int, sample_number: int, result: print(f"Sample {sample_number}: I ask the generator (greedy): {test_prompt}... It responds: '{response1}'.") response2 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens) print(f"Sample {sample_number}: I ask the generator (Beam defaults - max_new_tokens: 10, temperature: 0.75, top_k: 75, top_p: 0.98, repetition_penalty: None, presence_penalty: 1.3, frequency_penalty: 1.4): {test_prompt}... It responds: '{response2}'.") - response_3 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty: 1.3 = frequency_penalty = 1.4) - print(f"Sample {sample_number}: I ask the generator (Beam: - max_new_tokens: 10, temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty: 1.3 = frequency_penalty = 1.4): {test_prompt}... It responds: '{response2}'.") - response_4 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty: 1.3 = frequency_penalty = 1.4) - print(f"Sample {sample_number}: I ask the generator (Beam: - max_new_tokens: 10, temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty: 1.3 = frequency_penalty = 1.4): {test_prompt}... 
It responds: '{response3}'.") - response_4 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None, presence_penalty: 1.3 = frequency_penalty = 1.4) - print(f"Sample {sample_number}: I ask the generator (Beam: - max_new_tokens: 10, temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None, presence_penalty: 1.3 = frequency_penalty = 1.4): {test_prompt}... It responds: '{response_4}'.") + response_3 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty: 1.3, frequency_penalty = 1.4) + print(f"Sample {sample_number}: I ask the generator (Beam: - max_new_tokens: 10, temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty: 1.3, frequency_penalty = 1.4): {test_prompt}... It responds: '{response2}'.") + response_4 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty: 1.3, frequency_penalty = 1.4) + print(f"Sample {sample_number}: I ask the generator (Beam: - max_new_tokens: 10, temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty: 1.3, frequency_penalty = 1.4): {test_prompt}... It responds: '{response3}'.") + response_4 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None, presence_penalty: 1.3, frequency_penalty = 1.4) + print(f"Sample {sample_number}: I ask the generator (Beam: - max_new_tokens: 10, temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None, presence_penalty: 1.3, frequency_penalty = 1.4): {test_prompt}... It responds: '{response_4}'.") prompt_samples = [ From 3a97f0e69f1df791d65830f1692d1e924801f9f0 Mon Sep 17 00:00:00 2001 From: David Thrower Date: Sun, 28 Sep 2025 18:53:56 -0400 Subject: [PATCH 5/8] Update generative-proof-of-concept-CPU-preprocessing-in-memory.py Syntax --- ...e-proof-of-concept-CPU-preprocessing-in-memory.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/generative-proof-of-concept-CPU-preprocessing-in-memory.py b/generative-proof-of-concept-CPU-preprocessing-in-memory.py index 288dc80..2ef1258 100644 --- a/generative-proof-of-concept-CPU-preprocessing-in-memory.py +++ b/generative-proof-of-concept-CPU-preprocessing-in-memory.py @@ -1099,12 +1099,12 @@ def test_text(test_prompt: str, max_new_tokens: int, sample_number: int, result: print(f"Sample {sample_number}: I ask the generator (greedy): {test_prompt}... It responds: '{response1}'.") response2 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens) print(f"Sample {sample_number}: I ask the generator (Beam defaults - max_new_tokens: 10, temperature: 0.75, top_k: 75, top_p: 0.98, repetition_penalty: None, presence_penalty: 1.3, frequency_penalty: 1.4): {test_prompt}... It responds: '{response2}'.") - response_3 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty: 1.3, frequency_penalty = 1.4) - print(f"Sample {sample_number}: I ask the generator (Beam: - max_new_tokens: 10, temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty: 1.3, frequency_penalty = 1.4): {test_prompt}... 
It responds: '{response2}'.") - response_4 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty: 1.3, frequency_penalty = 1.4) - print(f"Sample {sample_number}: I ask the generator (Beam: - max_new_tokens: 10, temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty: 1.3, frequency_penalty = 1.4): {test_prompt}... It responds: '{response3}'.") - response_4 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None, presence_penalty: 1.3, frequency_penalty = 1.4) - print(f"Sample {sample_number}: I ask the generator (Beam: - max_new_tokens: 10, temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None, presence_penalty: 1.3, frequency_penalty = 1.4): {test_prompt}... It responds: '{response_4}'.") + response_3 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty = 1.3, frequency_penalty = 1.4) + print(f"Sample {sample_number}: I ask the generator (Beam: - max_new_tokens: 10, temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty = 1.3, frequency_penalty = 1.4): {test_prompt}... It responds: '{response2}'.") + response_4 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty = 1.3, frequency_penalty = 1.4) + print(f"Sample {sample_number}: I ask the generator (Beam: - max_new_tokens: 10, temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty = 1.3, frequency_penalty = 1.4): {test_prompt}... It responds: '{response3}'.") + response_4 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None, presence_penalty = 1.3, frequency_penalty = 1.4) + print(f"Sample {sample_number}: I ask the generator (Beam: - max_new_tokens: 10, temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None, presence_penalty = 1.3, frequency_penalty = 1.4): {test_prompt}... It responds: '{response_4}'.") prompt_samples = [ From 62efa711331f71a86af1213e910a104a8a58a7a7 Mon Sep 17 00:00:00 2001 From: David Thrower Date: Sun, 28 Sep 2025 18:57:18 -0400 Subject: [PATCH 6/8] Update generative-proof-of-concept-CPU-preprocessing-in-memory.py Syntax / naming error. --- generative-proof-of-concept-CPU-preprocessing-in-memory.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/generative-proof-of-concept-CPU-preprocessing-in-memory.py b/generative-proof-of-concept-CPU-preprocessing-in-memory.py index 2ef1258..4163428 100644 --- a/generative-proof-of-concept-CPU-preprocessing-in-memory.py +++ b/generative-proof-of-concept-CPU-preprocessing-in-memory.py @@ -1092,7 +1092,7 @@ def test_text(test_prompt: str, max_new_tokens: int, sample_number: int, result: - max_new_tokens: int, number of tokens to generate unless we generate a stop token. - sample_number: Metadata for sample... 
- result: Perplexity score from this run - - result cutoff: Perplexity score that would be expected to indicate a trial worth running this pn + - result_cutoff: Perplexity score that would be expected to indicate a trial worth running this pn """ response1 = response = complete_text_greedy(text=test_prompt, max_new_tokens=max_new_tokens) @@ -1116,7 +1116,7 @@ def test_text(test_prompt: str, max_new_tokens: int, sample_number: int, result: counter = 0 for sample in prompt_samples: - test_text(test_prompt=sample, max_new_tokens=MAX_NEW_TOKENS, sample_number= counter, result=result, result cutoff = RESULT_CUTOFF) + test_text(test_prompt=sample, max_new_tokens=MAX_NEW_TOKENS, sample_number= counter, result=result, result_cutoff = RESULT_CUTOFF) # # Tokenize the text without padding first to get actual tokens # sample_tokenized = tokenizer( From 4520eb325263a8e55aac835dd840e1d249bb795a Mon Sep 17 00:00:00 2001 From: David Thrower Date: Sun, 28 Sep 2025 19:03:47 -0400 Subject: [PATCH 7/8] Update generative-proof-of-concept-CPU-preprocessing-in-memory.py More naming / reference errors ... --- ...proof-of-concept-CPU-preprocessing-in-memory.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/generative-proof-of-concept-CPU-preprocessing-in-memory.py b/generative-proof-of-concept-CPU-preprocessing-in-memory.py index 4163428..96ededc 100644 --- a/generative-proof-of-concept-CPU-preprocessing-in-memory.py +++ b/generative-proof-of-concept-CPU-preprocessing-in-memory.py @@ -1097,14 +1097,14 @@ def test_text(test_prompt: str, max_new_tokens: int, sample_number: int, result: """ response1 = response = complete_text_greedy(text=test_prompt, max_new_tokens=max_new_tokens) print(f"Sample {sample_number}: I ask the generator (greedy): {test_prompt}... It responds: '{response1}'.") - response2 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens) - print(f"Sample {sample_number}: I ask the generator (Beam defaults - max_new_tokens: 10, temperature: 0.75, top_k: 75, top_p: 0.98, repetition_penalty: None, presence_penalty: 1.3, frequency_penalty: 1.4): {test_prompt}... It responds: '{response2}'.") + response_2 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens) + print(f"Sample {sample_number}: I ask the generator (Beam defaults - max_new_tokens: 10, temperature: 0.75, top_k: 75, top_p: 0.98, repetition_penalty: None, presence_penalty: 1.3, frequency_penalty: 1.4): {test_prompt}... It responds: '{response_2}'.") response_3 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty = 1.3, frequency_penalty = 1.4) - print(f"Sample {sample_number}: I ask the generator (Beam: - max_new_tokens: 10, temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty = 1.3, frequency_penalty = 1.4): {test_prompt}... It responds: '{response2}'.") - response_4 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty = 1.3, frequency_penalty = 1.4) - print(f"Sample {sample_number}: I ask the generator (Beam: - max_new_tokens: 10, temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty = 1.3, frequency_penalty = 1.4): {test_prompt}... 
It responds: '{response3}'.") - response_4 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None, presence_penalty = 1.3, frequency_penalty = 1.4) - print(f"Sample {sample_number}: I ask the generator (Beam: - max_new_tokens: 10, temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None, presence_penalty = 1.3, frequency_penalty = 1.4): {test_prompt}... It responds: '{response_4}'.") + print(f"Sample {sample_number}: I ask the generator (Beam: - max_new_tokens: 10, temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty = 1.3, frequency_penalty = 1.4): {test_prompt}... It responds: '{response_3}'.") + response_4 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty = 1.3, frequency_penalty = 1.4) + print(f"Sample {sample_number}: I ask the generator (Beam: - max_new_tokens: 10, temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty = 1.3, frequency_penalty = 1.4): {test_prompt}... It responds: '{response_4}'.") + response_5 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None, presence_penalty = 1.3, frequency_penalty = 1.4) + print(f"Sample {sample_number}: I ask the generator (Beam: - max_new_tokens: 10, temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None, presence_penalty = 1.3, frequency_penalty = 1.4): {test_prompt}... It responds: '{response_5}'.") prompt_samples = [ From 462368e5a142027310838d327a693ddf5a26fed9 Mon Sep 17 00:00:00 2001 From: David Thrower Date: Sun, 28 Sep 2025 19:08:26 -0400 Subject: [PATCH 8/8] Update generative-proof-of-concept-CPU-preprocessing-in-memory.py Add conditional filtering for result < result_cutoff, so verbose prints only print when there is a result that makes sense to generate text from. --- ...-of-concept-CPU-preprocessing-in-memory.py | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/generative-proof-of-concept-CPU-preprocessing-in-memory.py b/generative-proof-of-concept-CPU-preprocessing-in-memory.py index 96ededc..8a34c0f 100644 --- a/generative-proof-of-concept-CPU-preprocessing-in-memory.py +++ b/generative-proof-of-concept-CPU-preprocessing-in-memory.py @@ -1095,16 +1095,17 @@ def test_text(test_prompt: str, max_new_tokens: int, sample_number: int, result: - result_cutoff: Perplexity score that would be expected to indicate a trial worth running this pn """ - response1 = response = complete_text_greedy(text=test_prompt, max_new_tokens=max_new_tokens) - print(f"Sample {sample_number}: I ask the generator (greedy): {test_prompt}... It responds: '{response1}'.") - response_2 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens) - print(f"Sample {sample_number}: I ask the generator (Beam defaults - max_new_tokens: 10, temperature: 0.75, top_k: 75, top_p: 0.98, repetition_penalty: None, presence_penalty: 1.3, frequency_penalty: 1.4): {test_prompt}... 
It responds: '{response_2}'.") - response_3 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty = 1.3, frequency_penalty = 1.4) - print(f"Sample {sample_number}: I ask the generator (Beam: - max_new_tokens: 10, temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty = 1.3, frequency_penalty = 1.4): {test_prompt}... It responds: '{response_3}'.") - response_4 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty = 1.3, frequency_penalty = 1.4) - print(f"Sample {sample_number}: I ask the generator (Beam: - max_new_tokens: 10, temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty = 1.3, frequency_penalty = 1.4): {test_prompt}... It responds: '{response_4}'.") - response_5 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None, presence_penalty = 1.3, frequency_penalty = 1.4) - print(f"Sample {sample_number}: I ask the generator (Beam: - max_new_tokens: 10, temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None, presence_penalty = 1.3, frequency_penalty = 1.4): {test_prompt}... It responds: '{response_5}'.") + if result < result_cutoff: + response1 = response = complete_text_greedy(text=test_prompt, max_new_tokens=max_new_tokens) + print(f"Sample {sample_number}: I ask the generator (greedy): {test_prompt}... It responds: '{response1}'.") + response_2 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens) + print(f"Sample {sample_number}: I ask the generator (Beam defaults - max_new_tokens: 10, temperature: 0.75, top_k: 75, top_p: 0.98, repetition_penalty: None, presence_penalty: 1.3, frequency_penalty: 1.4): {test_prompt}... It responds: '{response_2}'.") + response_3 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty = 1.3, frequency_penalty = 1.4) + print(f"Sample {sample_number}: I ask the generator (Beam: - max_new_tokens: 10, temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty = 1.3, frequency_penalty = 1.4): {test_prompt}... It responds: '{response_3}'.") + response_4 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty = 1.3, frequency_penalty = 1.4) + print(f"Sample {sample_number}: I ask the generator (Beam: - max_new_tokens: 10, temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty = 1.3, frequency_penalty = 1.4): {test_prompt}... It responds: '{response_4}'.") + response_5 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens, temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None, presence_penalty = 1.3, frequency_penalty = 1.4) + print(f"Sample {sample_number}: I ask the generator (Beam: - max_new_tokens: 10, temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None, presence_penalty = 1.3, frequency_penalty = 1.4): {test_prompt}... It responds: '{response_5}'.") prompt_samples = [