From 30164c71dc542c1160ebfde09ac625078533e9f1 Mon Sep 17 00:00:00 2001
From: David Thrower
Date: Sat, 22 Mar 2025 14:16:42 -0400
Subject: [PATCH 01/48] Update automerge.yml

Temporarily disable time-consuming workflows by commenting them out.
Comment out the BERT-based text classification workflow, possibly
permanently, as it is obsolete.
---
 .github/workflows/automerge.yml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/automerge.yml b/.github/workflows/automerge.yml
index 29b75cc..7e85568 100644
--- a/.github/workflows/automerge.yml
+++ b/.github/workflows/automerge.yml
@@ -46,12 +46,12 @@ jobs:
        run: python3 regression-example-ames-no-preproc.py
      - name: Test distributed random search Ames by running - Val set
        run: python3 regression-example-ames-no-preproc-val-set.py
-     - name: Test text classifier - random search - ham-spam
-       run: python3 text-class-ham-or-spam.py
-       timeout-minutes: 90
-     - name: Test image classifier - small subset of CIFAR10
-       timeout-minutes: 90
-       run: python3 cifar10-example.py
+     # - name: Test text classifier - random search - ham-spam
+     #   run: python3 text-class-ham-or-spam.py
+     #   timeout-minutes: 90
+     # - name: Test image classifier - small subset of CIFAR10 # add back
+     #   timeout-minutes: 90
+     #   run: python3 cifar10-example.py
      - name: Phishing email detection with GPT2 embedding
        timeout-minutes: 120
        run: python3 phishing_email_detection_gpt2.py

From 89049660678384c3039f5467471bfe0b97eec8d8 Mon Sep 17 00:00:00 2001
From: David Thrower
Date: Sat, 22 Mar 2025 14:17:59 -0400
Subject: [PATCH 02/48] Update automerge.yml

Add branch to workflow.
---
 .github/workflows/automerge.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/automerge.yml b/.github/workflows/automerge.yml
index 7e85568..82aba07 100644
--- a/.github/workflows/automerge.yml
+++ b/.github/workflows/automerge.yml
@@ -5,7 +5,7 @@ name: Python application

 on:
   push:
-    branches: [ "main", "148-tensorflow-upgrades" ]
+    branches: [ "main", "154-benchmark-inference-times---cerebros-model-vs-original-gpt-2" ]

 permissions:
   contents: read

From c7e8b3093577c99d712c70d29cea2bc70b5c8eeb Mon Sep 17 00:00:00 2001
From: David Thrower
Date: Sat, 22 Mar 2025 15:17:21 -0400
Subject: [PATCH 03/48] Update phishing_email_detection_gpt2.py

Added a baseline fine-tuning of the full GPT2 to compare against the
Cerebros text classifier.
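For reference, a minimal sketch of the baseline this commit introduces (raw text in, GPT2 embedding out, sigmoid head on top), assuming the GPT2Layer wrapper defined in this file and the keras_nlp "gpt2_base_en" preset it loads:

import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam

# GPT2Layer is the custom encoder wrapper defined in
# phishing_email_detection_gpt2.py; max_seq_length matches its tokenizer.
max_seq_length = 96
inp = Input(shape=(), dtype=tf.string)      # raw email text
emb = GPT2Layer(max_seq_length)(inp)        # pooled GPT2 embedding
out = Dense(1, activation='sigmoid')(emb)   # phishing vs. legitimate
gpt_baseline = Model(inputs=inp, outputs=out)
gpt_baseline.compile(optimizer=Adam(learning_rate=1e-4),  # small LR for fine-tuning
                     loss='binary_crossentropy',
                     metrics=['accuracy'])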
--- phishing_email_detection_gpt2.py | 260 +++++++++++++++++++------------ 1 file changed, 161 insertions(+), 99 deletions(-) diff --git a/phishing_email_detection_gpt2.py b/phishing_email_detection_gpt2.py index 9920ce9..d991b21 100644 --- a/phishing_email_detection_gpt2.py +++ b/phishing_email_detection_gpt2.py @@ -65,8 +65,15 @@ # # Tensors for training data and labels # -training_x = [tf.constant(X_train)] -train_labels = [tf.constant(y_train)] + +# Training data for baseline model +baseline_train_x = tf.constant(X_train) +baseline_train_y = tf.constant(y_train) + +# Packaged for Cerebros (multimodal, takes inputs as a list) +training_x = [baseline_train_x] +train_labels = [baseline_train_y] + # # Input and output shapes # @@ -90,9 +97,9 @@ def __init__(self, max_seq_length, **kwargs): # Set whether the GPT2 model's layers are trainable #self.encoder.trainable = False for layer in self.encoder.layers: - layer.trainable = False + layer.trainable = True # - self.encoder.layers[-2].trainable = True + # self.encoder.layers[-2].trainable = True # # Set the maximum sequence length for tokenization self.max_seq_length = max_seq_length @@ -121,101 +128,156 @@ def from_config(cls, config): # GPT2 configurables max_seq_length = 96 -# Base model +# GPT Baseline Model input_layer = Input(shape=(), dtype=tf.string) gpt2_layer = GPT2Layer(max_seq_length)(input_layer) #output = Flatten()(gpt2_layer) -base_model = Model(inputs=input_layer, outputs=gpt2_layer) -base_model.summary() - -"""### Cerebros search for the best model""" - -# -# Cerebros configurables -# -activation = 'gelu' -predecessor_level_connection_affinity_factor_first = 49.9999 -predecessor_level_connection_affinity_factor_main = 0.31456 -max_consecutive_lateral_connections = 22 -p_lateral_connection = 0.39256 -num_lateral_connection_tries_per_unit = 10 -learning_rate = 0.0000511065 -epochs = 6 # [1, 100] -batch_size = 13 -maximum_levels = 4 # [3,7] -maximum_units_per_level = 8 # [2,10] -maximum_neurons_per_unit = 5 # [2,20] - -# -# Logging -# -TIME = pendulum.now(tz='America/New_York').__str__()[:16]\ - .replace('T', '_')\ - .replace(':', '_')\ - .replace('-', '_') -PROJECT_NAME = f'{TIME}_cerebros_auto_ml_phishing_email_test' - -meta_trial_number = 42 # irrelevant unless in distributed training - -cerebros_automl = SimpleCerebrosRandomSearch( - unit_type=DenseUnit, - input_shapes=INPUT_SHAPES, - output_shapes=OUTPUT_SHAPES, - training_data=training_x, - labels=train_labels, - validation_split=0.35, - direction='maximize', - metric_to_rank_by="val_binary_accuracy", - minimum_levels=2, - maximum_levels=maximum_levels, - minimum_units_per_level=1, - maximum_units_per_level=maximum_units_per_level, - minimum_neurons_per_unit=1, - maximum_neurons_per_unit=maximum_neurons_per_unit, - activation=activation, - final_activation='sigmoid', - number_of_architecture_moities_to_try=2, - number_of_tries_per_architecture_moity=1, - minimum_skip_connection_depth=1, - maximum_skip_connection_depth=7, - predecessor_level_connection_affinity_factor_first=predecessor_level_connection_affinity_factor_first, - predecessor_level_connection_affinity_factor_first_rounding_rule='ceil', - predecessor_level_connection_affinity_factor_main=predecessor_level_connection_affinity_factor_main, - predecessor_level_connection_affinity_factor_main_rounding_rule='ceil', - predecessor_level_connection_affinity_factor_decay_main=zero_7_exp_decay, - seed=8675309, - max_consecutive_lateral_connections=max_consecutive_lateral_connections, - gate_after_n_lateral_connections=3, 
- gate_activation_function=simple_sigmoid, - p_lateral_connection=p_lateral_connection, - p_lateral_connection_decay=zero_95_exp_decay, - num_lateral_connection_tries_per_unit=num_lateral_connection_tries_per_unit, - learning_rate=learning_rate, - loss=tf.keras.losses.CategoricalHinge(), - metrics=[tf.keras.metrics.BinaryAccuracy(), - tf.keras.metrics.Precision(), - tf.keras.metrics.Recall()], - epochs=epochs, - project_name=f"{PROJECT_NAME}_meta_{meta_trial_number}", - model_graphs='model_graphs', - batch_size=batch_size, - meta_trial_number=meta_trial_number, - base_models=[base_model], - train_data_dtype=tf.string) - -result = cerebros_automl.run_random_search() - -print(f'Best accuracy achieved is {result}') -print(f'binary accuracy') - -"""### Testing the best model found""" - -# -# Load the best model (taking into account that it has a custom layer) -# -best_model_found =\ -tf.keras.models.load_model(cerebros_automl.best_model_path,\ -custom_objects={'GPT2Layer': GPT2Layer(max_seq_length)}) - -print('Evaluating on the test dataset') -best_model_found.evaluate(X_test, y_test) +binary_output = tf.keras.layers.Dense(1, activation='sigmoid')(gpt2_layer) +gpt_baseline_model = Model(inputs=input_layer, outputs=binary_output) + +gpt_baseline_model.compile( + optimizer=Adam(learning_rate=1e-4), # Small LR since we're fine-tuning GPT + loss='binary_crossentropy', + metrics=['accuracy', tf.keras.metrics.AUC(name='auc')] +) + +history = gpt_baseline_model.fit( + x=X_train, # Input data + y=y_train, # Labels + epochs=20, # Number of training iterations + batch_size=16, # Batch size small due to GPU memory constraints + validation_split=0.2, # Hold out 20% of training data for validation + shuffle=True, # Shuffle data at each epoch + callbacks=[ + tf.keras.callbacks.EarlyStopping( + monitor='val_loss', + patience=3, + restore_best_weights=True, + min_delta=0.001 + ), + tf.keras.callbacks.ReduceLROnPlateau( + monitor='val_loss', + factor=0.2, + patience=2, + min_lr=1e-6 + ) + ] +) + +hy = history["history"] +hy_df = pd.DataFrame(hy) +print(hy_df) + + + + + + + + + + + + + +# base_model = Model(inputs=input_layer, outputs=gpt2_layer) +# base_model.summary() + + + + + + + + +# """### Cerebros search for the best model""" + +# # +# # Cerebros configurables +# # +# activation = 'gelu' +# predecessor_level_connection_affinity_factor_first = 49.9999 +# predecessor_level_connection_affinity_factor_main = 0.31456 +# max_consecutive_lateral_connections = 22 +# p_lateral_connection = 0.39256 +# num_lateral_connection_tries_per_unit = 10 +# learning_rate = 0.0000511065 +# epochs = 6 # [1, 100] +# batch_size = 13 +# maximum_levels = 4 # [3,7] +# maximum_units_per_level = 8 # [2,10] +# maximum_neurons_per_unit = 5 # [2,20] + +# # +# # Logging +# # +# TIME = pendulum.now(tz='America/New_York').__str__()[:16]\ +# .replace('T', '_')\ +# .replace(':', '_')\ +# .replace('-', '_') +# PROJECT_NAME = f'{TIME}_cerebros_auto_ml_phishing_email_test' + +# meta_trial_number = 42 # irrelevant unless in distributed training + +# cerebros_automl = SimpleCerebrosRandomSearch( +# unit_type=DenseUnit, +# input_shapes=INPUT_SHAPES, +# output_shapes=OUTPUT_SHAPES, +# training_data=training_x, +# labels=train_labels, +# validation_split=0.35, +# direction='maximize', +# metric_to_rank_by="val_binary_accuracy", +# minimum_levels=2, +# maximum_levels=maximum_levels, +# minimum_units_per_level=1, +# maximum_units_per_level=maximum_units_per_level, +# minimum_neurons_per_unit=1, +# maximum_neurons_per_unit=maximum_neurons_per_unit, +# 
activation=activation, +# final_activation='sigmoid', +# number_of_architecture_moities_to_try=2, +# number_of_tries_per_architecture_moity=1, +# minimum_skip_connection_depth=1, +# maximum_skip_connection_depth=7, +# predecessor_level_connection_affinity_factor_first=predecessor_level_connection_affinity_factor_first, +# predecessor_level_connection_affinity_factor_first_rounding_rule='ceil', +# predecessor_level_connection_affinity_factor_main=predecessor_level_connection_affinity_factor_main, +# predecessor_level_connection_affinity_factor_main_rounding_rule='ceil', +# predecessor_level_connection_affinity_factor_decay_main=zero_7_exp_decay, +# seed=8675309, +# max_consecutive_lateral_connections=max_consecutive_lateral_connections, +# gate_after_n_lateral_connections=3, +# gate_activation_function=simple_sigmoid, +# p_lateral_connection=p_lateral_connection, +# p_lateral_connection_decay=zero_95_exp_decay, +# num_lateral_connection_tries_per_unit=num_lateral_connection_tries_per_unit, +# learning_rate=learning_rate, +# loss=tf.keras.losses.CategoricalHinge(), +# metrics=[tf.keras.metrics.BinaryAccuracy(), +# tf.keras.metrics.Precision(), +# tf.keras.metrics.Recall()], +# epochs=epochs, +# project_name=f"{PROJECT_NAME}_meta_{meta_trial_number}", +# model_graphs='model_graphs', +# batch_size=batch_size, +# meta_trial_number=meta_trial_number, +# base_models=[base_model], +# train_data_dtype=tf.string) + +# result = cerebros_automl.run_random_search() + +# print(f'Best accuracy achieved is {result}') +# print(f'binary accuracy') + +# """### Testing the best model found""" + +# # +# # Load the best model (taking into account that it has a custom layer) +# # +# best_model_found =\ +# tf.keras.models.load_model(cerebros_automl.best_model_path,\ +# custom_objects={'GPT2Layer': GPT2Layer(max_seq_length)}) + +# print('Evaluating on the test dataset') +# best_model_found.evaluate(X_test, y_test) From b790e64cbe54dda9d16e30dd5d0f35fb2180243e Mon Sep 17 00:00:00 2001 From: David Thrower Date: Sat, 22 Mar 2025 17:03:18 -0400 Subject: [PATCH 04/48] Update phishing_email_detection_gpt2.py --- phishing_email_detection_gpt2.py | 223 ++++++++++++++++++------------- 1 file changed, 132 insertions(+), 91 deletions(-) diff --git a/phishing_email_detection_gpt2.py b/phishing_email_detection_gpt2.py index d991b21..2106461 100644 --- a/phishing_email_detection_gpt2.py +++ b/phishing_email_detection_gpt2.py @@ -29,6 +29,8 @@ from cerebros.denseautomlstructuralcomponent.dense_automl_structural_component\ import zero_7_exp_decay, zero_95_exp_decay, simple_sigmoid from ast import literal_eval +import time + # # Load the email data @@ -141,10 +143,14 @@ def from_config(cls, config): metrics=['accuracy', tf.keras.metrics.AUC(name='auc')] ) +gpt_t0 = time.time() + +print(gpt_baseline_model.summary()) + history = gpt_baseline_model.fit( x=X_train, # Input data y=y_train, # Labels - epochs=20, # Number of training iterations + epochs=4, # Number of training iterations batch_size=16, # Batch size small due to GPU memory constraints validation_split=0.2, # Hold out 20% of training data for validation shuffle=True, # Shuffle data at each epoch @@ -164,111 +170,146 @@ def from_config(cls, config): ] ) +gpt_t1 = time.time() +gpt_time_on_one_model_min = (gpt_t1 - gpt_t1) / 60 + hy = history["history"] hy_df = pd.DataFrame(hy) print(hy_df) +### Cerebros model: +# TokenizerLayer class to handle tokenization and return only token_ids +class TokenizerLayer(tf.keras.layers.Layer): + def __init__(self, max_seq_length, **kwargs): 
+ super().__init__(**kwargs) + self.tokenizer = GPT2Tokenizer.from_preset("gpt2_base_en") + self.preprocessor = GPT2Preprocessor(self.tokenizer, sequence_length=max_seq_length) + self.max_seq_length = max_seq_length + def call(self, inputs): + processed = self.preprocessor(inputs) # Accepts tensor of strings, outputs {"token_ids": ...} + return processed["token_ids"] # Output shape: (batch_size, max_seq_length) + def get_config(self): + base_config = super().get_config() + base_config.update({"max_seq_length": self.max_seq_length}) + return base_config +VOCAB_SIZE = GPT2Tokenizer.vocabulary_size() +# Create cerebros_base_model +def build_cerebros_base_model(max_seq_length=96, embedding_dim=256, output_dim=VOCAB_SIZE): + input_layer = Input(shape=(), dtype=tf.string) # Text input + token_ids = TokenizerLayer(max_seq_length)(input_layer) + # Build embedding layer with GPT2 tokenizer's vocabulary size (50257 for GPT2Base) + embedded = tf.keras.layers.Embedding( + input_dim=GPT2Tokenizer.vocabulary_size(), # Uses standard GPT-2 vocab size + output_dim=embedding_dim, + mask_zero=True, # Handle tokens + name="custom_embedding" + )(token_ids) + + # Flatten for downstream models + flattened = Flatten()(embedded) + model = Model(inputs=input_layer, outputs=flattened) + return model +# Example usage (outputs depend on parameters, set embedding_dim as desired) +cerebros_base_model = build_cerebros_base_model(max_seq_length=96) +"""### Cerebros search for the best model""" +# +# Cerebros configurables +# +activation = 'gelu' +predecessor_level_connection_affinity_factor_first = 49.9999 +predecessor_level_connection_affinity_factor_main = 0.31456 +max_consecutive_lateral_connections = 22 +p_lateral_connection = 0.39256 +num_lateral_connection_tries_per_unit = 10 +learning_rate = 0.0000511065 +epochs = 6 # [1, 100] +batch_size = 13 +maximum_levels = 4 # [3,7] +maximum_units_per_level = 8 # [2,10] +maximum_neurons_per_unit = 5 # [2,20] +moities_to_try = 2 +tries_per_moity = 1 - -# base_model = Model(inputs=input_layer, outputs=gpt2_layer) -# base_model.summary() - - - - - - - - -# """### Cerebros search for the best model""" - -# # -# # Cerebros configurables -# # -# activation = 'gelu' -# predecessor_level_connection_affinity_factor_first = 49.9999 -# predecessor_level_connection_affinity_factor_main = 0.31456 -# max_consecutive_lateral_connections = 22 -# p_lateral_connection = 0.39256 -# num_lateral_connection_tries_per_unit = 10 -# learning_rate = 0.0000511065 -# epochs = 6 # [1, 100] -# batch_size = 13 -# maximum_levels = 4 # [3,7] -# maximum_units_per_level = 8 # [2,10] -# maximum_neurons_per_unit = 5 # [2,20] - -# # -# # Logging -# # -# TIME = pendulum.now(tz='America/New_York').__str__()[:16]\ -# .replace('T', '_')\ -# .replace(':', '_')\ -# .replace('-', '_') -# PROJECT_NAME = f'{TIME}_cerebros_auto_ml_phishing_email_test' - -# meta_trial_number = 42 # irrelevant unless in distributed training - -# cerebros_automl = SimpleCerebrosRandomSearch( -# unit_type=DenseUnit, -# input_shapes=INPUT_SHAPES, -# output_shapes=OUTPUT_SHAPES, -# training_data=training_x, -# labels=train_labels, -# validation_split=0.35, -# direction='maximize', -# metric_to_rank_by="val_binary_accuracy", -# minimum_levels=2, -# maximum_levels=maximum_levels, -# minimum_units_per_level=1, -# maximum_units_per_level=maximum_units_per_level, -# minimum_neurons_per_unit=1, -# maximum_neurons_per_unit=maximum_neurons_per_unit, -# activation=activation, -# final_activation='sigmoid', -# number_of_architecture_moities_to_try=2, -# 
number_of_tries_per_architecture_moity=1, -# minimum_skip_connection_depth=1, -# maximum_skip_connection_depth=7, -# predecessor_level_connection_affinity_factor_first=predecessor_level_connection_affinity_factor_first, -# predecessor_level_connection_affinity_factor_first_rounding_rule='ceil', -# predecessor_level_connection_affinity_factor_main=predecessor_level_connection_affinity_factor_main, -# predecessor_level_connection_affinity_factor_main_rounding_rule='ceil', -# predecessor_level_connection_affinity_factor_decay_main=zero_7_exp_decay, -# seed=8675309, -# max_consecutive_lateral_connections=max_consecutive_lateral_connections, -# gate_after_n_lateral_connections=3, -# gate_activation_function=simple_sigmoid, -# p_lateral_connection=p_lateral_connection, -# p_lateral_connection_decay=zero_95_exp_decay, -# num_lateral_connection_tries_per_unit=num_lateral_connection_tries_per_unit, -# learning_rate=learning_rate, -# loss=tf.keras.losses.CategoricalHinge(), -# metrics=[tf.keras.metrics.BinaryAccuracy(), -# tf.keras.metrics.Precision(), -# tf.keras.metrics.Recall()], -# epochs=epochs, -# project_name=f"{PROJECT_NAME}_meta_{meta_trial_number}", -# model_graphs='model_graphs', -# batch_size=batch_size, -# meta_trial_number=meta_trial_number, -# base_models=[base_model], -# train_data_dtype=tf.string) - -# result = cerebros_automl.run_random_search() - -# print(f'Best accuracy achieved is {result}') -# print(f'binary accuracy') +# +# Logging +# +TIME = pendulum.now(tz='America/New_York').__str__()[:16]\ + .replace('T', '_')\ + .replace(':', '_')\ + .replace('-', '_') +PROJECT_NAME = f'{TIME}_cerebros_auto_ml_phishing_email_test' + +meta_trial_number = 42 # irrelevant unless in distributed training + + +cerebros_automl = SimpleCerebrosRandomSearch( + unit_type=DenseUnit, + input_shapes=INPUT_SHAPES, + output_shapes=OUTPUT_SHAPES, + training_data=training_x, + labels=train_labels, + validation_split=0.35, + direction='maximize', + metric_to_rank_by="val_accuracy", + minimum_levels=2, + maximum_levels=maximum_levels, + minimum_units_per_level=1, + maximum_units_per_level=maximum_units_per_level, + minimum_neurons_per_unit=1, + maximum_neurons_per_unit=maximum_neurons_per_unit, + activation=activation, + final_activation='sigmoid', + number_of_architecture_moities_to_try=moities_to_try, + number_of_tries_per_architecture_moity=tries_per_moity, + minimum_skip_connection_depth=1, + maximum_skip_connection_depth=7, + predecessor_level_connection_affinity_factor_first=predecessor_level_connection_affinity_factor_first, + predecessor_level_connection_affinity_factor_first_rounding_rule='ceil', + predecessor_level_connection_affinity_factor_main=predecessor_level_connection_affinity_factor_main, + predecessor_level_connection_affinity_factor_main_rounding_rule='ceil', + predecessor_level_connection_affinity_factor_decay_main=zero_7_exp_decay, + seed=8675309, + max_consecutive_lateral_connections=max_consecutive_lateral_connections, + gate_after_n_lateral_connections=3, + gate_activation_function=simple_sigmoid, + p_lateral_connection=p_lateral_connection, + p_lateral_connection_decay=zero_95_exp_decay, + num_lateral_connection_tries_per_unit=num_lateral_connection_tries_per_unit, + learning_rate=learning_rate, + loss=tf.keras.losses.CategoricalHinge(), + metrics=[tf.keras.metrics.Accuracy(), + tf.keras.metrics.Precision(), + tf.keras.metrics.Recall()], + epochs=epochs, + project_name=f"{PROJECT_NAME}_meta_{meta_trial_number}", + model_graphs='model_graphs', + batch_size=batch_size, + 
meta_trial_number=meta_trial_number, + base_models=[cerebros_base_model], + train_data_dtype=tf.string) + +cerebros_t0 = time.time() +result = cerebros_automl.run_random_search() +cerebros_t1 = time.time() +cerebros_time_all_models_min = (cerebros_t1 - cerebros_t0) / 60 +cerebros_time_per_model = cerebros_time_all_models_min / (moities_to_try * tries_per_moity) + +print(f"Cerebros trained 2 models FROM A COLD START in ONLY {cerebros_time_all_models_min} min. Cerebros took only {cerebros_time_per_model} minutes on average per model.") +print(f"GPT2 took {gpt_time_on_one_model_min} just to FINE TUNE one PRE - TRAINED model. Although this is a small scale test, this shows the advantage of scaling in ON timing VS ON**2 timing.") + + +print(f'Cerebros best accuracy achieved is {result}') +print(f'val set accuracy') # """### Testing the best model found""" From 15ec9c2c6444648610a68ffa9ebcf9e0e872c505 Mon Sep 17 00:00:00 2001 From: David Thrower Date: Sat, 22 Mar 2025 17:09:42 -0400 Subject: [PATCH 05/48] Update phishing_email_detection_gpt2.py Forgot to add dropout. --- phishing_email_detection_gpt2.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/phishing_email_detection_gpt2.py b/phishing_email_detection_gpt2.py index 2106461..05f2af5 100644 --- a/phishing_email_detection_gpt2.py +++ b/phishing_email_detection_gpt2.py @@ -213,9 +213,11 @@ def build_cerebros_base_model(max_seq_length=96, embedding_dim=256, output_dim=V # Flatten for downstream models flattened = Flatten()(embedded) - model = Model(inputs=input_layer, outputs=flattened) + dropout = tf.keras.layers.Dropout(.6)(flattened) + model = Model(inputs=input_layer, outputs=dropout) return model + # Example usage (outputs depend on parameters, set embedding_dim as desired) cerebros_base_model = build_cerebros_base_model(max_seq_length=96) From 0cfb4889c1a9dadbb28c5bbfeac2aa211ce56ef4 Mon Sep 17 00:00:00 2001 From: David Thrower Date: Sat, 22 Mar 2025 17:40:05 -0400 Subject: [PATCH 06/48] Update phishing_email_detection_gpt2.py Amendments to Cerebros model. 
--- phishing_email_detection_gpt2.py | 101 ++++++++++++++++++++----------- 1 file changed, 66 insertions(+), 35 deletions(-) diff --git a/phishing_email_detection_gpt2.py b/phishing_email_detection_gpt2.py index 05f2af5..71d34ee 100644 --- a/phishing_email_detection_gpt2.py +++ b/phishing_email_detection_gpt2.py @@ -181,46 +181,71 @@ def from_config(cls, config): # TokenizerLayer class to handle tokenization and return only token_ids class TokenizerLayer(tf.keras.layers.Layer): + def __init__(self, max_seq_length, **kwargs): - super().__init__(**kwargs) - self.tokenizer = GPT2Tokenizer.from_preset("gpt2_base_en") - self.preprocessor = GPT2Preprocessor(self.tokenizer, sequence_length=max_seq_length) + # + super(GPT2Layer, self).__init__(**kwargs) + # + # Load the GPT2 tokenizer, preprocessor and model + self.tokenizer = GPT2Tokenizer.from_preset("gpt2_extra_large_en") # "gpt2_base_en" + self.preprocessor = GPT2Preprocessor(self.tokenizer, + sequence_length=max_seq_length) + # self.encoder = GPT2Backbone.from_preset("gpt2_base_en") + # + # Set whether the GPT2 model's layers are trainable + # self.encoder.trainable = False + # for layer in self.encoder.layers: + # layer.trainable = False + # + # self.encoder.layers[-2].trainable = True + # + # Set the maximum sequence length for tokenization self.max_seq_length = max_seq_length def call(self, inputs): - processed = self.preprocessor(inputs) # Accepts tensor of strings, outputs {"token_ids": ...} - return processed["token_ids"] # Output shape: (batch_size, max_seq_length) + # + # Output the GPT2 embedding + prep = self.preprocessor([inputs]) + # embedding = self.encoder(prep) + # avg_pool = tf.reduce_mean(embedding, axis=1) + # + return prep['token_ids'] def get_config(self): - base_config = super().get_config() - base_config.update({"max_seq_length": self.max_seq_length}) - return base_config + # + config = super(GPT2Layer, self).get_config() + config.update({'max_seq_length': self.max_seq_length}) + # + return config + + @classmethod + def from_config(cls, config): + # + return cls(max_seq_length=config['max_seq_length']) +# GPT2 configurables -VOCAB_SIZE = GPT2Tokenizer.vocabulary_size() +max_seq_length = 900 -# Create cerebros_base_model -def build_cerebros_base_model(max_seq_length=96, embedding_dim=256, output_dim=VOCAB_SIZE): - input_layer = Input(shape=(), dtype=tf.string) # Text input - token_ids = TokenizerLayer(max_seq_length)(input_layer) - # Build embedding layer with GPT2 tokenizer's vocabulary size (50257 for GPT2Base) - embedded = tf.keras.layers.Embedding( - input_dim=GPT2Tokenizer.vocabulary_size(), # Uses standard GPT-2 vocab size - output_dim=embedding_dim, - mask_zero=True, # Handle tokens - name="custom_embedding" - )(token_ids) - - # Flatten for downstream models - flattened = Flatten()(embedded) - dropout = tf.keras.layers.Dropout(.6)(flattened) - model = Model(inputs=input_layer, outputs=dropout) - return model +inp = tf.keras.layers.Input(shape=(), dtype=tf.string) +gp2_tokenizer = TokenizerLayer(max_seq_length=max_seq_length) +VOCABULARY_SIZE = gp2_tokenizer.tokenizer.vocabulary_size() +tokens = gp2_tokenizer(inp) -# Example usage (outputs depend on parameters, set embedding_dim as desired) -cerebros_base_model = build_cerebros_base_model(max_seq_length=96) +embedded =\ + tf.keras.layers.Embedding( + input_dim=VOCABULARY_SIZE, + output_dim=15, + input_length=max_seq_length, + mask_zero=True)(tokens) +dropout_embedded = tf.keras.layers.Dropout(0.6)(embedded) +flattened = tf.keras.layers.Flatten()(dropout_embedded) 
+cerebros_base_model =\ + tf.keras.Model( + inputs=inp, + outputs=flattened) """### Cerebros search for the best model""" @@ -234,11 +259,17 @@ def build_cerebros_base_model(max_seq_length=96, embedding_dim=256, output_dim=V p_lateral_connection = 0.39256 num_lateral_connection_tries_per_unit = 10 learning_rate = 0.0000511065 -epochs = 6 # [1, 100] -batch_size = 13 -maximum_levels = 4 # [3,7] -maximum_units_per_level = 8 # [2,10] +epochs = 15 # [1, 100] +batch_size = 20 +minimum_levels = 2 +maximum_levels = 4 # [3,7] + +minimum_units_per_level = 4 +maximum_units_per_level = 8 + +minimum_neurons_per_unit = 1 maximum_neurons_per_unit = 5 # [2,20] + moities_to_try = 2 tries_per_moity = 1 @@ -263,11 +294,11 @@ def build_cerebros_base_model(max_seq_length=96, embedding_dim=256, output_dim=V validation_split=0.35, direction='maximize', metric_to_rank_by="val_accuracy", - minimum_levels=2, + minimum_levels=minimum_levels, maximum_levels=maximum_levels, - minimum_units_per_level=1, + minimum_units_per_level=minimum_units_per_level, maximum_units_per_level=maximum_units_per_level, - minimum_neurons_per_unit=1, + minimum_neurons_per_unit=minimum_neurons_per_unit, maximum_neurons_per_unit=maximum_neurons_per_unit, activation=activation, final_activation='sigmoid', From 6f8695908a10a7b587ad2ed7bfc3e1f28776316a Mon Sep 17 00:00:00 2001 From: David Thrower Date: Sat, 22 Mar 2025 17:43:56 -0400 Subject: [PATCH 07/48] Update phishing_email_detection_gpt2.py Reduce seq length to accelerate job completion. --- phishing_email_detection_gpt2.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/phishing_email_detection_gpt2.py b/phishing_email_detection_gpt2.py index 71d34ee..d5700b2 100644 --- a/phishing_email_detection_gpt2.py +++ b/phishing_email_detection_gpt2.py @@ -225,7 +225,9 @@ def from_config(cls, config): # GPT2 configurables +# Optimal for accuracy: max_seq_length = 900 +max_seq_length = 250 inp = tf.keras.layers.Input(shape=(), dtype=tf.string) gp2_tokenizer = TokenizerLayer(max_seq_length=max_seq_length) From 830a2dcbf513f03f32027a39303a60b9392b4727 Mon Sep 17 00:00:00 2001 From: David Thrower Date: Sat, 22 Mar 2025 17:44:43 -0400 Subject: [PATCH 08/48] Update automerge.yml Up timeout to 300 min. --- .github/workflows/automerge.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/automerge.yml b/.github/workflows/automerge.yml index 82aba07..9490c3e 100644 --- a/.github/workflows/automerge.yml +++ b/.github/workflows/automerge.yml @@ -53,5 +53,5 @@ jobs: # timeout-minutes: 90 # run: python3 cifar10-example.py - name: Phishing email detection with GPT2 embedding - timeout-minutes: 120 + timeout-minutes: 300 run: python3 phishing_email_detection_gpt2.py From 407f90cb8759eaeb745fb7362c490c23c8dbe954 Mon Sep 17 00:00:00 2001 From: David Thrower Date: Sat, 22 Mar 2025 18:54:18 -0400 Subject: [PATCH 09/48] Update phishing_email_detection_gpt2.py Correct history indexing error. 
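Background for the diff below: Keras Model.fit returns a History object, not a dict, so subscripting it as history["history"] raises a TypeError; the per-epoch metrics live on its .history attribute. A minimal illustration, using the names from this script:

history = gpt_baseline_model.fit(x=X_train, y=y_train, epochs=1)
print(type(history.history))           # <class 'dict'> of per-epoch metric lists
hy_df = pd.DataFrame(history.history)  # what the corrected line builds
print(hy_df)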
---
 phishing_email_detection_gpt2.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/phishing_email_detection_gpt2.py b/phishing_email_detection_gpt2.py
index d5700b2..125fd4d 100644
--- a/phishing_email_detection_gpt2.py
+++ b/phishing_email_detection_gpt2.py
@@ -173,8 +173,7 @@ def from_config(cls, config):
 gpt_t1 = time.time()
 gpt_time_on_one_model_min = (gpt_t1 - gpt_t1) / 60
 
-hy = history["history"]
-hy_df = pd.DataFrame(hy)
+hy_df = pd.DataFrame(history.history)
 print(hy_df)
 
 ### Cerebros model:

From d5bdbce83a96cf7345a0b950d95cc29de7b07920 Mon Sep 17 00:00:00 2001
From: David Thrower
Date: Sat, 22 Mar 2025 18:57:52 -0400
Subject: [PATCH 10/48] Update phishing_email_detection_gpt2.py

Temporary test to fast-forward to the Cerebros model.
---
 phishing_email_detection_gpt2.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/phishing_email_detection_gpt2.py b/phishing_email_detection_gpt2.py
index 125fd4d..c721d34 100644
--- a/phishing_email_detection_gpt2.py
+++ b/phishing_email_detection_gpt2.py
@@ -137,6 +137,8 @@ def from_config(cls, config):
 binary_output = tf.keras.layers.Dense(1, activation='sigmoid')(gpt2_layer)
 gpt_baseline_model = Model(inputs=input_layer, outputs=binary_output)
 
+## Un - string out this
+"""
 gpt_baseline_model.compile(
     optimizer=Adam(learning_rate=1e-4),  # Small LR since we're fine-tuning GPT
     loss='binary_crossentropy',
     metrics=['accuracy', tf.keras.metrics.AUC(name='auc')]
 )
@@ -176,6 +178,8 @@ def from_config(cls, config):
 hy_df = pd.DataFrame(history.history)
 print(hy_df)
 
+"""
+
 ### Cerebros model:

From d8db0f1b9160ff73caef2f385def3f7a7c164b3a Mon Sep 17 00:00:00 2001
From: David Thrower
Date: Sat, 22 Mar 2025 19:01:16 -0400
Subject: [PATCH 11/48] Update phishing_email_detection_gpt2.py

Comment out an artifact of the GPT test so this can lint and run.
---
 phishing_email_detection_gpt2.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/phishing_email_detection_gpt2.py b/phishing_email_detection_gpt2.py
index c721d34..c00dfa4 100644
--- a/phishing_email_detection_gpt2.py
+++ b/phishing_email_detection_gpt2.py
@@ -343,7 +343,8 @@ def from_config(cls, config):
 cerebros_time_per_model = cerebros_time_all_models_min / (moities_to_try * tries_per_moity)
 
 print(f"Cerebros trained 2 models FROM A COLD START in ONLY {cerebros_time_all_models_min} min. Cerebros took only {cerebros_time_per_model} minutes on average per model.")
-print(f"GPT2 took {gpt_time_on_one_model_min} just to FINE TUNE one PRE - TRAINED model. Although this is a small scale test, this shows the advantage of scaling in ON timing VS ON**2 timing.")
+# Un-comment this !!!!!
+# print(f"GPT2 took {gpt_time_on_one_model_min} just to FINE TUNE one PRE - TRAINED model. Although this is a small scale test, this shows the advantage of scaling in ON timing VS ON**2 timing.")
 
 print(f'Cerebros best accuracy achieved is {result}')

From 014b3c3a48e369395404f64bcbd497e4bdb48b1d Mon Sep 17 00:00:00 2001
From: David Thrower
Date: Sat, 22 Mar 2025 19:16:06 -0400
Subject: [PATCH 12/48] Update phishing_email_detection_gpt2.py

Fix errors from trying to work too fast ...
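The central fix in the diff below is the super() call: a custom Keras layer must name its own class (or use the zero-argument Python 3 form). The copied-in super(GPT2Layer, self) raises a TypeError inside TokenizerLayer because TokenizerLayer is not a subclass of GPT2Layer. The corrected pattern:

class TokenizerLayer(tf.keras.layers.Layer):
    def __init__(self, max_seq_length, **kwargs):
        # Must reference this class, not GPT2Layer; or simply: super().__init__(**kwargs)
        super(TokenizerLayer, self).__init__(**kwargs)
        self.max_seq_length = max_seq_length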
---
 phishing_email_detection_gpt2.py | 30 ++--------------------------
 1 file changed, 4 insertions(+), 26 deletions(-)

diff --git a/phishing_email_detection_gpt2.py b/phishing_email_detection_gpt2.py
index c00dfa4..787a8b2 100644
--- a/phishing_email_detection_gpt2.py
+++ b/phishing_email_detection_gpt2.py
@@ -186,44 +186,22 @@ def from_config(cls, config):
 class TokenizerLayer(tf.keras.layers.Layer):
 
     def __init__(self, max_seq_length, **kwargs):
-        #
-        super(GPT2Layer, self).__init__(**kwargs)
-        #
-        # Load the GPT2 tokenizer, preprocessor and model
-        self.tokenizer = GPT2Tokenizer.from_preset("gpt2_extra_large_en")  # "gpt2_base_en"
-        self.preprocessor = GPT2Preprocessor(self.tokenizer,
-                                             sequence_length=max_seq_length)
-        # self.encoder = GPT2Backbone.from_preset("gpt2_base_en")
-        #
-        # Set whether the GPT2 model's layers are trainable
-        # self.encoder.trainable = False
-        # for layer in self.encoder.layers:
-        #     layer.trainable = False
-        #
-        # self.encoder.layers[-2].trainable = True
-        #
-        # Set the maximum sequence length for tokenization
+        super(TokenizerLayer, self).__init__(**kwargs)  # Update this line
+        self.tokenizer = GPT2Tokenizer.from_preset("gpt2_extra_large_en")
+        self.preprocessor = GPT2Preprocessor(self.tokenizer, sequence_length=max_seq_length)
         self.max_seq_length = max_seq_length
 
     def call(self, inputs):
-        #
-        # Output the GPT2 embedding
         prep = self.preprocessor([inputs])
-        # embedding = self.encoder(prep)
-        # avg_pool = tf.reduce_mean(embedding, axis=1)
-        #
         return prep['token_ids']
 
     def get_config(self):
-        #
-        config = super(GPT2Layer, self).get_config()
+        config = super(TokenizerLayer, self).get_config()
         config.update({'max_seq_length': self.max_seq_length})
-        #
         return config
 
     @classmethod
     def from_config(cls, config):
-        #
         return cls(max_seq_length=config['max_seq_length'])
 
 # GPT2 configurables

From 0b67f881f0d08befb4a3549acd71ce1333fbe5b5 Mon Sep 17 00:00:00 2001
From: David Thrower
Date: Sat, 22 Mar 2025 19:42:46 -0400
Subject: [PATCH 13/48] Update phishing_email_detection_gpt2.py

Re-corrected the metrics to BinaryAccuracy, fixing an AI-introduced error.
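Context for the diff below: tf.keras.metrics.Accuracy compares labels and predictions for exact equality, which is nearly always zero against continuous sigmoid outputs, while BinaryAccuracy thresholds predictions at 0.5 before comparing. A quick illustration:

import tensorflow as tf

y_true = [0.0, 1.0, 1.0]
y_pred = [0.1, 0.8, 0.4]   # raw sigmoid outputs

acc = tf.keras.metrics.Accuracy()
acc.update_state(y_true, y_pred)
print(acc.result().numpy())        # 0.0 -- no exact matches

bin_acc = tf.keras.metrics.BinaryAccuracy()
bin_acc.update_state(y_true, y_pred)
print(bin_acc.result().numpy())    # ~0.667 -- thresholded to [0, 1, 0]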
---
 phishing_email_detection_gpt2.py | 13 ++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/phishing_email_detection_gpt2.py b/phishing_email_detection_gpt2.py
index 787a8b2..202f54c 100644
--- a/phishing_email_detection_gpt2.py
+++ b/phishing_email_detection_gpt2.py
@@ -70,7 +70,7 @@

 # Training data for baseline model
 baseline_train_x = tf.constant(X_train)
-baseline_train_y = tf.constant(y_train)
+baseline_train_y = tf.constant(y_train, dtype=tf.int8)

 # Packaged for Cerebros (multimodal, takes inputs as a list)
 training_x = [baseline_train_x]
@@ -142,7 +142,10 @@ def from_config(cls, config):
 gpt_baseline_model.compile(
     optimizer=Adam(learning_rate=1e-4),  # Small LR since we're fine-tuning GPT
     loss='binary_crossentropy',
-    metrics=['accuracy', tf.keras.metrics.AUC(name='auc')]
+    # metrics=['accuracy', tf.keras.metrics.AUC(name='auc')]
+    metrics=[tf.keras.metrics.BinaryAccuracy(),
+             tf.keras.metrics.Precision(),
+             tf.keras.metrics.Recall()]
 )
 
 gpt_t0 = time.time()
@@ -303,9 +306,9 @@ def from_config(cls, config):
     num_lateral_connection_tries_per_unit=num_lateral_connection_tries_per_unit,
     learning_rate=learning_rate,
     loss=tf.keras.losses.CategoricalHinge(),
-    metrics=[tf.keras.metrics.Accuracy(),
-             tf.keras.metrics.Precision(),
-             tf.keras.metrics.Recall()],
+    metrics=[tf.keras.metrics.BinaryAccuracy(),
+             tf.keras.metrics.Precision(),
+             tf.keras.metrics.Recall()],
     epochs=epochs,
     project_name=f"{PROJECT_NAME}_meta_{meta_trial_number}",
     model_graphs='model_graphs',
     batch_size=batch_size,

From a480dfdbd7ca762fbfd3a60865045dc5f18b1858 Mon Sep 17 00:00:00 2001
From: David Thrower
Date: Sat, 22 Mar 2025 19:54:18 -0400
Subject: [PATCH 14/48] Update phishing_email_detection_gpt2.py

Correct metric to rank by (binary accuracy) ...
---
 phishing_email_detection_gpt2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/phishing_email_detection_gpt2.py b/phishing_email_detection_gpt2.py
index 202f54c..273ded6 100644
--- a/phishing_email_detection_gpt2.py
+++ b/phishing_email_detection_gpt2.py
@@ -279,7 +279,7 @@ def from_config(cls, config):
     labels=train_labels,
     validation_split=0.35,
     direction='maximize',
-    metric_to_rank_by="val_accuracy",
+    metric_to_rank_by="val_binary_accuracy",
     minimum_levels=minimum_levels,
     maximum_levels=maximum_levels,
     minimum_units_per_level=minimum_units_per_level,
     maximum_units_per_level=maximum_units_per_level,

From 0e72e61db1d482a9e5b1c26b8f9904c3164bd5eb Mon Sep 17 00:00:00 2001
From: David Thrower
Date: Sat, 22 Mar 2025 19:57:11 -0400
Subject: [PATCH 15/48] Update phishing_email_detection_gpt2.py

Uncomment the GPT test ...
--- phishing_email_detection_gpt2.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/phishing_email_detection_gpt2.py b/phishing_email_detection_gpt2.py index 273ded6..c073350 100644 --- a/phishing_email_detection_gpt2.py +++ b/phishing_email_detection_gpt2.py @@ -135,10 +135,10 @@ def from_config(cls, config): gpt2_layer = GPT2Layer(max_seq_length)(input_layer) #output = Flatten()(gpt2_layer) binary_output = tf.keras.layers.Dense(1, activation='sigmoid')(gpt2_layer) + gpt_baseline_model = Model(inputs=input_layer, outputs=binary_output) -## Un - string out this -""" + gpt_baseline_model.compile( optimizer=Adam(learning_rate=1e-4), # Small LR since we're fine-tuning GPT loss='binary_crossentropy', @@ -181,7 +181,6 @@ def from_config(cls, config): hy_df = pd.DataFrame(history.history) print(hy_df) -""" ### Cerebros model: @@ -321,11 +320,11 @@ def from_config(cls, config): result = cerebros_automl.run_random_search() cerebros_t1 = time.time() cerebros_time_all_models_min = (cerebros_t1 - cerebros_t0) / 60 -cerebros_time_per_model = cerebros_time_all_models_min / (moities_to_try * tries_per_moity) +models_tried = moities_to_try * tries_per_moity +cerebros_time_per_model = cerebros_time_all_models_min / models_tried -print(f"Cerebros trained 2 models FROM A COLD START in ONLY {cerebros_time_all_models_min} min. Cerebros took only {cerebros_time_per_model} minutes on average per model.") -# Un-comment this !!!!! -# print(f"GPT2 took {gpt_time_on_one_model_min} just to FINE TUNE one PRE - TRAINED model. Although this is a small scale test, this shows the advantage of scaling in ON timing VS ON**2 timing.") +print(f"Cerebros trained {models_tried} models FROM A COLD START in ONLY {cerebros_time_all_models_min} min. Cerebros took only {cerebros_time_per_model} minutes on average per model.") +print(f"GPT2 took {gpt_time_on_one_model_min} just to FINE TUNE one PRE - TRAINED model. Although this is a small scale test, this shows the advantage of scaling in ON timing VS ON**2 timing.") print(f'Cerebros best accuracy achieved is {result}') From 3cd5945f9b3b07a20f3f536ade3153d4c3ffde7c Mon Sep 17 00:00:00 2001 From: David Thrower Date: Sat, 22 Mar 2025 20:27:11 -0400 Subject: [PATCH 16/48] Update phishing_email_detection_gpt2.py Upped number of trials to 5. --- phishing_email_detection_gpt2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phishing_email_detection_gpt2.py b/phishing_email_detection_gpt2.py index c073350..0b45d24 100644 --- a/phishing_email_detection_gpt2.py +++ b/phishing_email_detection_gpt2.py @@ -255,7 +255,7 @@ def from_config(cls, config): minimum_neurons_per_unit = 1 maximum_neurons_per_unit = 5 # [2,20] -moities_to_try = 2 +moities_to_try = 5 tries_per_moity = 1 # From 6a9e88d3799e91ba4ef2aead23dd1113713d6b89 Mon Sep 17 00:00:00 2001 From: David Thrower Date: Sat, 22 Mar 2025 22:40:13 -0400 Subject: [PATCH 17/48] Update phishing_email_detection_gpt2.py Make seq len 750, fix typo. 
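The typo referenced here is the GPT2 timing line, which subtracted a timestamp from itself and therefore always reported zero minutes. The wall-clock pattern the diff below restores:

import time

gpt_t0 = time.time()
# ... gpt_baseline_model.fit(...) runs here ...
gpt_t1 = time.time()
gpt_time_on_one_model_min = (gpt_t1 - gpt_t0) / 60  # t1 - t0, not t1 - t1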
--- phishing_email_detection_gpt2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/phishing_email_detection_gpt2.py b/phishing_email_detection_gpt2.py index 0b45d24..7159ced 100644 --- a/phishing_email_detection_gpt2.py +++ b/phishing_email_detection_gpt2.py @@ -176,7 +176,7 @@ def from_config(cls, config): ) gpt_t1 = time.time() -gpt_time_on_one_model_min = (gpt_t1 - gpt_t1) / 60 +gpt_time_on_one_model_min = (gpt_t1 - gpt_t0) / 60 hy_df = pd.DataFrame(history.history) print(hy_df) @@ -210,7 +210,7 @@ def from_config(cls, config): # Optimal for accuracy: max_seq_length = 900 -max_seq_length = 250 +max_seq_length = 750 inp = tf.keras.layers.Input(shape=(), dtype=tf.string) gp2_tokenizer = TokenizerLayer(max_seq_length=max_seq_length) From f24a8583b5fa5a35777b183d1186a643a94d5534 Mon Sep 17 00:00:00 2001 From: David Thrower Date: Sun, 23 Mar 2025 15:11:46 -0400 Subject: [PATCH 18/48] Update phishing_email_detection_gpt2.py Try 1024 seq len. --- phishing_email_detection_gpt2.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/phishing_email_detection_gpt2.py b/phishing_email_detection_gpt2.py index 7159ced..32253d4 100644 --- a/phishing_email_detection_gpt2.py +++ b/phishing_email_detection_gpt2.py @@ -208,9 +208,9 @@ def from_config(cls, config): # GPT2 configurables -# Optimal for accuracy: -max_seq_length = 900 -max_seq_length = 750 +# Optimal for accuracy thus far: +# max_seq_length = 900 +max_seq_length = 1024 inp = tf.keras.layers.Input(shape=(), dtype=tf.string) gp2_tokenizer = TokenizerLayer(max_seq_length=max_seq_length) From 4e157563ae7a1c2130561ae88df82139dbf3d42d Mon Sep 17 00:00:00 2001 From: David Thrower Date: Sun, 23 Mar 2025 15:13:10 -0400 Subject: [PATCH 19/48] Update automerge.yml Added branch to the workflow... --- .github/workflows/automerge.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/automerge.yml b/.github/workflows/automerge.yml index 9490c3e..9ab79a0 100644 --- a/.github/workflows/automerge.yml +++ b/.github/workflows/automerge.yml @@ -5,7 +5,7 @@ name: Python application on: push: - branches: [ "main", "154-benchmark-inference-times---cerebros-model-vs-original-gpt-2" ] + branches: [ "main", "156-try-1024-seq-length-with-cerebros-model-from-154" ] permissions: contents: read From 9a4db1554911173f37bd83c3c4cc2689228f5393 Mon Sep 17 00:00:00 2001 From: David Thrower Date: Tue, 25 Mar 2025 18:49:07 -0400 Subject: [PATCH 20/48] Update phishing_email_detection_gpt2.py Added a positional embedding and a LayerNorm to the text embedding. 
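A condensed sketch of the token-plus-position embedding block this commit builds, assuming keras_nlp's PositionEmbedding layer (the exact import path is corrected a few commits later in this series):

from keras_nlp.layers import PositionEmbedding

tok_emb = tf.keras.layers.Embedding(input_dim=VOCABULARY_SIZE,
                                    output_dim=EMBEDDING_DIM,
                                    mask_zero=True)(tokens)
pos_emb = PositionEmbedding(sequence_length=max_seq_length)(tok_emb)
x = tf.keras.layers.add([tok_emb, pos_emb])               # merge the two embeddings
x = tf.keras.layers.LayerNormalization(epsilon=1e-6)(x)   # normalize the sum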
--- phishing_email_detection_gpt2.py | 35 ++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/phishing_email_detection_gpt2.py b/phishing_email_detection_gpt2.py index 32253d4..0cc9b4d 100644 --- a/phishing_email_detection_gpt2.py +++ b/phishing_email_detection_gpt2.py @@ -84,6 +84,7 @@ """### A custom GPT2 encoder layer for text embedding""" +""" un - string out class GPT2Layer(tf.keras.layers.Layer): def __init__(self, max_seq_length, **kwargs): @@ -181,6 +182,7 @@ def from_config(cls, config): hy_df = pd.DataFrame(history.history) print(hy_df) +""" # end un - string out ### Cerebros model: @@ -224,13 +226,28 @@ def from_config(cls, config): output_dim=15, input_length=max_seq_length, mask_zero=True)(tokens) -dropout_embedded = tf.keras.layers.Dropout(0.6)(embedded) -flattened = tf.keras.layers.Flatten()(dropout_embedded) -cerebros_base_model =\ - tf.keras.Model( - inputs=inp, - outputs=flattened) +x = tf.keras.layers.add([embedded, position_embedding]) +x = tf.keras.layers.LayerNormalization(epsilon=1e-6)(x) +x = tf.keras.layers.Dropout(0.6)(x) # AI suggested 0.4 +flattened = tf.keras.layers.Flatten()(x) + +cerebros_base_model = tf.keras.Model( + inputs=inp, + outputs=flattened # Output enhanced embeddings now +) + + + + + +# dropout_embedded = tf.keras.layers.Dropout(0.6)(embedded) +# flattened = tf.keras.layers.Flatten()(dropout_embedded) + +# cerebros_base_model =\ +# tf.keras.Model( +# inputs=inp, +# outputs=flattened) """### Cerebros search for the best model""" @@ -304,7 +321,8 @@ def from_config(cls, config): p_lateral_connection_decay=zero_95_exp_decay, num_lateral_connection_tries_per_unit=num_lateral_connection_tries_per_unit, learning_rate=learning_rate, - loss=tf.keras.losses.CategoricalHinge(), + loss=tf.keras.losses.BinaryCrossentropy(), + # loss=tf.keras.losses.CategoricalHinge(), metrics=[tf.keras.metrics.BinaryAccuracy(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall()], @@ -324,7 +342,8 @@ def from_config(cls, config): cerebros_time_per_model = cerebros_time_all_models_min / models_tried print(f"Cerebros trained {models_tried} models FROM A COLD START in ONLY {cerebros_time_all_models_min} min. Cerebros took only {cerebros_time_per_model} minutes on average per model.") -print(f"GPT2 took {gpt_time_on_one_model_min} just to FINE TUNE one PRE - TRAINED model. Although this is a small scale test, this shows the advantage of scaling in ON timing VS ON**2 timing.") +# Un-comment out the next line +# print(f"GPT2 took {gpt_time_on_one_model_min} just to FINE TUNE one PRE - TRAINED model. Although this is a small scale test, this shows the advantage of scaling in ON timing VS ON**2 timing.") print(f'Cerebros best accuracy achieved is {result}') From 59cfa23b5ee8c91d5431609f580f825470db55d4 Mon Sep 17 00:00:00 2001 From: David Thrower Date: Tue, 25 Mar 2025 18:53:10 -0400 Subject: [PATCH 21/48] Update phishing_email_detection_gpt2.py Missed position embedding in copy and paste ... 
--- phishing_email_detection_gpt2.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/phishing_email_detection_gpt2.py b/phishing_email_detection_gpt2.py index 0cc9b4d..5336fc5 100644 --- a/phishing_email_detection_gpt2.py +++ b/phishing_email_detection_gpt2.py @@ -227,6 +227,12 @@ def from_config(cls, config): input_length=max_seq_length, mask_zero=True)(tokens) +position_embedding = tf.keras.layers.PositionEmbedding( + input_dim=max_seq_length, + output_dim=EMBEDDING_DIM, + embeddings_initializer="uniform" +)(embedded) + x = tf.keras.layers.add([embedded, position_embedding]) x = tf.keras.layers.LayerNormalization(epsilon=1e-6)(x) x = tf.keras.layers.Dropout(0.6)(x) # AI suggested 0.4 From d928a5469cbbd3f922521bffa568b41c26dfa115 Mon Sep 17 00:00:00 2001 From: David Thrower Date: Tue, 25 Mar 2025 19:10:16 -0400 Subject: [PATCH 22/48] Update phishing_email_detection_gpt2.py Synchronize embedding dim across embeddings. --- phishing_email_detection_gpt2.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/phishing_email_detection_gpt2.py b/phishing_email_detection_gpt2.py index 5336fc5..3fea678 100644 --- a/phishing_email_detection_gpt2.py +++ b/phishing_email_detection_gpt2.py @@ -220,12 +220,13 @@ def from_config(cls, config): tokens = gp2_tokenizer(inp) -embedded =\ - tf.keras.layers.Embedding( - input_dim=VOCABULARY_SIZE, - output_dim=15, - input_length=max_seq_length, - mask_zero=True)(tokens) +EMBEDDING_DIM = 15 # Define EMBEDDING_DIM here, to match your embedding layer. + +embedded = tf.keras.layers.Embedding( + input_dim=VOCABULARY_SIZE, + output_dim=EMBEDDING_DIM, + input_length=max_seq_length, + mask_zero=True)(tokens) position_embedding = tf.keras.layers.PositionEmbedding( input_dim=max_seq_length, From 3c25a2235fca470cb526b85bfb03ff24a3071e53 Mon Sep 17 00:00:00 2001 From: David Thrower Date: Tue, 25 Mar 2025 19:46:32 -0400 Subject: [PATCH 23/48] Update phishing_email_detection_gpt2.py Corrected import of PositionEmbedding. --- phishing_email_detection_gpt2.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/phishing_email_detection_gpt2.py b/phishing_email_detection_gpt2.py index 3fea678..052670f 100644 --- a/phishing_email_detection_gpt2.py +++ b/phishing_email_detection_gpt2.py @@ -14,6 +14,7 @@ import tensorflow as tf import tensorflow_text from keras_nlp.models import GPT2Tokenizer, GPT2Preprocessor, GPT2Backbone +from keras_nlp.layers import PositionEmbedding from sklearn.model_selection import train_test_split from sklearn.utils import shuffle from tensorflow.keras.utils import to_categorical @@ -228,10 +229,9 @@ def from_config(cls, config): input_length=max_seq_length, mask_zero=True)(tokens) -position_embedding = tf.keras.layers.PositionEmbedding( - input_dim=max_seq_length, - output_dim=EMBEDDING_DIM, - embeddings_initializer="uniform" +position_embedding = PositionEmbedding( + sequence_length=max_seq_length, + initializer="uniform", )(embedded) x = tf.keras.layers.add([embedded, position_embedding]) From 88a1bd57c385e5b40248a8ec6c9ac1ecb1125ead Mon Sep 17 00:00:00 2001 From: David Thrower Date: Tue, 25 Mar 2025 22:50:03 -0400 Subject: [PATCH 24/48] Update phishing_email_detection_gpt2.py Remove layernorm, concat instead of add. 
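The shape consequence of this change, for reference: add() keeps the last axis at EMBEDDING_DIM, while Concatenate() doubles it, so the flattened vector feeding the Cerebros search also doubles in width.

# Both inputs are (batch, max_seq_length, EMBEDDING_DIM):
added = tf.keras.layers.add([embedded, position_embedding])
# added:  (batch, max_seq_length, EMBEDDING_DIM)
merged = tf.keras.layers.Concatenate()([embedded, position_embedding])
# merged: (batch, max_seq_length, 2 * EMBEDDING_DIM)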
--- phishing_email_detection_gpt2.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/phishing_email_detection_gpt2.py b/phishing_email_detection_gpt2.py index 052670f..489edc7 100644 --- a/phishing_email_detection_gpt2.py +++ b/phishing_email_detection_gpt2.py @@ -234,8 +234,9 @@ def from_config(cls, config): initializer="uniform", )(embedded) -x = tf.keras.layers.add([embedded, position_embedding]) -x = tf.keras.layers.LayerNormalization(epsilon=1e-6)(x) +# x = tf.keras.layers.add([embedded, position_embedding]) +x = x = tf.keras.layers.Concatenate()([embedded, position_embedding]) +# x = tf.keras.layers.LayerNormalization(epsilon=1e-6)(x) x = tf.keras.layers.Dropout(0.6)(x) # AI suggested 0.4 flattened = tf.keras.layers.Flatten()(x) From 42d9c4f0cddf200faea640b9301549ddefe90f20 Mon Sep 17 00:00:00 2001 From: David Thrower Date: Wed, 26 Mar 2025 01:05:40 -0400 Subject: [PATCH 25/48] Update phishing_email_detection_gpt2.py Try addition to merge embeddings without LayerNorm --- phishing_email_detection_gpt2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/phishing_email_detection_gpt2.py b/phishing_email_detection_gpt2.py index 489edc7..c40f922 100644 --- a/phishing_email_detection_gpt2.py +++ b/phishing_email_detection_gpt2.py @@ -234,8 +234,8 @@ def from_config(cls, config): initializer="uniform", )(embedded) -# x = tf.keras.layers.add([embedded, position_embedding]) -x = x = tf.keras.layers.Concatenate()([embedded, position_embedding]) +x = tf.keras.layers.add([embedded, position_embedding]) +# x = x = tf.keras.layers.Concatenate()([embedded, position_embedding]) # x = tf.keras.layers.LayerNormalization(epsilon=1e-6)(x) x = tf.keras.layers.Dropout(0.6)(x) # AI suggested 0.4 flattened = tf.keras.layers.Flatten()(x) From ed4641e1d717863b4fadd36ecd511989f6de5ea1 Mon Sep 17 00:00:00 2001 From: David Thrower Date: Wed, 26 Mar 2025 12:54:09 -0400 Subject: [PATCH 26/48] Update phishing_email_detection_gpt2.py Restore optimal run with position embedding. Reduce max levels to fit the optimal run and reduce overhead. Test this to see if it works. if successful, add back the commented out comparison and PR. Then open an issue to optimize the params around this new model. We may need to run this on Katib to optimize the hyperparameters, as the model is fundamentally different than the original and can probably be optimized considerably. --- phishing_email_detection_gpt2.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/phishing_email_detection_gpt2.py b/phishing_email_detection_gpt2.py index c40f922..b0b1d80 100644 --- a/phishing_email_detection_gpt2.py +++ b/phishing_email_detection_gpt2.py @@ -220,7 +220,8 @@ def from_config(cls, config): VOCABULARY_SIZE = gp2_tokenizer.tokenizer.vocabulary_size() tokens = gp2_tokenizer(inp) - +# On larger hardware, this could probably be increased considerably and +# Probably would improve performance ... EMBEDDING_DIM = 15 # Define EMBEDDING_DIM here, to match your embedding layer. embedded = tf.keras.layers.Embedding( @@ -234,9 +235,11 @@ def from_config(cls, config): initializer="uniform", )(embedded) -x = tf.keras.layers.add([embedded, position_embedding]) -# x = x = tf.keras.layers.Concatenate()([embedded, position_embedding]) -# x = tf.keras.layers.LayerNormalization(epsilon=1e-6)(x) +# As an FYI, we tried an add layer both with and without +# LayerNorm ... It degraded accuracy +# Just an FYI for anyone trying to apply conventional wisdom +# to save you the time ... 
+x = x = tf.keras.layers.Concatenate()([embedded, position_embedding]) x = tf.keras.layers.Dropout(0.6)(x) # AI suggested 0.4 flattened = tf.keras.layers.Flatten()(x) @@ -272,7 +275,7 @@ def from_config(cls, config): epochs = 15 # [1, 100] batch_size = 20 minimum_levels = 2 -maximum_levels = 4 # [3,7] +maximum_levels = 3 # [3,7] minimum_units_per_level = 4 maximum_units_per_level = 8 From cdb445511a3fe81786b934827853ff2be1b4ee73 Mon Sep 17 00:00:00 2001 From: David Thrower Date: Wed, 26 Mar 2025 14:50:24 -0400 Subject: [PATCH 27/48] Update phishing_email_detection_gpt2.py Hard set levels to the known optimum. --- phishing_email_detection_gpt2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phishing_email_detection_gpt2.py b/phishing_email_detection_gpt2.py index b0b1d80..f1105dc 100644 --- a/phishing_email_detection_gpt2.py +++ b/phishing_email_detection_gpt2.py @@ -274,7 +274,7 @@ def from_config(cls, config): learning_rate = 0.0000511065 epochs = 15 # [1, 100] batch_size = 20 -minimum_levels = 2 +minimum_levels = 3 maximum_levels = 3 # [3,7] minimum_units_per_level = 4 From 048eb1bf907adb339360e8057c32a0d8a43ae885 Mon Sep 17 00:00:00 2001 From: David Thrower Date: Wed, 26 Mar 2025 15:34:54 -0400 Subject: [PATCH 28/48] Update phishing_email_detection_gpt2.py Corrected hard set on levels to correct optima. --- phishing_email_detection_gpt2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/phishing_email_detection_gpt2.py b/phishing_email_detection_gpt2.py index f1105dc..9bb7e84 100644 --- a/phishing_email_detection_gpt2.py +++ b/phishing_email_detection_gpt2.py @@ -274,8 +274,8 @@ def from_config(cls, config): learning_rate = 0.0000511065 epochs = 15 # [1, 100] batch_size = 20 -minimum_levels = 3 -maximum_levels = 3 # [3,7] +minimum_levels = 4 +maximum_levels = 4 # [3,7] minimum_units_per_level = 4 maximum_units_per_level = 8 From b800cf7664e342ccfccfdb6500ca34edea05df86 Mon Sep 17 00:00:00 2001 From: David Thrower Date: Wed, 26 Mar 2025 20:53:13 -0400 Subject: [PATCH 29/48] Update phishing_email_detection_gpt2.py Restore the best model yet. --- phishing_email_detection_gpt2.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/phishing_email_detection_gpt2.py b/phishing_email_detection_gpt2.py index 9bb7e84..3d99c85 100644 --- a/phishing_email_detection_gpt2.py +++ b/phishing_email_detection_gpt2.py @@ -85,7 +85,7 @@ """### A custom GPT2 encoder layer for text embedding""" -""" un - string out + class GPT2Layer(tf.keras.layers.Layer): def __init__(self, max_seq_length, **kwargs): @@ -183,7 +183,6 @@ def from_config(cls, config): hy_df = pd.DataFrame(history.history) print(hy_df) -""" # end un - string out ### Cerebros model: @@ -274,8 +273,8 @@ def from_config(cls, config): learning_rate = 0.0000511065 epochs = 15 # [1, 100] batch_size = 20 -minimum_levels = 4 -maximum_levels = 4 # [3,7] +minimum_levels = 2 +maximum_levels = 3 # [3,7] minimum_units_per_level = 4 maximum_units_per_level = 8 @@ -353,8 +352,7 @@ def from_config(cls, config): cerebros_time_per_model = cerebros_time_all_models_min / models_tried print(f"Cerebros trained {models_tried} models FROM A COLD START in ONLY {cerebros_time_all_models_min} min. Cerebros took only {cerebros_time_per_model} minutes on average per model.") -# Un-comment out the next line -# print(f"GPT2 took {gpt_time_on_one_model_min} just to FINE TUNE one PRE - TRAINED model. 
Although this is a small scale test, this shows the advantage of scaling in ON timing VS ON**2 timing.")
+print(f"GPT2 took {gpt_time_on_one_model_min} just to FINE TUNE one PRE - TRAINED model. Although this is a small scale test, this shows the advantage of scaling in ON timing VS ON**2 timing.")
 
 print(f'Cerebros best accuracy achieved is {result}')

From 7930a2d102c0a155bd8f386882d8adf342189bf3 Mon Sep 17 00:00:00 2001
From: David Thrower
Date: Wed, 26 Mar 2025 20:55:08 -0400
Subject: [PATCH 30/48] Update automerge.yml

Add back the CI/CD test for image classification. Prepare for PR.
---
 .github/workflows/automerge.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/automerge.yml b/.github/workflows/automerge.yml
index 9ab79a0..8a3c062 100644
--- a/.github/workflows/automerge.yml
+++ b/.github/workflows/automerge.yml
@@ -49,9 +49,9 @@ jobs:
      # - name: Test text classifier - random search - ham-spam
      #   run: python3 text-class-ham-or-spam.py
      #   timeout-minutes: 90
-     # - name: Test image classifier - small subset of CIFAR10 # add back
-     #   timeout-minutes: 90
-     #   run: python3 cifar10-example.py
+     - name: Test image classifier - small subset of CIFAR10 # add back
+       timeout-minutes: 90
+       run: python3 cifar10-example.py
      - name: Phishing email detection with GPT2 embedding
        timeout-minutes: 300
        run: python3 phishing_email_detection_gpt2.py

From e6ae27ca25be65eb00890f7663d7ad59c7a6c8c1 Mon Sep 17 00:00:00 2001
From: David Thrower
Date: Sun, 30 Mar 2025 16:06:07 -0400
Subject: [PATCH 31/48] Update automerge.yml

Comment out workflows that we don't need in dev. Delete permanently
disused workflows.
---
 .github/workflows/automerge.yml | 27 +++++++++------------------
 1 file changed, 9 insertions(+), 18 deletions(-)

diff --git a/.github/workflows/automerge.yml b/.github/workflows/automerge.yml
index 8a3c062..0efdd14 100644
--- a/.github/workflows/automerge.yml
+++ b/.github/workflows/automerge.yml
@@ -5,7 +5,7 @@ name: Python application

 on:
   push:
-    branches: [ "main", "156-try-1024-seq-length-with-cerebros-model-from-154" ]
+    branches: [ "main", "158-try-adamw-optimizer" ]

 permissions:
   contents: read
@@ -33,25 +33,16 @@ jobs:
        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
        # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
-# - name: Test by running.
-# run: python3 cerebros.py
-# - name: Test distributed by running.
-# run: python3 test_simple_cerebros_gridsearch.py
-# - name: Test distributed random search wine by running.
-# run: python3 random_search.py
-# - name: Test CerebrosRealNeuronNetwork
-# run: python3 realnn-regression-example-ames-no-preproc.py
-# timeout-minutes: 45
-      - name: Test distributed random search Ames by running
-        run: python3 regression-example-ames-no-preproc.py
-      - name: Test distributed random search Ames by running - Val set
-        run: python3 regression-example-ames-no-preproc-val-set.py
+      # - name: Test distributed random search Ames by running
+      #   run: python3 regression-example-ames-no-preproc.py
+      # - name: Test distributed random search Ames by running - Val set
+      #   run: python3 regression-example-ames-no-preproc-val-set.py
       # - name: Test text classifier - random search - ham-spam
       #   run: python3 text-class-ham-or-spam.py
       #   timeout-minutes: 90
-      - name: Test image classifier - small subset of CIFAR10 # add back
-        timeout-minutes: 90
-        run: python3 cifar10-example.py
+      # - name: Test image classifier - small subset of CIFAR10 # add back
+      #   timeout-minutes: 90
+      #   run: python3 cifar10-example.py
       - name: Phishing email detection with GPT2 embedding
-        timeout-minutes: 300
+        timeout-minutes: 420
         run: python3 phishing_email_detection_gpt2.py

From 0eab09e7094dd355c417084b8e8f28f66b81a15c Mon Sep 17 00:00:00 2001
From: David Thrower
Date: Sun, 30 Mar 2025 16:23:20 -0400
Subject: [PATCH 32/48] Update neural_network_future.py

Made AdamW the default optimizer. We need to parameterize this and add an
optional hyperparameter for the weight_decay (a sketch of that
parameterization follows the final patch in this series).
---
 cerebros/neuralnetworkfuture/neural_network_future.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/cerebros/neuralnetworkfuture/neural_network_future.py b/cerebros/neuralnetworkfuture/neural_network_future.py
index b91adf6..4643794 100644
--- a/cerebros/neuralnetworkfuture/neural_network_future.py
+++ b/cerebros/neuralnetworkfuture/neural_network_future.py
@@ -332,8 +332,10 @@ def compile_neural_network(self):
         self.materialized_neural_network.compile(
             loss=self.loss,
             metrics=self.metrics,
-            optimizer=tf.keras.optimizers.Adam(
-                learning_rate=self.learning_rate),
+            optimizer=tf.keras.optimizers.AdamW(
+                learning_rate=self.learning_rate,
+                weight_decay=0.004  # Add weight decay parameter
+            ),
             jit_compile=jit_compile)

 def util_parse_connectivity_csv(self):

From 8939f3cdb01668c7d57702f5484771124c4fc3f7 Mon Sep 17 00:00:00 2001
From: David Thrower
Date: Sun, 30 Mar 2025 16:31:26 -0400
Subject: [PATCH 33/48] Update phishing_email_detection_gpt2.py

Test with default params with AdamW.
---
 phishing_email_detection_gpt2.py | 24 +++--------------------
 1 file changed, 3 insertions(+), 21 deletions(-)

diff --git a/phishing_email_detection_gpt2.py b/phishing_email_detection_gpt2.py
index 3d99c85..ed79320 100644
--- a/phishing_email_detection_gpt2.py
+++ b/phishing_email_detection_gpt2.py
@@ -85,6 +85,7 @@

 """### A custom GPT2 encoder layer for text embedding"""

+"""

 class GPT2Layer(tf.keras.layers.Layer):

     def __init__(self, max_seq_length, **kwargs):
@@ -183,6 +184,7 @@ def from_config(cls, config):

 hy_df = pd.DataFrame(history.history)
 print(hy_df)
+"""

 ### Cerebros model:
@@ -248,17 +250,6 @@ def from_config(cls, config):
 )

-
-
-
-# dropout_embedded = tf.keras.layers.Dropout(0.6)(embedded)
-# flattened = tf.keras.layers.Flatten()(dropout_embedded)
-
-# cerebros_base_model =\
-#     tf.keras.Model(
-#         inputs=inp,
-#         outputs=flattened)
-
 """### Cerebros search for the best model"""

 #
@@ -352,7 +343,7 @@ def from_config(cls, config):
 cerebros_time_per_model = cerebros_time_all_models_min / models_tried

 print(f"Cerebros trained {models_tried} models FROM A COLD START in ONLY {cerebros_time_all_models_min} min. Cerebros took only {cerebros_time_per_model} minutes on average per model.")
-print(f"GPT2 took {gpt_time_on_one_model_min} min just to FINE TUNE one PRE-TRAINED model. Although this is a small scale test, this shows the advantage of scaling in O(N) time VS O(N**2) time.")
+# print(f"GPT2 took {gpt_time_on_one_model_min} min just to FINE TUNE one PRE-TRAINED model. Although this is a small scale test, this shows the advantage of scaling in O(N) time VS O(N**2) time.")

 print(f'Cerebros best accuracy achieved is {result}')
@@ -360,12 +351,3 @@ def from_config(cls, config):

 # """### Testing the best model found"""

-# #
-# # Load the best model (taking into account that it has a custom layer)
-# #
-# best_model_found =\
-#     tf.keras.models.load_model(cerebros_automl.best_model_path,\
-#         custom_objects={'GPT2Layer': GPT2Layer(max_seq_length)})
-
-# print('Evaluating on the test dataset')
-# best_model_found.evaluate(X_test, y_test)

From 966f71451716e212cceab7bea4dc109a4224421c Mon Sep 17 00:00:00 2001
From: David Thrower
Date: Wed, 2 Apr 2025 16:49:02 -0400
Subject: [PATCH 34/48] Update phishing_email_detection_gpt2.py

Combined the best hyperparameters from the hyperparameter optimization study
with the AdamW optimizer (a sketch of the resulting base model follows the
final patch in this series).
---
 phishing_email_detection_gpt2.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/phishing_email_detection_gpt2.py b/phishing_email_detection_gpt2.py
index ed79320..605e014 100644
--- a/phishing_email_detection_gpt2.py
+++ b/phishing_email_detection_gpt2.py
@@ -223,7 +223,7 @@ def from_config(cls, config):
 # On larger hardware, this could probably be increased considerably and
 # Probably would improve performance ...

-EMBEDDING_DIM = 15 # Define EMBEDDING_DIM here, to match your embedding layer.
+EMBEDDING_DIM = 23 # Define EMBEDDING_DIM here, to match your embedding layer.

 embedded = tf.keras.layers.Embedding(
     input_dim=VOCABULARY_SIZE,
@@ -241,7 +241,7 @@ def from_config(cls, config):
 # Just an FYI for anyone trying to apply conventional wisdom
 # to save you the time ...
 x = tf.keras.layers.Concatenate()([embedded, position_embedding])
-x = tf.keras.layers.Dropout(0.6)(x) # AI suggested 0.4
+x = tf.keras.layers.Dropout(0.4)(x) # AI suggested 0.4
 flattened = tf.keras.layers.Flatten()(x)

 cerebros_base_model = tf.keras.Model(
@@ -255,23 +255,23 @@ def from_config(cls, config):
 #
 # Cerebros configurables
 #
-activation = 'gelu'
-predecessor_level_connection_affinity_factor_first = 49.9999
-predecessor_level_connection_affinity_factor_main = 0.31456
-max_consecutive_lateral_connections = 22
-p_lateral_connection = 0.39256
-num_lateral_connection_tries_per_unit = 10
-learning_rate = 0.0000511065
+activation = "relu"
+predecessor_level_connection_affinity_factor_first = 10
+predecessor_level_connection_affinity_factor_main = 40
+max_consecutive_lateral_connections = 20
+p_lateral_connection = 30
+num_lateral_connection_tries_per_unit = 25
+learning_rate = 3 * 10 ** -3
 epochs = 15 # [1, 100]
-batch_size = 20
+batch_size = 17
 minimum_levels = 2
-maximum_levels = 3 # [3,7]
+maximum_levels = 2 # [3,7]

 minimum_units_per_level = 4
-maximum_units_per_level = 8
+maximum_units_per_level = 7

 minimum_neurons_per_unit = 1
-maximum_neurons_per_unit = 5 # [2,20]
+maximum_neurons_per_unit = 2

 moities_to_try = 5
 tries_per_moity = 1

From 9724e9d0f284e32e69ac4d6e7e15aff0105b0f8a Mon Sep 17 00:00:00 2001
From: David Thrower
Date: Wed, 2 Apr 2025 16:49:44 -0400
Subject: [PATCH 35/48] Update automerge.yml

Add branch to workflow to make it start.
---
 .github/workflows/automerge.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/automerge.yml b/.github/workflows/automerge.yml
index 0efdd14..1e7a494 100644
--- a/.github/workflows/automerge.yml
+++ b/.github/workflows/automerge.yml
@@ -5,7 +5,7 @@ name: Python application

 on:
   push:
-    branches: [ "main", "158-try-adamw-optimizer" ]
+    branches: [ "main", "160-try-nlp-optima-from-2025-03-30-study-with-adamw" ]

 permissions:
   contents: read

From 380928dc4ca8a2eaf08410b8df6089e59ac322b3 Mon Sep 17 00:00:00 2001
From: David Thrower
Date: Wed, 2 Apr 2025 19:03:28 -0400
Subject: [PATCH 36/48] Update automerge.yml

Add back all workflows that are to be used.
---
 .github/workflows/automerge.yml | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/automerge.yml b/.github/workflows/automerge.yml
index 1e7a494..f44c4b0 100644
--- a/.github/workflows/automerge.yml
+++ b/.github/workflows/automerge.yml
@@ -33,16 +33,16 @@ jobs:
         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
     # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
     # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
-      # - name: Test distributed random search Ames by running
-      #   run: python3 regression-example-ames-no-preproc.py
-      # - name: Test distributed random search Ames by running - Val set
-      #   run: python3 regression-example-ames-no-preproc-val-set.py
+      - name: Test distributed random search Ames by running
+        run: python3 regression-example-ames-no-preproc.py
+      - name: Test distributed random search Ames by running - Val set
+        run: python3 regression-example-ames-no-preproc-val-set.py
       # - name: Test text classifier - random search - ham-spam
       #   run: python3 text-class-ham-or-spam.py
       #   timeout-minutes: 90
-      # - name: Test image classifier - small subset of CIFAR10 # add back
-      #   timeout-minutes: 90
-      #   run: python3 cifar10-example.py
+      - name: Test image classifier - small subset of CIFAR10 # add back
+        timeout-minutes: 90
+        run: python3 cifar10-example.py
       - name: Phishing email detection with GPT2 embedding
         timeout-minutes: 420
         run: python3 phishing_email_detection_gpt2.py

From 9323f5f880113c76d7f5329ff7e21e2f6d8018cc Mon Sep 17 00:00:00 2001
From: David Thrower
Date: Wed, 2 Apr 2025 19:05:24 -0400
Subject: [PATCH 37/48] Update phishing_email_detection_gpt2.py

Added back the GPT baseline model for comparison.
---
 phishing_email_detection_gpt2.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/phishing_email_detection_gpt2.py b/phishing_email_detection_gpt2.py
index 605e014..25cdbc6 100644
--- a/phishing_email_detection_gpt2.py
+++ b/phishing_email_detection_gpt2.py
@@ -85,7 +85,6 @@

 """### A custom GPT2 encoder layer for text embedding"""

-"""

 class GPT2Layer(tf.keras.layers.Layer):

@@ -184,7 +183,6 @@ def from_config(cls, config):

 hy_df = pd.DataFrame(history.history)
 print(hy_df)
-"""

 ### Cerebros model:
@@ -343,7 +341,7 @@ def from_config(cls, config):
 cerebros_time_per_model = cerebros_time_all_models_min / models_tried

 print(f"Cerebros trained {models_tried} models FROM A COLD START in ONLY {cerebros_time_all_models_min} min. Cerebros took only {cerebros_time_per_model} minutes on average per model.")
-# print(f"GPT2 took {gpt_time_on_one_model_min} min just to FINE TUNE one PRE-TRAINED model. Although this is a small scale test, this shows the advantage of scaling in O(N) time VS O(N**2) time.")
+print(f"GPT2 took {gpt_time_on_one_model_min} min just to FINE TUNE one PRE-TRAINED model. Although this is a small scale test, this shows the advantage of scaling in O(N) time VS O(N**2) time.")

 print(f'Cerebros best accuracy achieved is {result}')

From f683fb8e6c24ef9d92a06872573df6a19cef7fc6 Mon Sep 17 00:00:00 2001
From: David Thrower
Date: Wed, 2 Apr 2025 19:44:39 -0400
Subject: [PATCH 38/48] Update phishing_email_detection_gpt2.py

Optimize NLP workflow for time's sake.
---
 phishing_email_detection_gpt2.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/phishing_email_detection_gpt2.py b/phishing_email_detection_gpt2.py
index 25cdbc6..91c1451 100644
--- a/phishing_email_detection_gpt2.py
+++ b/phishing_email_detection_gpt2.py
@@ -157,7 +157,7 @@ def from_config(cls, config):
 history = gpt_baseline_model.fit(
     x=X_train,  # Input data
     y=y_train,  # Labels
-    epochs=4,  # Number of training iterations
+    epochs=3,  # Number of training iterations
     batch_size=16,  # Batch size small due to GPU memory constraints
     validation_split=0.2,  # Hold out 20% of training data for validation
     shuffle=True,  # Shuffle data at each epoch
@@ -341,7 +341,7 @@ def from_config(cls, config):
 cerebros_time_per_model = cerebros_time_all_models_min / models_tried

 print(f"Cerebros trained {models_tried} models FROM A COLD START in ONLY {cerebros_time_all_models_min} min. Cerebros took only {cerebros_time_per_model} minutes on average per model.")
-print(f"GPT2 took {gpt_time_on_one_model_min} min just to FINE TUNE one PRE-TRAINED model. Although this is a small scale test, this shows the advantage of scaling in O(N) time VS O(N**2) time.")
+print(f"GPT2 took {gpt_time_on_one_model_min} min just to FINE TUNE one PRE-TRAINED model for 3 epochs. Although this is a small scale test, this shows the advantage of scaling in O(N) time VS O(N**2) time.")

 print(f'Cerebros best accuracy achieved is {result}')

From 69d9d1d1dcaec13a362b8ab255a4c6db19a693e9 Mon Sep 17 00:00:00 2001
From: David Thrower
Date: Thu, 3 Apr 2025 15:03:32 -0400
Subject: [PATCH 39/48] Update requirements.txt

Added tqdm 4.67.1 to requirements.
---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index 1964f13..146b1e5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,3 +8,4 @@ pyvis==0.3.2
 plotly==5.20.0
 matplotlib==3.8.4
 imageio==2.34.0
+tqdm==4.67.1

From ffb0e901c3323422f792beb2d58304970bead1dc Mon Sep 17 00:00:00 2001
From: David Thrower
Date: Thu, 3 Apr 2025 15:27:20 -0400
Subject: [PATCH 40/48] Update simple_cerebros_random_search.py

Try adding a global progress bar.
---
 .../simple_cerebros_random_search.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/cerebros/simplecerebrosrandomsearch/simple_cerebros_random_search.py b/cerebros/simplecerebrosrandomsearch/simple_cerebros_random_search.py
index 125582c..b0690f5 100644
--- a/cerebros/simplecerebrosrandomsearch/simple_cerebros_random_search.py
+++ b/cerebros/simplecerebrosrandomsearch/simple_cerebros_random_search.py
@@ -518,8 +518,11 @@ def run_moity_permutations(self, spec, subtrial_number, lock):
         return 0

     def run_random_search(self):
+        iter_trial = 1
         processes = []
-        for i in np.arange(self.number_of_architecture_moities_to_try):
+        for i in tqdm(np.arange(self.number_of_architecture_moities_to_try),
+                      desc=f"Moiety {iter_trial}" of {self.number_of_architecture_moities_to_try} running",
+                      colour="#16ceeb"):
             self.parse_neural_network_structural_spec_random()
             spec = self.get_neural_network_spec()
@@ -537,6 +540,7 @@
             p.start()
         for p in processes:
             p.join()
+        iter_trial += 1
 #        final_oracles = pd.concat(oracles, ignore_index=False)
 #        if self.direction == "maximize":
 #            return float(final_oracles[self.metric_to_rank_by].values.max())

From 9fafcccdc1ee7729d9945e200dc0d053904c415e Mon Sep 17 00:00:00 2001
From: David Thrower
Date: Thu, 3 Apr 2025 15:28:23 -0400
Subject: [PATCH 41/48] Update automerge.yml

Added branch to the workflow.
--- .github/workflows/automerge.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/automerge.yml b/.github/workflows/automerge.yml index f44c4b0..d7ecd0a 100644 --- a/.github/workflows/automerge.yml +++ b/.github/workflows/automerge.yml @@ -5,7 +5,7 @@ name: Python application on: push: - branches: [ "main", "160-try-nlp-optima-from-2025-03-30-study-with-adamw" ] + branches: [ "main", "162-add-a-tqdm-global-progress-bar-to-nas-search-task" ] permissions: contents: read From aba7589ff4b06d770597b6b3aab12b16af7d5a7e Mon Sep 17 00:00:00 2001 From: David Thrower Date: Thu, 3 Apr 2025 15:31:44 -0400 Subject: [PATCH 42/48] Update simple_cerebros_random_search.py Syntax correction. --- .../simplecerebrosrandomsearch/simple_cerebros_random_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cerebros/simplecerebrosrandomsearch/simple_cerebros_random_search.py b/cerebros/simplecerebrosrandomsearch/simple_cerebros_random_search.py index b0690f5..0063f0e 100644 --- a/cerebros/simplecerebrosrandomsearch/simple_cerebros_random_search.py +++ b/cerebros/simplecerebrosrandomsearch/simple_cerebros_random_search.py @@ -521,7 +521,7 @@ def run_random_search(self): iter_trial = 1 processes = [] for i in tqdm(np.arange(self.number_of_architecture_moities_to_try), - desc=f"Moiety {iter_trial}" of {self.number_of_architecture_moities_to_try} running", + desc=f"Moiety {iter_trial} of {self.number_of_architecture_moities_to_try} running", colour="#16ceeb"): self.parse_neural_network_structural_spec_random() spec = self.get_neural_network_spec() From f6284986d50eeeb01780da4e649ef2be655d33cc Mon Sep 17 00:00:00 2001 From: David Thrower Date: Thu, 3 Apr 2025 15:33:47 -0400 Subject: [PATCH 43/48] Update simple_cerebros_random_search.py Added import statement ... --- .../simplecerebrosrandomsearch/simple_cerebros_random_search.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cerebros/simplecerebrosrandomsearch/simple_cerebros_random_search.py b/cerebros/simplecerebrosrandomsearch/simple_cerebros_random_search.py index 0063f0e..b48c601 100644 --- a/cerebros/simplecerebrosrandomsearch/simple_cerebros_random_search.py +++ b/cerebros/simplecerebrosrandomsearch/simple_cerebros_random_search.py @@ -2,6 +2,7 @@ import numpy as np import pandas as pd import tensorflow as tf +from tqdm import tqdm from cerebros.denseautomlstructuralcomponent.\ dense_automl_structural_component \ import DenseAutoMlStructuralComponent, DenseLateralConnectivity, \ From 689f00394751ece3efeb0ab2d470576deed99614 Mon Sep 17 00:00:00 2001 From: David Thrower Date: Thu, 3 Apr 2025 15:44:11 -0400 Subject: [PATCH 44/48] Update simple_cerebros_random_search.py Try to deal with progress bar floating away. 
--- .../simplecerebrosrandomsearch/simple_cerebros_random_search.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cerebros/simplecerebrosrandomsearch/simple_cerebros_random_search.py b/cerebros/simplecerebrosrandomsearch/simple_cerebros_random_search.py index b48c601..1007557 100644 --- a/cerebros/simplecerebrosrandomsearch/simple_cerebros_random_search.py +++ b/cerebros/simplecerebrosrandomsearch/simple_cerebros_random_search.py @@ -523,6 +523,7 @@ def run_random_search(self): processes = [] for i in tqdm(np.arange(self.number_of_architecture_moities_to_try), desc=f"Moiety {iter_trial} of {self.number_of_architecture_moities_to_try} running", + ascii=True, colour="#16ceeb"): self.parse_neural_network_structural_spec_random() spec = self.get_neural_network_spec() From 40c35827748b86bc22e982078fa77a2a7214cdc5 Mon Sep 17 00:00:00 2001 From: David Thrower Date: Thu, 3 Apr 2025 16:17:19 -0400 Subject: [PATCH 45/48] Update simple_cerebros_random_search.py Fix increment of iter_trial. --- .../simplecerebrosrandomsearch/simple_cerebros_random_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cerebros/simplecerebrosrandomsearch/simple_cerebros_random_search.py b/cerebros/simplecerebrosrandomsearch/simple_cerebros_random_search.py index 1007557..52041e2 100644 --- a/cerebros/simplecerebrosrandomsearch/simple_cerebros_random_search.py +++ b/cerebros/simplecerebrosrandomsearch/simple_cerebros_random_search.py @@ -525,6 +525,7 @@ def run_random_search(self): desc=f"Moiety {iter_trial} of {self.number_of_architecture_moities_to_try} running", ascii=True, colour="#16ceeb"): + iter_trial += 1 self.parse_neural_network_structural_spec_random() spec = self.get_neural_network_spec() @@ -542,7 +543,6 @@ def run_random_search(self): p.start() for p in processes: p.join() - iter_trial += 1 # final_oracles = pd.concat(oracles, ignore_index=False) # if self.direction == "maximize": # return float(final_oracles[self.metric_to_rank_by].values.max()) From 642452ff4c2f1cc198c52e58be63f0660ed9db50 Mon Sep 17 00:00:00 2001 From: David Thrower Date: Thu, 3 Apr 2025 16:28:23 -0400 Subject: [PATCH 46/48] Update simple_cerebros_random_search.py Use self.trial_number as the basis for trial in tqdm. 
---
 .../simple_cerebros_random_search.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/cerebros/simplecerebrosrandomsearch/simple_cerebros_random_search.py b/cerebros/simplecerebrosrandomsearch/simple_cerebros_random_search.py
index 52041e2..06ba512 100644
--- a/cerebros/simplecerebrosrandomsearch/simple_cerebros_random_search.py
+++ b/cerebros/simplecerebrosrandomsearch/simple_cerebros_random_search.py
@@ -519,13 +519,12 @@ def run_moity_permutations(self, spec, subtrial_number, lock):
         return 0

     def run_random_search(self):
-        iter_trial = 1
         processes = []
         for i in tqdm(np.arange(self.number_of_architecture_moities_to_try),
-                      desc=f"Moiety {iter_trial} of {self.number_of_architecture_moities_to_try} running",
+                      desc=f"Moiety {self.trial_number + 1} of {self.number_of_architecture_moities_to_try} running",
                       ascii=True,
                       colour="#16ceeb"):
-            iter_trial += 1
+
             self.parse_neural_network_structural_spec_random()
             spec = self.get_neural_network_spec()

From 6f7c1f098e8174688acf583260c982360d90f698 Mon Sep 17 00:00:00 2001
From: David Thrower
Date: Thu, 3 Apr 2025 16:33:31 -0400
Subject: [PATCH 47/48] Update simple_cerebros_random_search.py

---
 .../simple_cerebros_random_search.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/cerebros/simplecerebrosrandomsearch/simple_cerebros_random_search.py b/cerebros/simplecerebrosrandomsearch/simple_cerebros_random_search.py
index 06ba512..a412f79 100644
--- a/cerebros/simplecerebrosrandomsearch/simple_cerebros_random_search.py
+++ b/cerebros/simplecerebrosrandomsearch/simple_cerebros_random_search.py
@@ -521,8 +521,7 @@ def run_moity_permutations(self, spec, subtrial_number, lock):
     def run_random_search(self):
         processes = []
         for i in tqdm(np.arange(self.number_of_architecture_moities_to_try),
-                      desc=f"Moiety {self.trial_number + 1} of {self.number_of_architecture_moities_to_try} running",
-                      ascii=True,
+                      desc=f"Global task progress",
                       colour="#16ceeb"):

             self.parse_neural_network_structural_spec_random()

From 713ac96a0f15755f42cf671cf1e67e961f4a9979 Mon Sep 17 00:00:00 2001
From: David Thrower
Date: Thu, 3 Apr 2025 16:36:48 -0400
Subject: [PATCH 48/48] Update simple_cerebros_random_search.py

F-string with no arguments replaced with a regular string.
---
 .../simplecerebrosrandomsearch/simple_cerebros_random_search.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cerebros/simplecerebrosrandomsearch/simple_cerebros_random_search.py b/cerebros/simplecerebrosrandomsearch/simple_cerebros_random_search.py
index a412f79..0874e99 100644
--- a/cerebros/simplecerebrosrandomsearch/simple_cerebros_random_search.py
+++ b/cerebros/simplecerebrosrandomsearch/simple_cerebros_random_search.py
@@ -521,7 +521,7 @@ def run_moity_permutations(self, spec, subtrial_number, lock):
     def run_random_search(self):
         processes = []
         for i in tqdm(np.arange(self.number_of_architecture_moities_to_try),
-                      desc=f"Global task progress",
+                      desc="Global task progress",
                       colour="#16ceeb"):

             self.parse_neural_network_structural_spec_random()
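A note on the base model the hunks in patches 26 and 34 above are editing: pieced together, the Cerebros base model is a token embedding concatenated with a learned position embedding, followed by dropout and a flatten. A minimal sketch of that wiring follows. It is a sketch under stated assumptions: the VOCABULARY_SIZE value, the input shape, and the use of keras_nlp.layers.PositionEmbedding are not visible in the hunks above and are illustrative guesses, not the script's confirmed code.

# Sketch only: reconstructs the base-model wiring edited in patches 26 and 34.
# VOCABULARY_SIZE, the input shape, and the PositionEmbedding layer are
# assumptions about the surrounding script, not shown in the hunks above.
import tensorflow as tf
import keras_nlp

VOCABULARY_SIZE = 50257   # assumed: GPT-2 tokenizer vocabulary
max_seq_length = 96
EMBEDDING_DIM = 23        # per patch 34

inp = tf.keras.layers.Input(shape=(max_seq_length,), dtype=tf.int32)
embedded = tf.keras.layers.Embedding(
    input_dim=VOCABULARY_SIZE,
    output_dim=EMBEDDING_DIM)(inp)
# Learned position embeddings with the same feature width as the token embeddings.
position_embedding = keras_nlp.layers.PositionEmbedding(
    sequence_length=max_seq_length)(embedded)
x = tf.keras.layers.Concatenate()([embedded, position_embedding])
x = tf.keras.layers.Dropout(0.4)(x)   # rate per patch 34
flattened = tf.keras.layers.Flatten()(x)
cerebros_base_model = tf.keras.Model(inputs=inp, outputs=flattened)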
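Patch 32 above hard-codes weight_decay=0.004 and its message says the optimizer choice and weight decay still need to be parameterized. Below is a minimal sketch of what that parameterization could look like, assuming hypothetical argument names optimizer_name and weight_decay; these are not part of the current Cerebros API.

import tensorflow as tf

def compile_with_configurable_optimizer(model: tf.keras.Model,
                                        loss: str,
                                        metrics: list,
                                        learning_rate: float,
                                        optimizer_name: str = "adamw",  # hypothetical parameter
                                        weight_decay: float = 0.004,    # hypothetical parameter
                                        jit_compile: bool = True) -> None:
    # Choose AdamW (decoupled weight decay) or plain Adam, mirroring the
    # change patch 32 makes inside compile_neural_network.
    if optimizer_name.lower() == "adamw":
        optimizer = tf.keras.optimizers.AdamW(learning_rate=learning_rate,
                                              weight_decay=weight_decay)
    else:
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(loss=loss, metrics=metrics, optimizer=optimizer,
                  jit_compile=jit_compile)

Since AdamW applies decoupled weight decay, exposing the decay strength as a searchable hyperparameter alongside learning_rate is a natural next step.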
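Patches 40 through 48 converge on a single tqdm bar wrapped around the per-moiety process launch loop. Here is a self-contained sketch of that final pattern; train_one_moiety is a stand-in for the real run_moity_permutations worker, not Cerebros code.

import multiprocessing as mp
import numpy as np
from tqdm import tqdm

def train_one_moiety(spec: int) -> None:
    # Stand-in for building and training one candidate architecture.
    _ = spec

def run_random_search(number_of_architecture_moities_to_try: int = 5) -> None:
    processes = []
    # One bar over the whole search; desc is a plain string, so no f-string
    # is needed (the fix patch 48 makes).
    for i in tqdm(np.arange(number_of_architecture_moities_to_try),
                  desc="Global task progress",
                  colour="#16ceeb"):
        p = mp.Process(target=train_one_moiety, args=(int(i),))
        processes.append(p)
        p.start()
    for p in processes:
        p.join()

if __name__ == "__main__":
    run_random_search()

Because the bar wraps the launch loop, it advances as each worker process is started; the join loop afterward blocks until every moiety finishes.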