Merged

48 commits
30164c7
Update automerge.yml
david-thrower Mar 22, 2025
8904966
Update automerge.yml
david-thrower Mar 22, 2025
c7e8b30
Update phishing_email_detection_gpt2.py
david-thrower Mar 22, 2025
b790e64
Update phishing_email_detection_gpt2.py
david-thrower Mar 22, 2025
15ec9c2
Update phishing_email_detection_gpt2.py
david-thrower Mar 22, 2025
0cfb488
Update phishing_email_detection_gpt2.py
david-thrower Mar 22, 2025
6f86959
Update phishing_email_detection_gpt2.py
david-thrower Mar 22, 2025
830a2dc
Update automerge.yml
david-thrower Mar 22, 2025
407f90c
Update phishing_email_detection_gpt2.py
david-thrower Mar 22, 2025
d5bdbce
Update phishing_email_detection_gpt2.py
david-thrower Mar 22, 2025
d8db0f1
Update phishing_email_detection_gpt2.py
david-thrower Mar 22, 2025
014b3c3
Update phishing_email_detection_gpt2.py
david-thrower Mar 22, 2025
0b67f88
Update phishing_email_detection_gpt2.py
david-thrower Mar 22, 2025
a480dfd
Update phishing_email_detection_gpt2.py
david-thrower Mar 22, 2025
0e72e61
Update phishing_email_detection_gpt2.py
david-thrower Mar 22, 2025
3cd5945
Update phishing_email_detection_gpt2.py
david-thrower Mar 23, 2025
6a9e88d
Update phishing_email_detection_gpt2.py
david-thrower Mar 23, 2025
f24a858
Update phishing_email_detection_gpt2.py
david-thrower Mar 23, 2025
4e15756
Update automerge.yml
david-thrower Mar 23, 2025
9a4db15
Update phishing_email_detection_gpt2.py
david-thrower Mar 25, 2025
59cfa23
Update phishing_email_detection_gpt2.py
david-thrower Mar 25, 2025
d928a54
Update phishing_email_detection_gpt2.py
david-thrower Mar 25, 2025
3c25a22
Update phishing_email_detection_gpt2.py
david-thrower Mar 25, 2025
88a1bd5
Update phishing_email_detection_gpt2.py
david-thrower Mar 26, 2025
42d9c4f
Update phishing_email_detection_gpt2.py
david-thrower Mar 26, 2025
ed4641e
Update phishing_email_detection_gpt2.py
david-thrower Mar 26, 2025
cdb4455
Update phishing_email_detection_gpt2.py
david-thrower Mar 26, 2025
048eb1b
Update phishing_email_detection_gpt2.py
david-thrower Mar 26, 2025
b800cf7
Update phishing_email_detection_gpt2.py
david-thrower Mar 27, 2025
7930a2d
Update automerge.yml
david-thrower Mar 27, 2025
e6ae27c
Update automerge.yml
david-thrower Mar 30, 2025
0eab09e
Update neural_network_future.py
david-thrower Mar 30, 2025
8939f3c
Update phishing_email_detection_gpt2.py
david-thrower Mar 30, 2025
966f714
Update phishing_email_detection_gpt2.py
david-thrower Apr 2, 2025
9724e9d
Update automerge.yml
david-thrower Apr 2, 2025
380928d
Update automerge.yml
david-thrower Apr 2, 2025
9323f5f
Update phishing_email_detection_gpt2.py
david-thrower Apr 2, 2025
f683fb8
Update phishing_email_detection_gpt2.py
david-thrower Apr 2, 2025
69d9d1d
Update requirements.txt
david-thrower Apr 3, 2025
ffb0e90
Update simple_cerebros_random_search.py
david-thrower Apr 3, 2025
9fafccc
Update automerge.yml
david-thrower Apr 3, 2025
aba7589
Update simple_cerebros_random_search.py
david-thrower Apr 3, 2025
f628498
Update simple_cerebros_random_search.py
david-thrower Apr 3, 2025
689f003
Update simple_cerebros_random_search.py
david-thrower Apr 3, 2025
40c3582
Update simple_cerebros_random_search.py
david-thrower Apr 3, 2025
642452f
Update simple_cerebros_random_search.py
david-thrower Apr 3, 2025
6f7c1f0
Update simple_cerebros_random_search.py
david-thrower Apr 3, 2025
713ac96
Update simple_cerebros_random_search.py
david-thrower Apr 3, 2025
21 changes: 6 additions & 15 deletions .github/workflows/automerge.yml
@@ -5,7 +5,7 @@ name: Python application

on:
push:
branches: [ "main", "148-tensorflow-upgrades" ]
branches: [ "main", "162-add-a-tqdm-global-progress-bar-to-nas-search-task" ]

permissions:
contents: read
@@ -33,25 +33,16 @@ jobs:
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
# flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
# - name: Test by running.
# run: python3 cerebros.py
# - name: Test distributed by running.
# run: python3 test_simple_cerebros_gridsearch.py
# - name: Test distributed random search wine by running.
# run: python3 random_search.py
# - name: Test CerebrosRealNeuronNetwork
# run: python3 realnn-regression-example-ames-no-preproc.py
# timeout-minutes: 45
- name: Test distributed random search Ames by running
run: python3 regression-example-ames-no-preproc.py
- name: Test distributed random search Ames by running - Val set
run: python3 regression-example-ames-no-preproc-val-set.py
- name: Test text classifier - random search - ham-spam
run: python3 text-class-ham-or-spam.py
timeout-minutes: 90
- name: Test image classifier - small subset of CIFAR10
# - name: Test text classifier - random search - ham-spam
# run: python3 text-class-ham-or-spam.py
# timeout-minutes: 90
- name: Test image classifier - small subset of CIFAR10 # add back
timeout-minutes: 90
run: python3 cifar10-example.py
- name: Phishing email detection with GPT2 embedding
timeout-minutes: 120
timeout-minutes: 420
run: python3 phishing_email_detection_gpt2.py
6 changes: 4 additions & 2 deletions cerebros/neuralnetworkfuture/neural_network_future.py
@@ -332,8 +332,10 @@ def compile_neural_network(self):
self.materialized_neural_network.compile(
loss=self.loss,
metrics=self.metrics,
optimizer=tf.keras.optimizers.Adam(
learning_rate=self.learning_rate),
optimizer=tf.keras.optimizers.AdamW(
learning_rate=self.learning_rate,
weight_decay=0.004 # Add weight decay parameter
),
jit_compile=jit_compile)

def util_parse_connectivity_csv(self):
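The switch from Adam to AdamW above decouples weight decay from the gradient update. Below is a minimal sketch of the same compile pattern outside Cerebros (assuming TensorFlow >= 2.11, where tf.keras.optimizers.AdamW is available; the toy model and hyperparameter values are placeholders, not the project's):

import tensorflow as tf

toy_model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(4,)),
    tf.keras.layers.Dense(8, activation="relu"),
    tf.keras.layers.Dense(1, activation="sigmoid"),
])

toy_model.compile(
    loss="binary_crossentropy",
    metrics=[tf.keras.metrics.BinaryAccuracy()],
    optimizer=tf.keras.optimizers.AdamW(
        learning_rate=1e-3,  # Cerebros passes self.learning_rate here
        weight_decay=0.004,  # decay applied directly to the weights each step
    ),
)

Unlike an L2 penalty under plain Adam, AdamW's decay is not rescaled by the adaptive learning rate, which generally regularizes more predictably.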
simple_cerebros_random_search.py
@@ -2,6 +2,7 @@
import numpy as np
import pandas as pd
import tensorflow as tf
from tqdm import tqdm
from cerebros.denseautomlstructuralcomponent.\
dense_automl_structural_component \
import DenseAutoMlStructuralComponent, DenseLateralConnectivity, \
@@ -519,7 +520,10 @@ def run_moity_permutations(self, spec, subtrial_number, lock):

def run_random_search(self):
processes = []
for i in np.arange(self.number_of_architecture_moities_to_try):
for i in tqdm(np.arange(self.number_of_architecture_moities_to_try),
desc="Global task progress",
colour="#16ceeb"):

self.parse_neural_network_structural_spec_random()
spec = self.get_neural_network_spec()

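A minimal sketch of the progress-bar pattern added above, with the loop body stubbed out (tqdm >= 4.46 supplies the colour keyword; the trial count here is a placeholder):

import numpy as np
from tqdm import tqdm

number_of_architecture_moities_to_try = 5  # placeholder
for i in tqdm(np.arange(number_of_architecture_moities_to_try),
              desc="Global task progress",
              colour="#16ceeb"):
    pass  # one architecture moity is searched per iteration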
210 changes: 170 additions & 40 deletions phishing_email_detection_gpt2.py
@@ -14,6 +14,7 @@
import tensorflow as tf
import tensorflow_text
from keras_nlp.models import GPT2Tokenizer, GPT2Preprocessor, GPT2Backbone
from keras_nlp.layers import PositionEmbedding
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from tensorflow.keras.utils import to_categorical
@@ -29,6 +30,8 @@
from cerebros.denseautomlstructuralcomponent.dense_automl_structural_component\
import zero_7_exp_decay, zero_95_exp_decay, simple_sigmoid
from ast import literal_eval
import time


#
# Load the email data
@@ -65,8 +68,15 @@
#
# Tensors for training data and labels
#
training_x = [tf.constant(X_train)]
train_labels = [tf.constant(y_train)]

# Training data for baseline model
baseline_train_x = tf.constant(X_train)
baseline_train_y = tf.constant(y_train, dtype=tf.int8)

# Packaged for Cerebros (multimodal, takes inputs as a list)
training_x = [baseline_train_x]
train_labels = [baseline_train_y]
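The extra list wrapping is deliberate: Cerebros treats inputs as multimodal, one list entry per modality. A toy sketch of the convention (names and data are illustrative, not the project's):

toy_x = tf.constant(["cheap meds now!!!", "minutes from today's standup"])
toy_y = tf.constant([1, 0], dtype=tf.int8)
toy_training_x = [toy_x]    # one entry per input modality
toy_train_labels = [toy_y]  # labels packaged the same way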

#
# Input and output shapes
#
@@ -75,6 +85,7 @@

"""### A custom GPT2 encoder layer for text embedding"""


class GPT2Layer(tf.keras.layers.Layer):

def __init__(self, max_seq_length, **kwargs):
@@ -90,9 +101,9 @@ def __init__(self, max_seq_length, **kwargs):
# Set whether the GPT2 model's layers are trainable
#self.encoder.trainable = False
for layer in self.encoder.layers:
layer.trainable = False
layer.trainable = True
#
self.encoder.layers[-2].trainable = True
# self.encoder.layers[-2].trainable = True
#
# Set the maximum sequence length for tokenization
self.max_seq_length = max_seq_length
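The flip above moves from partial to full fine-tuning of the GPT-2 encoder. A self-contained sketch of the freeze/unfreeze pattern, on a stand-in Keras model rather than the actual GPT2Backbone:

import tensorflow as tf

def set_backbone_trainable(backbone: tf.keras.Model, full_fine_tune: bool) -> None:
    # New behavior: every layer updates during training.
    for layer in backbone.layers:
        layer.trainable = full_fine_tune
    # Old behavior: freeze everything, then unfreeze one late layer.
    if not full_fine_tune and len(backbone.layers) >= 2:
        backbone.layers[-2].trainable = True

Full fine-tuning raises per-epoch compute and memory considerably, which is consistent with the workflow timeout increase from 120 to 420 minutes.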
@@ -121,30 +132,147 @@ def from_config(cls, config):
# GPT2 configurables
max_seq_length = 96

# Base model
# GPT Baseline Model
input_layer = Input(shape=(), dtype=tf.string)
gpt2_layer = GPT2Layer(max_seq_length)(input_layer)
#output = Flatten()(gpt2_layer)
base_model = Model(inputs=input_layer, outputs=gpt2_layer)
base_model.summary()
binary_output = tf.keras.layers.Dense(1, activation='sigmoid')(gpt2_layer)

gpt_baseline_model = Model(inputs=input_layer, outputs=binary_output)


gpt_baseline_model.compile(
optimizer=Adam(learning_rate=1e-4), # Small LR since we're fine-tuning GPT
loss='binary_crossentropy',
# metrics=['accuracy', tf.keras.metrics.AUC(name='auc')]
metrics=[tf.keras.metrics.BinaryAccuracy(),
tf.keras.metrics.Precision(),
tf.keras.metrics.Recall()]
)

gpt_t0 = time.time()

gpt_baseline_model.summary()

history = gpt_baseline_model.fit(
x=X_train, # Input data
y=y_train, # Labels
epochs=3, # Number of training iterations
batch_size=16, # Batch size small due to GPU memory constraints
validation_split=0.2, # Hold out 20% of training data for validation
shuffle=True, # Shuffle data at each epoch
callbacks=[
tf.keras.callbacks.EarlyStopping(
monitor='val_loss',
patience=3,
restore_best_weights=True,
min_delta=0.001
),
tf.keras.callbacks.ReduceLROnPlateau(
monitor='val_loss',
factor=0.2,
patience=2,
min_lr=1e-6
)
]
)

gpt_t1 = time.time()
gpt_time_on_one_model_min = (gpt_t1 - gpt_t0) / 60

hy_df = pd.DataFrame(history.history)
print(hy_df)


### Cerebros model:

# TokenizerLayer class to handle tokenization and return only token_ids
class TokenizerLayer(tf.keras.layers.Layer):

def __init__(self, max_seq_length, **kwargs):
super(TokenizerLayer, self).__init__(**kwargs)
self.tokenizer = GPT2Tokenizer.from_preset("gpt2_extra_large_en")
self.preprocessor = GPT2Preprocessor(self.tokenizer, sequence_length=max_seq_length)
self.max_seq_length = max_seq_length

def call(self, inputs):
prep = self.preprocessor([inputs])
return prep['token_ids']

def get_config(self):
config = super(TokenizerLayer, self).get_config()
config.update({'max_seq_length': self.max_seq_length})
return config

@classmethod
def from_config(cls, config):
return cls(max_seq_length=config['max_seq_length'])
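Hypothetical usage of the pieces inside TokenizerLayer (assumes the keras_nlp gpt2_extra_large_en preset can be downloaded; shapes follow the GPT2Preprocessor contract):

tokenizer = GPT2Tokenizer.from_preset("gpt2_extra_large_en")
preprocessor = GPT2Preprocessor(tokenizer, sequence_length=64)
features = preprocessor(tf.constant(["free crypto, click here"]))
token_ids = features["token_ids"]        # int tensor, shape (1, 64)
padding_mask = features["padding_mask"]  # True where a real token is present

The layer returns only token_ids so that a downstream Embedding layer can consume them directly.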

# GPT2 configurables

# Optimal for accuracy thus far:
# max_seq_length = 900
max_seq_length = 1024

inp = tf.keras.layers.Input(shape=(), dtype=tf.string)
gp2_tokenizer = TokenizerLayer(max_seq_length=max_seq_length)
VOCABULARY_SIZE = gp2_tokenizer.tokenizer.vocabulary_size()
tokens = gp2_tokenizer(inp)

# On larger hardware, this could probably be increased considerably and
# would likely improve performance ...
EMBEDDING_DIM = 23  # Must match the embedding layer's output_dim below.

embedded = tf.keras.layers.Embedding(
input_dim=VOCABULARY_SIZE,
output_dim=EMBEDDING_DIM,
input_length=max_seq_length,
mask_zero=True)(tokens)

position_embedding = PositionEmbedding(
sequence_length=max_seq_length,
initializer="uniform",
)(embedded)

# FYI: we tried an Add layer here, both with and without LayerNorm;
# it degraded accuracy. Noted to save anyone trying to apply the
# conventional wisdom the time ...
x = tf.keras.layers.Concatenate()([embedded, position_embedding])
x = tf.keras.layers.Dropout(0.4)(x) # AI suggested 0.4
flattened = tf.keras.layers.Flatten()(x)

cerebros_base_model = tf.keras.Model(
inputs=inp,
outputs=flattened # Output enhanced embeddings now
)
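Under the configuration above, the shapes work out as follows; the final lines are a hypothetical sanity check (running it requires the GPT-2 preset to be available):

# tokens:             (b, 1024)      int token ids
# embedded:           (b, 1024, 23)  token embeddings
# position_embedding: (b, 1024, 23)  learned position embeddings
# x (concatenated):   (b, 1024, 46)  channels stacked, not summed
# flattened:          (b, 47104)     1024 * 46 features handed to Cerebros
probe = cerebros_base_model(tf.constant(["hello world"]))
print(probe.shape)  # expected: (1, 47104)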


"""### Cerebros search for the best model"""

#
# Cerebros configurables
#
activation = 'gelu'
predecessor_level_connection_affinity_factor_first = 49.9999
predecessor_level_connection_affinity_factor_main = 0.31456
max_consecutive_lateral_connections = 22
p_lateral_connection = 0.39256
num_lateral_connection_tries_per_unit = 10
learning_rate = 0.0000511065
epochs = 6 # [1, 100]
batch_size = 13
maximum_levels = 4 # [3,7]
maximum_units_per_level = 8 # [2,10]
maximum_neurons_per_unit = 5 # [2,20]
activation = "relu"
predecessor_level_connection_affinity_factor_first = 10
predecessor_level_connection_affinity_factor_main = 40
max_consecutive_lateral_connections = 20
p_lateral_connection = 30
num_lateral_connection_tries_per_unit = 25
learning_rate = 3 * 10 ** -3
epochs = 15 # [1, 100]
batch_size = 17
minimum_levels = 2
maximum_levels = 2 # [3,7]

minimum_units_per_level = 4
maximum_units_per_level = 7

minimum_neurons_per_unit = 1
maximum_neurons_per_unit = 2

moities_to_try = 5
tries_per_moity = 1

#
# Logging
@@ -157,6 +285,7 @@ def from_config(cls, config):

meta_trial_number = 42 # irrelevant unless in distributed training


cerebros_automl = SimpleCerebrosRandomSearch(
unit_type=DenseUnit,
input_shapes=INPUT_SHAPES,
@@ -166,16 +295,16 @@ def from_config(cls, config):
validation_split=0.35,
direction='maximize',
metric_to_rank_by="val_binary_accuracy",
minimum_levels=2,
minimum_levels=minimum_levels,
maximum_levels=maximum_levels,
minimum_units_per_level=1,
minimum_units_per_level=minimum_units_per_level,
maximum_units_per_level=maximum_units_per_level,
minimum_neurons_per_unit=1,
minimum_neurons_per_unit=minimum_neurons_per_unit,
maximum_neurons_per_unit=maximum_neurons_per_unit,
activation=activation,
final_activation='sigmoid',
number_of_architecture_moities_to_try=2,
number_of_tries_per_architecture_moity=1,
number_of_architecture_moities_to_try=moities_to_try,
number_of_tries_per_architecture_moity=tries_per_moity,
minimum_skip_connection_depth=1,
maximum_skip_connection_depth=7,
predecessor_level_connection_affinity_factor_first=predecessor_level_connection_affinity_factor_first,
@@ -191,31 +320,32 @@ def from_config(cls, config):
p_lateral_connection_decay=zero_95_exp_decay,
num_lateral_connection_tries_per_unit=num_lateral_connection_tries_per_unit,
learning_rate=learning_rate,
loss=tf.keras.losses.CategoricalHinge(),
metrics=[tf.keras.metrics.BinaryAccuracy(),
tf.keras.metrics.Precision(),
tf.keras.metrics.Recall()],
loss=tf.keras.losses.BinaryCrossentropy(),
# loss=tf.keras.losses.CategoricalHinge(),
metrics=[tf.keras.metrics.BinaryAccuracy(),
tf.keras.metrics.Precision(),
tf.keras.metrics.Recall()],
epochs=epochs,
project_name=f"{PROJECT_NAME}_meta_{meta_trial_number}",
model_graphs='model_graphs',
batch_size=batch_size,
meta_trial_number=meta_trial_number,
base_models=[base_model],
base_models=[cerebros_base_model],
train_data_dtype=tf.string)

cerebros_t0 = time.time()
result = cerebros_automl.run_random_search()
cerebros_t1 = time.time()
cerebros_time_all_models_min = (cerebros_t1 - cerebros_t0) / 60
models_tried = moities_to_try * tries_per_moity
cerebros_time_per_model = cerebros_time_all_models_min / models_tried

print(f'Best accuracy achieved is {result}')
print(f'binary accuracy')
print(f"Cerebros trained {models_tried} models FROM A COLD START in ONLY {cerebros_time_all_models_min} min. Cerebros took only {cerebros_time_per_model} minutes on average per model.")
print(f"GPT2 took {gpt_time_on_one_model_min} just to FINE TUNE one PRE - TRAINED model for 3 epochs. Although this is a small scale test, this shows the advantage of scaling in ON timing VS ON**2 timing.")

"""### Testing the best model found"""

#
# Load the best model (taking into account that it has a custom layer)
#
best_model_found =\
tf.keras.models.load_model(cerebros_automl.best_model_path,\
custom_objects={'GPT2Layer': GPT2Layer(max_seq_length)})
print(f'Cerebros best accuracy achieved is {result}')
print(f'val set accuracy')

# """### Testing the best model found"""

print('Evaluating on the test dataset')
best_model_found.evaluate(X_test, y_test)
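The explicit re-load of the best model was removed above, yet evaluate() still assumes a model object. In a fresh session, a hypothetical equivalent of the removed lines would register the new custom layer (TokenizerLayer rather than GPT2Layer):

best_model_found = tf.keras.models.load_model(
    cerebros_automl.best_model_path,
    custom_objects={'TokenizerLayer': TokenizerLayer})
best_model_found.evaluate(X_test, y_test)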
1 change: 1 addition & 0 deletions requirements.txt
@@ -8,3 +8,4 @@ pyvis==0.3.2
plotly==5.20.0
matplotlib==3.8.4
imageio==2.34.0
tqdm==4.67.1