Skip to content

Commit 7d3787b

Browse files
Update phishing_email_detection_gpt2.py
1. Test SmolLM3 tokenizer. 2. Get saved model sizes.
1 parent 6cea4e8 commit 7d3787b

File tree

1 file changed

+5
-1
lines changed

1 file changed

+5
-1
lines changed

phishing_email_detection_gpt2.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,7 @@
3232
from ast import literal_eval
3333
import time
3434
from gc import collect
35+
from os.path import getsize
3536

3637
#
3738
# Load the email data
@@ -403,7 +404,7 @@ def from_config(cls, config):
403404

404405
# Optimal for accuracy thus far:
405406
max_seq_length = 1536
406-
tokenizer_checkpoint = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
407+
tokenizer_checkpoint = "HuggingFaceTB/SmolLM3-3B"
407408

408409
inp = tf.keras.layers.Input(shape=(), dtype=tf.string)
409410
gp2_tokenizer = NewTokenizerLayer(max_seq_length=max_seq_length,tokenizer_checkpoint=tokenizer_checkpoint)
@@ -550,6 +551,9 @@ def from_config(cls, config):
550551
del(cerebros_automl)
551552
collect()
552553

554+
file_size_bytes = getsize(MODEL_FILE_NAME)
555+
print(f"Model size on disk: {file_size_bytes / (1024*1024):.2f} MB")
556+
553557
reconstituted_model = tf.keras.models.load_model(MODEL_FILE_NAME)
554558
test_x_packaged = [test_x_tf]
555559
test_y_packaged = [test_y_tf]

0 commit comments

Comments
 (0)