Skip to content

Commit f3049e2

Browse files
Update phishing_email_detection_gpt2.py
Reduce number of samples...
1 parent 552e131 commit f3049e2

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

phishing_email_detection_gpt2.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
import time
2222
from gc import collect
2323
from os.path import getsize
24-
24+
import re
2525

2626
# Text encoding / embedding related constants
2727

@@ -64,7 +64,7 @@
6464
num_lateral_connection_tries_per_unit = 25
6565
learning_rate = 3 * 10 ** -4 # 3 * 10 ** -3
6666
epochs = 15 #
67-
batch_size = 7 # 17
67+
batch_size = 5 # 17
6868
gradient_accumulation_steps = 1
6969
minimum_levels = 2
7070
maximum_levels = 2 # [3,7]
@@ -164,7 +164,7 @@ def prepare_data(data, max_seq_length: int = MAX_SEQ_LENGTH):
164164

165165
## Only add re, tokenizer already in script
166166

167-
import re
167+
168168

169169
from transformers import AutoTokenizer
170170

@@ -231,7 +231,7 @@ def package_non_instruct_text(text: str, desired_samples: int, max_length_tokens
231231
return samples
232232

233233
# Separate into samples
234-
non_instruct_samples = package_non_instruct_text(text=bible, desired_samples=50, max_length_tokens=1200)
234+
non_instruct_samples = package_non_instruct_text(text=bible, desired_samples=30, max_length_tokens=1200)
235235

236236
del(bible)
237237
collect()

0 commit comments

Comments
 (0)