Update phishing_email_detection_gpt2.py

david-thrower · web-flow · commit 6c32a49b6d17 · 2025-09-15T19:10:46.000-04:00
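Fix variable name mismatches: use MAX_SEQ_LENGTH instead of max_seq_length in the tokenizer calls, call reconstituted_generator.generate instead of generator.generate, and pass half_sample_tokenized directly as token_ids (the intermediate token_ids extraction/conversion is commented out).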
diff --git a/phishing_email_detection_gpt2.py b/phishing_email_detection_gpt2.py
@@ -862,29 +862,29 @@ def call(self, inputs):
     # Tokenize the text
     half_sample_tokenized = tokenizer(
         half_sample,
-        max_length=max_seq_length,
+        max_length=MAX_SEQ_LENGTH,
         padding='max_length',
         truncation=True,
         add_special_tokens=False
     )['input_ids']
     
-    # Extract token IDs as a list of integers (not tensors)
-    if isinstance(half_sample_tokenized, dict):
-        # If tokenizer returns a dict, extract the token IDs
-        token_ids = half_sample_tokenized['input_ids']  # or 'token_ids' depending on your tokenizer
-    else:
-        # If tokenizer returns a list directly
-        token_ids = half_sample_tokenized
+    # # Extract token IDs as a list of integers (not tensors)
+    # if isinstance(half_sample_tokenized, dict):
+    #     # If tokenizer returns a dict, extract the token IDs
+    #     token_ids = half_sample_tokenized['input_ids']  # or 'token_ids' depending on your tokenizer
+    # else:
+    #     # If tokenizer returns a list directly
+    #     token_ids = half_sample_tokenized
     
-    # Convert to Python list of integers if it's a tensor
-    if hasattr(token_ids, 'numpy'):
-        token_ids = token_ids.numpy().tolist()
-    if not isinstance(token_ids, list):
-        token_ids = list(token_ids)
+    # # Convert to Python list of integers if it's a tensor
+    # if hasattr(token_ids, 'numpy'):
+    #     token_ids = token_ids.numpy().tolist()
+    # if not isinstance(token_ids, list):
+    #     token_ids = list(token_ids)
     
     # Now pass the list of integers to your generate method
-    generated_tokens = generator.generate(
-        token_ids=token_ids,  # This should now be a list of integers
+    generated_tokens = reconstituted_generator.generate(
+        token_ids=half_sample_tokenized,  # This should now be a list of integers
         do_sample=False,
         max_new_tokens=40
     )
@@ -972,7 +972,7 @@ def call(self, inputs):
     # Tokenize the text
     half_sample_tokenized = tokenizer(
         half_sample,
-        max_length=max_seq_length,
+        max_length=MAX_SEQ_LENGTH,
         padding='max_length',
         truncation=True,
         add_special_tokens=False
@@ -994,7 +994,7 @@ def call(self, inputs):
     
     # Now pass the list of integers to your generate method
     generated_tokens = reconstituted_generator.generate(
-        token_ids=token_ids,  # This should now be a list of integers
+        token_ids=half_sample_tokenized,  # This should now be a list of integers
         do_sample=False,
         max_new_tokens=40
     )