Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 58 additions & 8 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
import csv
import json
import ast
from datetime import datetime
from collections import defaultdict
from src.JobTracker.utils import EmailMessage
Expand All @@ -10,14 +11,16 @@
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')

def process_email(email_path):
def process_email(email_path, old_csv_path):
'''
Process the emails at the given path and return the results.

:param email_path: Path to the email file or directory
:return: List of processed email data
'''
em = EmailMessage(email_path)

old_file = read_csv(old_csv_path) if old_csv_path else None
em = EmailMessage(email_path, old_file)
mail_info = em.get_mail_info()
res = []
chatbot = ChatGPT()
Expand All @@ -31,20 +34,19 @@ def process_email(email_path):
elif k.lower() == "n":
logging.info("---------Stop processing emails---------")
return
companys = defaultdict(list)

companys = buildOldCompany(old_file) if old_file else defaultdict(list)
key = ['subject', 'sender_name', 'sender_mail', 'recipient_name', 'recipient_mail', 'date', 'body', 'length', 'company', 'state', 'next_step', 'rank']
for mail in mail_info:
state, data = chatbot.get_content(mail)
if state == 'Succeed':
content = list(data.values())
companys[data['company']].append(content)

for content in companys.values():
if len(content) != 0:
content.sort(key=lambda a: a[-1])
combined_list = [list(column) for column in zip(*content)]
data = dict(zip(key[:-1], combined_list[:-1]))
data["next_step"] = data["next_step"][-1]
res.append(data)
return res

Expand All @@ -56,8 +58,51 @@ def export_to_csv(data, filename):
for row in data:
writer.writerow(row)

def main(email_path, output_csv):
result = process_email(email_path)
def read_csv(filename):
    '''
    Load every row of a CSV file as a dictionary keyed by the header row.

    :param filename: Path to the CSV file; a falsy value means "no file"
    :return: List of row dictionaries, or an empty list when no path is given
             or the file cannot be read
    '''
    rows = []
    if filename:
        try:
            with open(filename, mode='r', newline='', encoding='utf-8') as csvfile:
                rows = list(csv.DictReader(csvfile))
        except FileNotFoundError as e:
            logging.warning(f"File not found or path incorrect: {e}")
            return []
        except Exception as e:
            # Best-effort read: any other failure is logged and treated as "no data".
            logging.warning(f"Error reading the file: {e}")
            return []
    return rows

def buildOldCompany(old_companys):
    '''
    Rebuild the per-company mail history from rows of a previously exported CSV.

    :param old_companys: Iterable of row dictionaries whose cells are Python
                         literals (each cell is parsed with ast.literal_eval)
    :return: defaultdict(list) mapping a company name to a list of per-mail records
    '''
    history = defaultdict(list)
    for row in old_companys:
        columns = [ast.literal_eval(cell) for cell in row.values()]
        # Column 5 is presumably the 'date' column and column 8 the 'company'
        # column of the exported CSV -- verify against the export key order.
        parsed_dates = list(map(convertDate, columns[5]))
        company_name = columns[8][0]
        # The parsed dates become the trailing column of every record.
        columns.append(parsed_dates)
        # Transpose column-wise lists into one record per mail.
        history[company_name].extend(list(record) for record in zip(*columns))
    return history

def convertDate(date):
    '''
    Parse an email date header into a timezone-aware datetime.

    Two layouts are accepted: "Thu, 09 Nov 2023 23:27:06 +0000" and the same
    string followed by a zone name in parentheses, e.g. "... +0000 (UTC)".

    :param date: Date string taken from a mail header
    :return: The parsed timezone-aware datetime, or the largest representable
             datetime (tagged as UTC) when neither layout matches
    '''
    # Local import: the module itself only imports the `datetime` class.
    from datetime import timezone

    for fmt in ("%a, %d %b %Y %H:%M:%S %z", "%a, %d %b %Y %H:%M:%S %z (%Z)"):
        try:
            return datetime.strptime(date, fmt)
        except ValueError:
            continue

    logging.warning("not able to parse date")
    # Successful parses are timezone-aware (both formats contain %z). The
    # fallback must be aware as well, otherwise comparing it with parsed dates
    # (e.g. when sorting records by date) raises TypeError.
    return datetime.max.replace(tzinfo=timezone.utc)



def main(email_path, output_csv, old_csv_path):
result = process_email(email_path, old_csv_path)
if result:
export_to_csv(result, output_csv)
logging.info(f"Processed emails successfully and exported to CSV at {output_csv}.")
Expand All @@ -75,5 +120,10 @@ def main(email_path, output_csv):
help='The output path for the CSV file',
default='emails.csv', # Default output filename if not specified
required=False)
parser.add_argument("-d", '--old',
type=str,
help='The old CSV file to avoid duplicate',
default=None,
required=False)
args = parser.parse_args()
main(args.path, args.output)
main(args.path, args.output, args.old)
2 changes: 1 addition & 1 deletion src/JobTracker/chatbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def get_content(self, info):
date_object = datetime.strptime(info['date'], "%a, %d %b %Y %H:%M:%S %z (%Z)")
month_day_year_time = date_object.strftime("%b %d %Y %H:%M:%S")
except ValueError:
print("Unable to parse date")
logging.warn("Unable to parse date")
info['state'] = json.dumps({info['state']:month_day_year_time})
info['rank'] = date_object
return ('Succeed', info)
Expand Down
2 changes: 0 additions & 2 deletions src/JobTracker/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@


OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
LOCAL_MODEL="llama2"
LOCAL_MODEL_URL = "http://127.0.0.1:11434/api/generate"

MODEL = 'gpt-4-1106-preview'
PRICE = {
Expand Down
55 changes: 38 additions & 17 deletions src/JobTracker/utils.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,20 @@
import re
import email.utils
import mailbox
import hashlib
import requests
import json
import ast
from .config import KEYWORD
from email.header import decode_header
from email.utils import parseaddr
from bs4 import BeautifulSoup

class EmailMessage:

def __init__(self, mbox_path):
def __init__(self, mbox_path, old_csv_mails):
self.mail_lst = mailbox.mbox(mbox_path)
self.old_mail_list = self.get_old_csv_hash_list(old_csv_mails)
self.url_pattern = re.compile(
r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')

Expand Down Expand Up @@ -53,21 +57,38 @@ def get_mail_info(self):
# Loop over every mail and get info
for mail in self.mail_lst:
info = dict()
subject = decode_header(mail['subject'])
subject = ''.join(part.decode(charset or 'utf-8') if isinstance(part, bytes) else part
for part, charset in subject)
sender_name, sender_mail = email.utils.parseaddr(mail['from'])
recipient_name, recipient_mail = email.utils.parseaddr(mail['to'])
date = mail['date']
body = self.get_mail_body(mail)
if self.related_to_application(body + subject):
info['subject'] = subject
info['sender_name'] = sender_name
info['sender_mail'] = sender_mail
info['recipient_name'] = recipient_name
info['recipient_mail'] = recipient_mail
info['date'] = date
info['body'] = body
info['length'] = len(body)
res.append(info)
sender_name, sender_mail = email.utils.parseaddr(mail['from'])
if self.get_hash(sender_mail+date) not in self.old_mail_list:
subject = decode_header(mail['subject'])
subject = ''.join(part.decode(charset or 'utf-8') if isinstance(part, bytes) else part
for part, charset in subject)
recipient_name, recipient_mail = email.utils.parseaddr(mail['to'])
body = self.get_mail_body(mail)
if self.related_to_application(body + subject):
info['subject'] = subject
info['sender_name'] = sender_name
info['sender_mail'] = sender_mail
info['recipient_name'] = recipient_name
info['recipient_mail'] = recipient_mail
info['date'] = date
info['body'] = body
info['length'] = len(body)
res.append(info)
return res


def get_old_csv_hash_list(self, mails):
if not mails:
return {}
old_pools = set()
for mail in mails:
sender = ast.literal_eval(mail['sender_mail'])
date = ast.literal_eval(mail['date'])
for index in range(len(sender)):
key = sender[index] + date[index]
old_pools.add(self.get_hash(key))
return old_pools

def get_hash(self, key):
    '''
    Return the hexadecimal SHA-256 digest of a string.

    :param key: Text to hash; encoded with str.encode() defaults before hashing
    :return: Hex digest string
    '''
    hasher = hashlib.sha256()
    hasher.update(key.encode())
    return hasher.hexdigest()
43 changes: 41 additions & 2 deletions tests/test_chatbot.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import json
import unittest
from datetime import datetime
from unittest.mock import patch, MagicMock
from src.JobTracker.utils import EmailMessage
from src.JobTracker.chatbot import ChatBot, ChatGPT

MODEL_LIST_RETURN = {
Expand Down Expand Up @@ -44,7 +46,7 @@ def setUp(self, mock_model_list):
@patch('openai.ChatCompletion.create')
@patch('src.JobTracker.config')
def test_get_content_succeed(self, mock_config, mock_openai_chatcompletion_create):
info = {'body': 'test email body'}
info = {'body': 'test email body', 'date': "Thu, 09 Nov 2023 23:27:06 +0000"}
mock_config.API_KEY = 'test_api_key'
mock_config.FUNCTION = 'test_function'
# Mock the API response
Expand All @@ -64,9 +66,11 @@ def test_get_content_succeed(self, mock_config, mock_openai_chatcompletion_creat
mock_response.choices = [mock_choice]
mock_openai_chatcompletion_create.return_value = mock_response
state, data = self.chat_gpt.get_content(info)
date_object = datetime.strptime(info['date'], "%a, %d %b %Y %H:%M:%S %z")
month_day_year_time = date_object.strftime("%b %d %Y %H:%M:%S")
self.assertEqual(state, 'Succeed')
self.assertEqual(data['company'], 'TestCompany')
self.assertEqual(data['state'], 'TestState')
self.assertEqual(data['state'], json.dumps({"TestState": month_day_year_time}))
self.assertEqual(data['next_step'], 'TestNextStep')

@patch('openai.ChatCompletion.create')
Expand All @@ -88,6 +92,41 @@ def test_get_content_failed(self, mock_config, mock_openai_chatcompletion_create
self.assertEqual(state, 'Failed')
self.assertEqual(data, 'Not related to a job application or interview process')

@patch('mailbox.mbox', return_value=MagicMock())
@patch('openai.ChatCompletion.create')
@patch('src.JobTracker.config')
def test_avoid_hash_success(self, mock_config, mock_openai_chatcompletion_create, mock_mbox):
    '''
    A mail returned by get_content must hash to an entry of the pool built
    from an old CSV row with the same sender and date.
    '''
    mock_config.API_KEY = 'test_api_key'
    mock_config.FUNCTION = 'test_function'

    info = {'body': 'test email body', 'date': "Thu, 09 Nov 2023 23:27:06 +0000", "sender_mail" : "noreply@careers.tiktok.com"}
    # Old CSV rows store each column as the string form of a list.
    oldMail = {"sender_mail" : '["noreply@careers.tiktok.com"]', 'date': '["Thu, 09 Nov 2023 23:27:06 +0000"]'}

    # Mock the API response
    function_arguments = json.dumps({
        'company': 'TestCompany',
        'state': 'TestState',
        'next_step': 'TestNextStep'
    })
    mock_choice = MagicMock()
    mock_choice.finish_reason = 'function_call'
    mock_choice.message = {'function_call': {'arguments': function_arguments}}

    # mailbox.mbox is patched, so the path is never opened.
    self.mbox_path = 'path/to/mbox'
    self.mbox_old_path = ''
    self.email_message = EmailMessage(self.mbox_path, self.mbox_old_path)

    pool = self.email_message.get_old_csv_hash_list([oldMail])

    mock_response = MagicMock()
    mock_response.choices = [mock_choice]
    mock_openai_chatcompletion_create.return_value = mock_response
    state, data = self.chat_gpt.get_content(info)

    self.assertEqual(state, 'Succeed')
    mail_hash = self.email_message.get_hash(data['sender_mail'] + data['date'])
    self.assertEqual(mail_hash in pool, True)


if __name__ == '__main__':
unittest.main()
3 changes: 2 additions & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ class TestEmailMessage(unittest.TestCase):
@patch('mailbox.mbox', return_value=MagicMock())
def setUp(self, mock_mbox):
self.mbox_path = 'path/to/mbox'
self.email_message = EmailMessage(self.mbox_path)
self.mbox_old_path = ''
self.email_message = EmailMessage(self.mbox_path, self.mbox_old_path)

def test_clenup_body(self):
text = "This is a test message. Visit http://example.com for details.\r\nNew line here."
Expand Down