-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbayesianAgent.py
More file actions
61 lines (45 loc) · 1.99 KB
/
bayesianAgent.py
File metadata and controls
61 lines (45 loc) · 1.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import math
import os

from emailreader import EmailReader
from wordcount import WordCounter
class BayesianAgent:
    """Naive Bayes spam/ham classifier built on EmailReader and WordCounter.

    The agent owns two collaborators:

    * ``self.reader`` -- an ``EmailReader`` used to load emails and turn text
      into a sequence of words.
    * ``self.wordy`` -- a ``WordCounter`` holding the per-class word and email
      counts that the predictions are derived from.
    """

    # Files that must all be present in the working directory for the
    # WordCounter to be created with ``file_available=True``.
    _CACHE_FILES = ("ham_word.json", "spam_word.json", "spamonham.txt")

    def __init__(self):
        """Create the collaborators and count the corpus if nothing is cached."""
        super().__init__()
        self.reader = EmailReader()

        # List the working directory once instead of once per cache file.
        present = set(os.listdir())
        file_available = all(name in present for name in self._CACHE_FILES)
        self.wordy = WordCounter(file_available=file_available)

        if not file_available:
            # Only count from the raw emails when no cache exists; counting on
            # top of counts restored from disk would tally every email twice.
            self.load_weight()

    def load_weight(self):
        """Count words and emails of both classes, then persist the counts.

        Every file named in ``self.reader.ham_filenames`` and
        ``self.reader.spam_filenames`` is loaded, converted to plain text and
        split into words by the reader. Each word is reported to
        ``self.wordy.update_word`` and each email to
        ``self.wordy.update_email``; finally ``self.wordy.save_dict()`` is
        called.
        """
        corpora = (
            (False, self.reader.ham_filenames),
            (True, self.reader.spam_filenames),
        )
        for is_spam, filenames in corpora:
            for name in filenames:
                raw = self.reader.load_email(is_spam=is_spam, filename=name)
                words = self.reader.text2vec(self.reader.html_to_plain(raw))
                for word in words:
                    self.wordy.update_word(word, is_spam)
                # One email tally per message, after its words are counted.
                self.wordy.update_email(is_spam)

        # Save updated data
        self.wordy.save_dict()

    @staticmethod
    def _log_prob(p):
        """Return ``log(p)``, mapping non-positive values to ``-inf``."""
        return math.log(p) if p > 0 else -math.inf

    def predict(self, text):
        """Classify *text*, print the verdict and return the spam probability.

        The posterior is ``P(spam) * prod P(w|spam)`` normalised against the
        same quantity for ham. It is evaluated in log space because the plain
        product of many small probabilities underflows to ``0.0`` for long
        messages, which would make the normalisation divide by zero.

        Words for which either ``get_ham_prob`` or ``get_spam_prob`` returns
        ``-1`` are ignored (presumably words never seen in training -- confirm
        against WordCounter).

        Args:
            text: Message text, passed to ``self.reader.text2vec``.

        Returns:
            The estimated probability that *text* is spam. The probability and
            the label ("Spam" when it exceeds 0.5, otherwise "Ham") are also
            printed.

        Raises:
            ZeroDivisionError: If the counter holds no emails at all, so the
                class priors cannot be computed.
        """
        vec = self.reader.text2vec(text)

        total = self.wordy.number_of_ham + self.wordy.number_of_spam
        log_spam = self._log_prob(self.wordy.number_of_spam / total)
        log_ham = self._log_prob(self.wordy.number_of_ham / total)

        for v in vec:
            hammy = self.wordy.get_ham_prob(v)
            spammy = self.wordy.get_spam_prob(v)
            if hammy != -1 and spammy != -1:
                log_ham += self._log_prob(hammy)
                log_spam += self._log_prob(spammy)

        if log_spam == -math.inf and log_ham == -math.inf:
            # Both classes have zero likelihood: no evidence either way.
            # Report even odds, which is classified as ham below.
            prob = 0.5
        else:
            # prob = 1 / (1 + exp(log_ham - log_spam)), arranged so that exp()
            # only ever receives a non-positive argument and cannot overflow.
            diff = log_ham - log_spam
            if diff >= 0:
                ratio = math.exp(-diff)
                prob = ratio / (1.0 + ratio)
            else:
                prob = 1.0 / (1.0 + math.exp(diff))

        print(prob)
        if prob > 0.5:
            print("Spam")
        else:
            print('Ham')
        return prob