Skip to content

Commit 3561a71

Browse files
Merge pull request #480 from tutorcruncher/email-spam-detection
2 parents 03a2351 + 3db3c09 commit 3561a71

File tree

18 files changed

+698
-24
lines changed

18 files changed

+698
-24
lines changed

.github/workflows/main.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ jobs:
2929
options: --entrypoint redis-server
3030

3131
env:
32-
DATABASE_URL: 'postgresql://postgres:postgres@localhost:5432/morpheus_test'
32+
TEST_DATABASE_URL: 'postgresql://postgres:postgres@localhost:5432/morpheus_test'
3333

3434
steps:
3535
- uses: actions/checkout@v2
@@ -45,7 +45,6 @@ jobs:
4545
pip freeze
4646
- name: lint
4747
run: make lint
48-
4948
- name: test
5049
run: make test
5150

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@ env/
55
build/
66
dist/
77
.coverage
8+
.env

.python-version

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
3.9.14

Makefile

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,19 @@ lint:
2020
.PHONY: test
2121
test:
2222
pytest tests/ --cov=src
23+
24+
.PHONY: reset-db
25+
reset-db:
26+
psql -h localhost -U postgres -c "DROP DATABASE IF EXISTS morpheus"
27+
psql -h localhost -U postgres -c "CREATE DATABASE morpheus"
28+
psql -h localhost -U postgres -d morpheus -f src/models.sql
29+
foxglove patch add_aggregation_view --live --patch-args ':'
30+
foxglove patch add_spam_status_and_reason_to_messages --live --patch-args ':'
31+
32+
# Run a specific patch by name:
33+
# make run_patch PATCH=patch_function_name
34+
# make run_patch PATCH=patch_function_name LIVE=1
35+
.PHONY: run_patch
36+
run_patch:
37+
foxglove patch $(PATCH) $(if $(LIVE),--live,) --patch-args ':'
38+

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,4 @@ uvicorn==0.20.0
2323
ipython==8.11.0
2424
py==1.11.0
2525
setuptools==78.0.2
26+
openai==1.85.0

setup.cfg

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ filterwarnings =
44
error
55
ignore::DeprecationWarning:asyncio.base_events
66
timeout = 20
7+
markers =
8+
spam: Using this marker on a test will mark the emails in that test as spam.
79

810
[flake8]
911
max-line-length = 120

src/llm_client.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
from foxglove import glove
2+
from openai import AsyncOpenAI
3+
4+
_client = None
5+
6+
7+
def get_openai_client():
8+
global _client
9+
if _client is None: # pragma: no cover
10+
api_key = glove.settings.openai_api_key
11+
if not api_key:
12+
raise RuntimeError('OPENAI_API_KEY is not set in the environment.')
13+
_client = AsyncOpenAI(api_key=api_key)
14+
return _client # pragma: no cover

src/patches.py

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,17 @@
11
import asyncio
2+
from foxglove import glove
23
from foxglove.db.patches import patch, run_sql_section
34
from textwrap import dedent, indent
45
from time import time
56
from tqdm import tqdm
67

78

89
@patch
9-
async def run_logic_sql(conn, settings, **kwargs):
10+
async def run_logic_sql(conn, **kwargs):
1011
"""
1112
run the "logic" section of models.sql
1213
"""
14+
settings = glove.settings
1315
await run_sql_section('logic', settings.sql_path.read_text(), conn)
1416

1517

@@ -35,7 +37,7 @@ async def chunked_update(conn, table, sql, sleep_time: float = 0):
3537

3638

3739
@patch
38-
async def performance_step1(conn, settings, **kwargs):
40+
async def performance_step1(conn, **kwargs):
3941
"""
4042
First step to changing schema to improve performance. THIS WILL BE SLOW, but can be run in the background.
4143
"""
@@ -81,7 +83,7 @@ async def performance_step1(conn, settings, **kwargs):
8183

8284

8385
@patch(direct=True)
84-
async def performance_step2(conn, settings, **kwargs):
86+
async def performance_step2(conn, **kwargs):
8587
"""
8688
Second step to changing schema to improve performance. THIS WILL BE VERY SLOW, but can be run in the background.
8789
"""
@@ -119,7 +121,7 @@ async def performance_step2(conn, settings, **kwargs):
119121

120122

121123
@patch(direct=True)
122-
async def performance_step3(conn, settings, **kwargs):
124+
async def performance_step3(conn, **kwargs):
123125
"""
124126
Third step to changing schema to improve performance. THIS WILL BE VERY SLOW, but can be run in the background.
125127
"""
@@ -145,7 +147,7 @@ async def performance_step3(conn, settings, **kwargs):
145147

146148

147149
@patch
148-
async def performance_step4(conn, settings, **kwargs):
150+
async def performance_step4(conn, **kwargs):
149151
"""
150152
Fourth step to changing schema to improve performance. This should not be too slow, but will LOCK ENTIRE TABLES.
151153
"""
@@ -197,8 +199,25 @@ async def performance_step4(conn, settings, **kwargs):
197199

198200

199201
@patch
200-
async def add_aggregation_view(conn, settings, **kwargs):
202+
async def add_aggregation_view(conn, **kwargs):
201203
"""
202204
run the "message_aggregation" section of models.sql
203205
"""
206+
settings = glove.settings
204207
await run_sql_section('message_aggregation', settings.sql_path.read_text(), conn)
208+
209+
210+
@patch(auto_run=True)
211+
async def add_spam_status_and_reason_to_messages(conn, **kwargs):
212+
"""
213+
Add spam_status and spam_reason columns to the messages table.
214+
"""
215+
print('Adding spam_status and spam_reason columns to messages table')
216+
await conn.execute(
217+
"""
218+
ALTER TABLE messages
219+
ADD COLUMN IF NOT EXISTS spam_status BOOLEAN DEFAULT FALSE,
220+
ADD COLUMN IF NOT EXISTS spam_reason TEXT;
221+
"""
222+
)
223+
print('Added spam_status and spam_reason columns')

src/schemas/messages.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ class MessageStatus(str, Enum):
4646
soft_bounce = 'soft_bounce'
4747
open = 'open'
4848
click = 'click'
49-
spam = 'spam'
49+
spam = 'spam' # this status is used when the recipient marks the email as spam
5050
unsub = 'unsub'
5151
reject = 'reject'
5252

src/settings.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,18 @@
1+
from dotenv import load_dotenv
12
from foxglove import BaseSettings
23
from pathlib import Path
34
from pydantic import NoneStr, validator
45
from typing import List
56

7+
load_dotenv()
8+
69
THIS_DIR = Path(__file__).parent.resolve()
710

811

912
class Settings(BaseSettings):
1013
pg_dsn = 'postgresql://postgres@localhost:5432/morpheus'
1114
sql_path: Path = THIS_DIR / 'models.sql'
12-
patch_paths: List[str] = ['app.patches']
15+
patch_paths: List[str] = ['src.patches']
1316

1417
cookie_name = 'morpheus'
1518
auth_key = 'insecure'
@@ -45,10 +48,21 @@ class Settings(BaseSettings):
4548
canada_send_number = '12048170659'
4649
tc_registered_originator = 'TtrCrnchr'
4750

51+
enable_spam_check: bool = True
52+
min_recipients_for_spam_check: int = 20
53+
llm_model_name: str = 'gpt-4o'
54+
openai_api_key: str = None
55+
4856
@validator('pg_dsn')
4957
def heroku_ready_pg_dsn(cls, v):
5058
return v.replace('gres://', 'gresql://')
5159

60+
@validator('test_output', pre=True, always=True)
61+
def ensure_test_output_path(cls, v): # pragma: no cover
62+
if v is None or isinstance(v, Path):
63+
return v
64+
return Path(v)
65+
5266
@property
5367
def mandrill_webhook_url(self):
5468
return f'https://{self.host_name}/webhook/mandrill/'
@@ -70,4 +84,10 @@ class Config:
7084
'auth_key': {'env': 'AUTH_KEY'},
7185
'user_auth_key': {'env': 'USER_AUTH_KEY'},
7286
'host_name': {'env': 'HOST_NAME'},
87+
'enable_spam_check': {'env': 'ENABLE_SPAM_CHECK'},
88+
'min_recipients_for_spam_check': {'env': 'MIN_RECIPIENTS_FOR_SPAM_CHECK'},
89+
'llm_model_name': {'env': 'LLM_MODEL_NAME'},
90+
'openai_api_key': {'env': 'OPENAI_API_KEY'},
91+
'test_output': {'env': 'TEST_OUTPUT'},
7392
}
93+
env_file = '.env'

0 commit comments

Comments
 (0)