This repository was archived by the owner on Sep 9, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathemail_parser.py
More file actions
64 lines (59 loc) · 2 KB
/
email_parser.py
File metadata and controls
64 lines (59 loc) · 2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Utility for Copying Files from an SFTP Server to an S3 Bucket
#
# Copyright 2017 Jonas McCallum.
#
# Open source, MIT license
# http://www.opensource.org/licenses/mit-license.php
"""
Email parser
Author: Jonas McCallum
https://github.com/foobarmus
"""
import re
# Token specification as (label, expression, lookbehind) triples.  They are
# joined below into a single alternation, so earlier entries take precedence
# when several could match at the same position.
#
# The header-value tokens come first: right after a "To:", "From:",
# "Region:" or "Subject:" label the rest of the line is captured as the
# value, even when that value itself contains a colon
# (e.g. "Subject: Re: hello").
#
# CRUD is non-greedy so it consumes only up to the first colon on a line;
# a greedy match would run to the *last* colon and swallow part of the value
# together with the label.
#
# Known limitation: a first body line containing a colon still has its
# "xxx:" prefix consumed as CRUD before BODY starts.
TOK_SPEC = [('TO', '.+', '(?<=To:)'),
            ('FROM', '.+', '(?<=From:)'),
            ('REGION', '.+', '(?<=Region:)'),
            ('SUBJECT', '.+', '(?<=Subject:)'),
            ('CRUD', '.+?:', ''),
            ('EOL', '$', ''),
            ('BODY', '.+', '')]

# Template for one alternative: optional lookbehind + named capture group.
TOKEN = '{}(?P<{}>{})'

# A list (not a one-shot generator) so it can still be inspected after the
# pattern has been built.
TOKENS = [TOKEN.format(lookbehind, label, expression)
          for label, expression, lookbehind in TOK_SPEC]

# MULTILINE lets '$' (EOL) match before every newline, not only at the end
# of the text.
RE = re.compile('|'.join(TOKENS), re.MULTILINE)

# Token labels collected into the header dict by parse_email().
HEADER_LABELS = ['TO', 'FROM', 'REGION', 'SUBJECT']

# Token labels the tokenizer skips over without emitting a token.
IGNORE = ['CRUD', 'EOL']
def tokenize(text, re_, ignore):
    """Split ``text`` into ``(label, value)`` tokens using ``re_``.

    ``re_`` is a compiled pattern whose named groups label the tokens; it is
    matched repeatedly, each time anchored at the current scan position.

    * A match whose group label is in ``ignore`` emits nothing; the scan
      position moves to the end of the match, or one character forward when
      the match is empty.
    * A ``'BODY'`` match ends the scan: everything from the current position
      to the end of ``text`` is emitted, unstripped, as the BODY value.
    * Any other match emits the stripped group text and the scan resumes at
      the end of the match.

    Returns the list of emitted tokens in order of appearance.
    """
    found = []
    cursor = 0
    limit = len(text)

    while cursor < limit:
        hit = re_.match(text, cursor)
        label = hit.lastgroup

        if label in ignore:
            # Always advance at least one character so that a zero-width
            # match (such as an end-of-line anchor) cannot stall the scan.
            cursor = max(hit.end(), cursor + 1)
            continue

        if label == 'BODY':
            # The body is the whole remainder, not just the matched line.
            found.append((label, text[cursor:]))
            break

        found.append((label, hit.group(label).strip()))
        cursor = hit.end()

    return found
def parse_email(template, **args):
    """Populate an email template and parse it into a message description.

    The file at ``template`` is read, its ``str.format`` replacement fields
    are filled from ``args``, and the result is tokenized with the
    module-level ``RE`` and ``IGNORE`` settings.

    Args:
        template: Path of the template file to read.
        **args: Values substituted into the template's ``{name}`` fields.

    Returns:
        A ``(email, region)`` tuple.  ``email`` is a nested dict with
        ``Destination``, ``Message`` and ``Source`` entries built from the
        To, Subject, body and From parts of the template; ``region`` is the
        value of the template's Region header.
        NOTE(review): the dict layout looks like the keyword arguments of an
        AWS SES ``send_email`` call -- confirm against the caller.

    Raises:
        KeyError: If the template has no To, From, Region or Subject header,
            or if a named replacement field has no matching keyword argument.
    """
    with open(template, 'r') as f:
        text = f.read()
    personalized_template = text.format(**args)
    tokens = tokenize(personalized_template, RE, IGNORE)
    header = {k: v.strip() for k, v in tokens
              if k in HEADER_LABELS}
    # Pick the BODY token explicitly instead of assuming the last token is
    # the body: a template with headers but no body text would otherwise
    # send its final header value as the message body.
    body = next((v for k, v in tokens if k == 'BODY'), '').strip()
    email = {'Destination': {'ToAddresses': [header['TO']]},
             'Message': {'Body': {'Text': {'Charset': 'UTF-8', 'Data': body}},
                         'Subject': {'Charset': 'UTF-8',
                                     'Data': header['SUBJECT']}},
             'Source': header['FROM']}
    region = header['REGION']
    return email, region