Skip to content

Commit 78672d0

Browse files
authored
Merge pull request #84 from networktocode/kc_aws_parser
AWS Direct Parser
2 parents 2fd30d7 + ccd1f72 commit 78672d0

16 files changed

+423
-0
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@
66

77
- #86 - Fix `CombinedProcessor` carries over data from previous parsing
88

9+
### Added
10+
11+
- #84 - New parser added for text. Added new provider `AWS` using `Text` and `EmailSubjectParser`
12+
913
## v2.0.1 - 2021-09-16
1014

1115
### Fixed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ By default, there is a `GenericProvider` that support a `SimpleProcessor` using
4343

4444
#### Supported providers based on other parsers
4545

46+
- AWS
4647
- AquaComms
4748
- Cogent
4849
- Colt

circuit_maintenance_parser/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from .provider import (
88
GenericProvider,
99
AquaComms,
10+
AWS,
1011
Cogent,
1112
Colt,
1213
EUNetworks,
@@ -29,6 +30,7 @@
2930
SUPPORTED_PROVIDERS = (
3031
GenericProvider,
3132
AquaComms,
33+
AWS,
3234
Cogent,
3335
Colt,
3436
EUNetworks,

circuit_maintenance_parser/parser.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,3 +226,26 @@ def parser_hook(self, raw: bytes):
226226
def parse_csv(raw: bytes) -> List[Dict]:
227227
"""Custom CSV parsing."""
228228
raise NotImplementedError
229+
230+
231+
class Text(Parser):
    """Parser for plain-text (``text/plain``) payloads."""

    _data_types = ["text/plain"]

    def parser_hook(self, raw: bytes):
        """Convert ``raw`` to text and return everything ``parse_text`` yields as a list."""
        decoded = self.get_text_hook(raw)
        return list(self.parse_text(decoded))

    @staticmethod
    def get_text_hook(raw: bytes) -> str:
        """Turn the raw bytes into a string; subclasses may override this step."""
        text = raw.decode()
        return text

    def parse_text(self, text) -> List[Dict]:
        """Provider-specific text parsing; must be implemented by subclasses."""
        raise NotImplementedError
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
"""AquaComms parser."""
2+
import hashlib
3+
import logging
4+
import quopri
5+
import re
6+
7+
import bs4 # type: ignore
8+
9+
from dateutil import parser
10+
11+
from circuit_maintenance_parser.parser import CircuitImpact, EmailSubjectParser, Impact, Status, Text
12+
13+
# pylint: disable=too-many-nested-blocks, too-many-branches
14+
15+
logger = logging.getLogger(__name__)
16+
17+
18+
class SubjectParserAWS1(EmailSubjectParser):
    """Pull the AWS account number out of a notification e-mail subject."""

    def parse_subject(self, subject):
        """Return a one-item list holding the account found in ``subject``.

        Example: AWS Direct Connect Planned Maintenance Notification [AWS Account: 00000001]

        The dict in the returned list is empty when the subject has no
        ``[AWS Account: <digits>]`` / ``[AWS Account ID: <digits>]`` tag.
        """
        account_match = re.search(r"\[AWS Account ?I?D?: ([0-9]+)\]", subject)
        if not account_match:
            return [{}]
        return [{"account": account_match.group(1)}]
31+
32+
33+
class TextParserAWS1(Text):
    """Text parser for the body of AWS maintenance notification e-mails."""

    @staticmethod
    def get_text_hook(raw):
        """Undo the quoted-printable encoding and return the text BeautifulSoup extracts."""
        decoded = quopri.decodestring(raw)
        return bs4.BeautifulSoup(decoded, features="lxml").text

    def parse_text(self, text):
        """Build the maintenance data dict from the decoded e-mail body.

        Example:
            Hello,

            Planned maintenance has been scheduled on an AWS Direct Connect router in A=
            Block, New York, NY from Thu, 20 May 2021 08:00:00 GMT to Thu, 20 Ma=
            y 2021 14:00:00 GMT for 6 hours. During this maintenance window, your AWS D=
            irect Connect services listed below may become unavailable.

            aaaaa-00000001
            aaaaa-00000002
            aaaaa-00000003
            aaaaa-00000004
            aaaaa-00000005
            aaaaa-00000006

            This maintenance is scheduled to avoid disrupting redundant connections at =
            the same time.

        Returns a one-item list whose dict carries ``circuits``, ``maintenance_id`` and
        ``status``, plus ``summary`` / ``start`` / ``end`` when the matching line is found.
        """
        data = {"circuits": []}
        impact = Impact.OUTAGE
        status = Status.CONFIRMED
        # Pieces that are concatenated and hashed into the maintenance ID at the end.
        id_parts = []

        for line in text.splitlines():
            lowered = line.lower()

            if "planned maintenance" in lowered:
                data["summary"] = line
                window = re.search(
                    r"([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3}) to ([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3})",
                    line,
                )
                if window:
                    begins, finishes = window.groups()
                    data["start"] = self.dt2ts(parser.parse(begins))
                    data["end"] = self.dt2ts(parser.parse(finishes))
                    id_parts.append(str(data["start"]))
                    id_parts.append(str(data["end"]))

            # The three checks below are mutually exclusive, first match wins.
            if "may become unavailable" in lowered:
                impact = Impact.OUTAGE
                continue
            if "has been cancelled" in lowered:
                status = Status.CANCELLED
                continue
            # NOTE(review): re.match only anchors at the start of the line, so any line that
            # begins with an ID-shaped token is accepted and the whole line is used as the ID.
            if re.match(r"[a-z]{5}-[a-z0-9]{8}", line):
                id_parts.append(line)
                data["circuits"].append(CircuitImpact(circuit_id=line, impact=impact))

        # No maintenance ID found in emails, so a hash value is being generated using the start,
        # end and IDs of all circuits in the notification.
        hash_input = "".join(id_parts)
        data["maintenance_id"] = hashlib.md5(hash_input.encode("utf-8")).hexdigest()  # nosec
        data["status"] = status
        return [data]

circuit_maintenance_parser/provider.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from circuit_maintenance_parser.processor import CombinedProcessor, SimpleProcessor, GenericProcessor
1616

1717
from circuit_maintenance_parser.parsers.aquacomms import HtmlParserAquaComms1, SubjectParserAquaComms1
18+
from circuit_maintenance_parser.parsers.aws import SubjectParserAWS1, TextParserAWS1
1819
from circuit_maintenance_parser.parsers.cogent import HtmlParserCogent1
1920
from circuit_maintenance_parser.parsers.colt import ICalParserColt1, CsvParserColt1
2021
from circuit_maintenance_parser.parsers.gtt import HtmlParserGTT1
@@ -116,6 +117,15 @@ class AquaComms(GenericProvider):
116117
_default_organizer = "[email protected]"
117118

118119

120+
class AWS(GenericProvider):
    """Provider class for AWS notifications."""

    # NOTE(review): this address looks like an e-mail redaction placeholder in this
    # rendering of the commit - confirm the real value against the repository.
    _default_organizer = "[email protected]"

    # A single combined processor running the e-mail date, text body and subject parsers.
    _processors: List[GenericProcessor] = [
        CombinedProcessor(
            data_parsers=[EmailDateParser, TextParserAWS1, SubjectParserAWS1],
        ),
    ]
127+
128+
119129
class Cogent(GenericProvider):
120130
"""Cogent provider custom class."""
121131

tests/unit/data/aws/aws1.eml

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
Subject: [rCluster Request] [rCloud AWS Notification] AWS Direct Connect
2+
Planned Maintenance Notification [AWS Account: 0000000000001]
3+
MIME-Version: 1.0
4+
Content-Type: text/plain; charset="UTF-8"
5+
Content-Transfer-Encoding: quoted-printable
6+
X-SM-COMMUNICATION: true
7+
X-SM-COMMUNICATION-TYPE: AWS_DIRECTCONNECT_MAINTENANCE_SCHEDULED
8+
X-SM-DEDUPING-ID: 7cc8bab7-00bb-44e0-a3ec-bdd1a5560b80-EMAIL--1012261942-036424c1a19ca69ca7ea459ebd6823e1
9+
Date: Thu, 6 May 2021 21:52:56 +0000
10+
Feedback-ID: 1.us-east-1.xvKJ2gIiw98/SnInpbS9SQT1XBoAzwrySbDsqgMkBQI=:AmazonSES
11+
X-SES-Outgoing: 2021.05.06-54.240.48.83
12+
X-Original-Sender: [email protected]
13+
X-Original-Authentication-Results: mx.google.com; dkim=pass
14+
[email protected] header.s=szqgv33erturdv5cvz4vtb5qcy53gdkn
15+
header.b=IQc0x0aC; dkim=pass [email protected]
16+
header.s=ug7nbtf4gccmlpwj322ax3p6ow6yfsug header.b=X4gZtDlT; spf=pass
17+
(google.com: domain of 0100017943ab6519-f09ba161-049c-45e4-8ff3-698af4d94f86-000000@amazonses.com
18+
designates 54.240.48.83 as permitted sender) smtp.mailfrom=0100017943ab6519-f09ba161-049c-45e4-8ff3-698af4d94f86-000000@amazonses.com;
19+
dmarc=pass (p=QUARANTINE sp=QUARANTINE dis=NONE) header.from=amazon.com
20+
X-Original-From: "Amazon Web Services, Inc." <[email protected]>
21+
Reply-To: "Amazon Web Services, Inc." <[email protected]>
22+
Precedence: list
23+
24+
Hello,
25+
26+
Planned maintenance has been scheduled on an AWS Direct Connect router in A=
27+
Block, New York, NY from Thu, 20 May 2021 08:00:00 GMT to Thu, 20 Ma=
28+
y 2021 14:00:00 GMT for 6 hours. During this maintenance window, your AWS D=
29+
irect Connect services listed below may become unavailable.
30+
31+
aaaaa-00000001
32+
aaaaa-00000002
33+
aaaaa-00000003
34+
aaaaa-00000004
35+
aaaaa-00000005
36+
aaaaa-00000006
37+
38+
This maintenance is scheduled to avoid disrupting redundant connections at =
39+
the same time.
40+
41+
If you encounter any problems with your connection after the end of this ma=
42+
intenance window, please contact AWS Support[1].
43+
44+
[1] https://aws.amazon.com/support
45+
46+
Sincerely,
47+
Amazon Web Services
48+
49+
Amazon Web Services, Inc. is a subsidiary of Amazon.com, Inc. Amazon.com is=
50+
a registered trademark of Amazon.com, Inc. This message was produced and d=
51+
istributed by Amazon Web Services Inc., 410 Terry Ave. North, Seattle, WA 9=
52+
8109-5210.
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
[
2+
{
3+
"account": "0000000000001",
4+
"circuits": [
5+
{
6+
"circuit_id": "aaaaa-00000001",
7+
"impact": "OUTAGE"
8+
},
9+
{
10+
"circuit_id": "aaaaa-00000002",
11+
"impact": "OUTAGE"
12+
},
13+
{
14+
"circuit_id": "aaaaa-00000003",
15+
"impact": "OUTAGE"
16+
},
17+
{
18+
"circuit_id": "aaaaa-00000004",
19+
"impact": "OUTAGE"
20+
},
21+
{
22+
"circuit_id": "aaaaa-00000005",
23+
"impact": "OUTAGE"
24+
},
25+
{
26+
"circuit_id": "aaaaa-00000006",
27+
"impact": "OUTAGE"
28+
}
29+
],
30+
"end": 1621519200,
31+
"maintenance_id": "15faf02fcf2e999792668df97828bc76",
32+
"organizer": "[email protected]",
33+
"provider": "aws",
34+
"sequence": 1,
35+
"stamp": 1620337976,
36+
"start": 1621497600,
37+
"status": "CONFIRMED",
38+
"summary": "Planned maintenance has been scheduled on an AWS Direct Connect router in A Block, New York, NY from Thu, 20 May 2021 08:00:00 GMT to Thu, 20 May 2021 14:00:00 GMT for 6 hours. During this maintenance window, your AWS Direct Connect services listed below may become unavailable.",
39+
"uid": "0"
40+
}
41+
]
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
[
2+
{
3+
"account": "0000000000001"
4+
}
5+
]
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
[
2+
{
3+
"circuits": [
4+
{
5+
"circuit_id": "aaaaa-00000001",
6+
"impact": "OUTAGE"
7+
},
8+
{
9+
"circuit_id": "aaaaa-00000002",
10+
"impact": "OUTAGE"
11+
},
12+
{
13+
"circuit_id": "aaaaa-00000003",
14+
"impact": "OUTAGE"
15+
},
16+
{
17+
"circuit_id": "aaaaa-00000004",
18+
"impact": "OUTAGE"
19+
},
20+
{
21+
"circuit_id": "aaaaa-00000005",
22+
"impact": "OUTAGE"
23+
},
24+
{
25+
"circuit_id": "aaaaa-00000006",
26+
"impact": "OUTAGE"
27+
}
28+
],
29+
"end": 1621519200,
30+
"maintenance_id": "15faf02fcf2e999792668df97828bc76",
31+
"start": 1621497600,
32+
"status": "CONFIRMED",
33+
"summary": "Planned maintenance has been scheduled on an AWS Direct Connect router in A Block, New York, NY from Thu, 20 May 2021 08:00:00 GMT to Thu, 20 May 2021 14:00:00 GMT for 6 hours. During this maintenance window, your AWS Direct Connect services listed below may become unavailable."
34+
}
35+
]

0 commit comments

Comments
 (0)