Skip to content

Commit c350a44

Browse files
Merge branch 'develop' into extend-readme
2 parents 70c6c56 + 491ea99 commit c350a44

24 files changed

+607
-26
lines changed

CHANGELOG.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,21 @@
11
# Changelog
22

3+
## v2.0.3
4+
5+
### Added
6+
7+
- #91 - `Provider` now supports `_include_filter` and `_exclude_filter` attributes (regex-based) to select which notifications are relevant to parse and to skip those that are not, avoiding false positives.
8+
9+
## v2.0.2 - 2021-09-28
10+
11+
### Fixed
12+
13+
- #86 - Fix `CombinedProcessor` carrying over data from a previous parsing run
14+
15+
### Added
16+
17+
- #84 - New parser added for text. Added new provider `AWS` using `Text` and `EmailSubjectParser`
18+
319
## v2.0.1 - 2021-09-16
420

521
### Fixed

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ By default, there is a `GenericProvider` that support a `SimpleProcessor` using
5959

6060
#### Supported providers based on other parsers
6161

62+
- AWS
6263
- AquaComms
6364
- Cogent
6465
- Colt
@@ -193,7 +194,7 @@ There is also a `cli` entrypoint `circuit-maintenance-parser` which offers easy
193194
circuit-maintenance-parser --data-file "/tmp/___ZAYO TTN-00000000 Planned MAINTENANCE NOTIFICATION___.eml" --data-type email --provider-type zayo
194195
Circuit Maintenance Notification #0
195196
{
196-
"account": "my_account",
197+
"account": "some account",
197198
"circuits": [
198199
{
199200
"circuit_id": "/OGYX/000000/ /ZYO /",
@@ -297,6 +298,7 @@ The project is following Network to Code software development guidelines and is
297298
1. Define the `Parsers` (inheriting from some of the generic `Parsers` or a new one) that will extract the data from the notification, which may itself contain multiple `DataParts`. The `data_type` of the `Parser` and the `DataPart` have to match. The custom `Parsers` will be placed in the `parsers` folder.
298299
2. Update the `unit/test_parsers.py` with the new parsers, providing some data to test and validate the extracted data.
299300
3. Define a new `Provider` inheriting from the `GenericProvider`, defining the `Processors` and the respective `Parsers` to be used. Maybe you can reuse some of the generic `Processors` or maybe you will need to create a custom one. If this is the case, place it in the `processors` folder.
301+
- The `Provider` also supports the definition of an `_include_filter` and an `_exclude_filter` to limit the notifications that are actually processed, avoiding false-positive errors for notifications that are not relevant.
300302
4. Update the `unit/test_e2e.py` with the new provider, providing some data to test and validate the final `Maintenances` created.
301303
5. **Expose the new `Provider` class** updating the map `SUPPORTED_PROVIDERS` in `circuit_maintenance_parser/__init__.py` to officially expose the `Provider`.
302304

circuit_maintenance_parser/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from .provider import (
88
GenericProvider,
99
AquaComms,
10+
AWS,
1011
Cogent,
1112
Colt,
1213
EUNetworks,
@@ -29,6 +30,7 @@
2930
SUPPORTED_PROVIDERS = (
3031
GenericProvider,
3132
AquaComms,
33+
AWS,
3234
Cogent,
3335
Colt,
3436
EUNetworks,
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
"""Constants used in the library."""
2+
3+
EMAIL_HEADER_SUBJECT = "email-header-subject"
4+
EMAIL_HEADER_DATE = "email-header-date"

circuit_maintenance_parser/data.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
import email
66
from pydantic import BaseModel, Extra
7+
from circuit_maintenance_parser.constants import EMAIL_HEADER_SUBJECT, EMAIL_HEADER_DATE
8+
79

810
logger = logging.getLogger(__name__)
911

@@ -73,9 +75,8 @@ def init_from_emailmessage(cls: Type["NotificationData"], email_message) -> Opti
7375
cls.walk_email(email_message, data_parts)
7476

7577
# Adding extra headers that are interesting to be parsed
76-
data_parts.add(DataPart("email-header-subject", email_message["Subject"].encode()))
77-
# TODO: Date could be used to extend the "Stamp" time of a notification when not available, but we need a parser
78-
data_parts.add(DataPart("email-header-date", email_message["Date"].encode()))
78+
data_parts.add(DataPart(EMAIL_HEADER_SUBJECT, email_message["Subject"].encode()))
79+
data_parts.add(DataPart(EMAIL_HEADER_DATE, email_message["Date"].encode()))
7980
return cls(data_parts=list(data_parts))
8081
except Exception: # pylint: disable=broad-except
8182
logger.exception("Error found initializing data from email message: %s", email_message)

circuit_maintenance_parser/parser.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
from circuit_maintenance_parser.errors import ParserError
1717
from circuit_maintenance_parser.output import Status, Impact, CircuitImpact
18+
from circuit_maintenance_parser.constants import EMAIL_HEADER_SUBJECT, EMAIL_HEADER_DATE
1819

1920
# pylint: disable=no-member
2021

@@ -177,7 +178,7 @@ def clean_line(line):
177178
class EmailDateParser(Parser):
178179
"""Parser for Email Date."""
179180

180-
_data_types = ["email-header-date"]
181+
_data_types = [EMAIL_HEADER_DATE]
181182

182183
def parser_hook(self, raw: bytes):
183184
"""Execute parsing."""
@@ -190,7 +191,7 @@ def parser_hook(self, raw: bytes):
190191
class EmailSubjectParser(Parser):
191192
"""Parse data from subject or email."""
192193

193-
_data_types = ["email-header-subject"]
194+
_data_types = [EMAIL_HEADER_SUBJECT]
194195

195196
def parser_hook(self, raw: bytes):
196197
"""Execute parsing."""
@@ -226,3 +227,26 @@ def parser_hook(self, raw: bytes):
226227
def parse_csv(raw: bytes) -> List[Dict]:
227228
"""Custom CSV parsing."""
228229
raise NotImplementedError
230+
231+
232+
class Text(Parser):
233+
"""Text parser."""
234+
235+
_data_types = ["text/plain"]
236+
237+
def parser_hook(self, raw: bytes):
238+
"""Execute parsing."""
239+
result = []
240+
text = self.get_text_hook(raw)
241+
for data in self.parse_text(text):
242+
result.append(data)
243+
return result
244+
245+
@staticmethod
246+
def get_text_hook(raw: bytes) -> str:
247+
"""Can be overwritten by subclasses."""
248+
return raw.decode()
249+
250+
def parse_text(self, text) -> List[Dict]:
251+
"""Custom text parsing."""
252+
raise NotImplementedError
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
"""AWS parser."""
2+
import hashlib
3+
import logging
4+
import quopri
5+
import re
6+
7+
import bs4 # type: ignore
8+
9+
from dateutil import parser
10+
11+
from circuit_maintenance_parser.parser import CircuitImpact, EmailSubjectParser, Impact, Status, Text
12+
13+
# pylint: disable=too-many-nested-blocks, too-many-branches
14+
15+
logger = logging.getLogger(__name__)
16+
17+
18+
class SubjectParserAWS1(EmailSubjectParser):
19+
"""Subject parser for AWS notifications."""
20+
21+
def parse_subject(self, subject):
22+
"""Parse subject.
23+
24+
Example: AWS Direct Connect Planned Maintenance Notification [AWS Account: 00000001]
25+
"""
26+
data = {}
27+
search = re.search(r"\[AWS Account ?I?D?: ([0-9]+)\]", subject)
28+
if search:
29+
data["account"] = search.group(1)
30+
return [data]
31+
32+
33+
class TextParserAWS1(Text):
34+
"""Parse text body of email."""
35+
36+
@staticmethod
37+
def get_text_hook(raw):
38+
"""Modify soup before entering `parse_text`."""
39+
soup = bs4.BeautifulSoup(quopri.decodestring(raw), features="lxml")
40+
return soup.text
41+
42+
def parse_text(self, text):
43+
"""Parse text.
44+
45+
Example:
46+
Hello,
47+
48+
Planned maintenance has been scheduled on an AWS Direct Connect router in A=
49+
Block, New York, NY from Thu, 20 May 2021 08:00:00 GMT to Thu, 20 Ma=
50+
y 2021 14:00:00 GMT for 6 hours. During this maintenance window, your AWS D=
51+
irect Connect services listed below may become unavailable.
52+
53+
aaaaa-00000001
54+
aaaaa-00000002
55+
aaaaa-00000003
56+
aaaaa-00000004
57+
aaaaa-00000005
58+
aaaaa-00000006
59+
60+
This maintenance is scheduled to avoid disrupting redundant connections at =
61+
the same time.
62+
"""
63+
data = {"circuits": []}
64+
impact = Impact.OUTAGE
65+
maintenace_id = ""
66+
status = Status.CONFIRMED
67+
for line in text.splitlines():
68+
if "planned maintenance" in line.lower():
69+
data["summary"] = line
70+
search = re.search(
71+
r"([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3}) to ([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3})",
72+
line,
73+
)
74+
if search:
75+
data["start"] = self.dt2ts(parser.parse(search.group(1)))
76+
data["end"] = self.dt2ts(parser.parse(search.group(2)))
77+
maintenace_id += str(data["start"])
78+
maintenace_id += str(data["end"])
79+
if "may become unavailable" in line.lower():
80+
impact = Impact.OUTAGE
81+
elif "has been cancelled" in line.lower():
82+
status = Status.CANCELLED
83+
elif re.match(r"[a-z]{5}-[a-z0-9]{8}", line):
84+
maintenace_id += line
85+
data["circuits"].append(CircuitImpact(circuit_id=line, impact=impact))
86+
# No maintenance ID found in emails, so a hash value is being generated using the start,
87+
# end and IDs of all circuits in the notification.
88+
data["maintenance_id"] = hashlib.md5(maintenace_id.encode("utf-8")).hexdigest() # nosec
89+
data["status"] = status
90+
return [data]

circuit_maintenance_parser/parsers/lumen.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import bs4 # type: ignore
77
from bs4.element import ResultSet # type: ignore
88

9-
from circuit_maintenance_parser.parser import Html, Impact, CircuitImpact, Status
9+
from circuit_maintenance_parser.parser import CircuitImpact, Html, Impact, Status
1010

1111
# pylint: disable=too-many-nested-blocks, too-many-branches
1212

@@ -88,8 +88,16 @@ def parse_tables(self, tables: ResultSet, data: Dict):
8888
if "account" not in data:
8989
data["account"] = cells[idx].string
9090
if num_columns == 10:
91-
if cells[idx + 9].string == "Completed":
91+
status_string = cells[idx + 9].string
92+
if status_string == "Completed":
9293
data["status"] = Status("COMPLETED")
94+
elif status_string == "Postponed":
95+
data["status"] = Status("RE-SCHEDULED")
96+
elif status_string == "Not Completed":
97+
data["status"] = Status("CANCELLED")
98+
elif "status" not in data:
99+
# Update to an existing ticket may not include an update to the status - make a guess
100+
data["status"] = "CONFIRMED"
93101

94102
data_circuit = {}
95103
data_circuit["circuit_id"] = cells[idx + 1].string

circuit_maintenance_parser/processor.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,11 @@ class CombinedProcessor(GenericProcessor):
115115
# The CombinedProcessor will consolidate all the parsed data into this variable
116116
combined_maintenance_data: Dict = {}
117117

118+
def process(self, data: NotificationData, extended_data: Dict) -> Iterable[Maintenance]:
119+
"""Extend base class process method to ensure that self.combined_maintenance_data is initialized correctly."""
120+
self.combined_maintenance_data = {}
121+
return super().process(data, extended_data)
122+
118123
def process_hook(self, maintenances_extracted_data, maintenances_data):
119124
"""All the parsers contribute with a subset of data that is extended.
120125

circuit_maintenance_parser/provider.py

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
"""Definition of Provider class as the entry point to the library."""
22
import logging
3+
import re
34
import traceback
45

5-
from typing import Iterable, List
6+
from typing import Iterable, List, Dict
67

78
from pydantic import BaseModel
89

@@ -13,8 +14,10 @@
1314
from circuit_maintenance_parser.parser import ICal, EmailDateParser
1415
from circuit_maintenance_parser.errors import ProcessorError, ProviderError
1516
from circuit_maintenance_parser.processor import CombinedProcessor, SimpleProcessor, GenericProcessor
17+
from circuit_maintenance_parser.constants import EMAIL_HEADER_SUBJECT
1618

1719
from circuit_maintenance_parser.parsers.aquacomms import HtmlParserAquaComms1, SubjectParserAquaComms1
20+
from circuit_maintenance_parser.parsers.aws import SubjectParserAWS1, TextParserAWS1
1821
from circuit_maintenance_parser.parsers.cogent import HtmlParserCogent1
1922
from circuit_maintenance_parser.parsers.colt import ICalParserColt1, CsvParserColt1
2023
from circuit_maintenance_parser.parsers.gtt import HtmlParserGTT1
@@ -49,6 +52,14 @@ class GenericProvider(BaseModel):
4952
that will be used. Default: `[SimpleProcessor(data_parsers=[ICal])]`.
5053
_default_organizer (optional): Defines a default `organizer`, an email address, to be used to create a
5154
`Maintenance` in absence of the information in the original notification.
55+
_include_filter (optional): Dictionary that defines matching regex per data type to take a notification into
56+
account.
57+
_exclude_filter (optional): Dictionary that defines matching regex per data type to NOT take a notification
58+
into account.
59+
60+
Notes:
61+
- If a notification matches both the `_include_filter` and `_exclude_filter`, the exclusion takes precedence and
62+
the notification will be filtered out.
5263
5364
Examples:
5465
>>> GenericProvider()
@@ -58,12 +69,55 @@ class GenericProvider(BaseModel):
5869
_processors: List[GenericProcessor] = [SimpleProcessor(data_parsers=[ICal])]
5970
_default_organizer: str = "unknown"
6071

72+
_include_filter: Dict[str, List[str]] = {}
73+
_exclude_filter: Dict[str, List[str]] = {}
74+
75+
def include_filter_check(self, data: NotificationData) -> bool:
    """Tell whether the notification passes the include filter.

    Without an `_include_filter` every notification passes; otherwise the
    outcome of `filter_check` against `_include_filter` is returned.
    """
    if not self._include_filter:
        return True
    return self.filter_check(self._include_filter, data, "include")
80+
81+
def exclude_filter_check(self, data: NotificationData) -> bool:
    """Tell whether the notification matches the exclude filter.

    Without an `_exclude_filter` nothing is excluded (returns False); otherwise
    the outcome of `filter_check` against `_exclude_filter` is returned.
    """
    if not self._exclude_filter:
        return False
    return self.filter_check(self._exclude_filter, data, "exclude")
86+
87+
@staticmethod
88+
def filter_check(filter_dict: Dict, data: NotificationData, filter_type: str) -> bool:
89+
"""Generic filter check."""
90+
data_part_content = None
91+
for data_part in data.data_parts:
92+
filter_data_type = data_part.type
93+
if filter_data_type not in filter_dict:
94+
continue
95+
96+
data_part_content = data_part.content.decode()
97+
if any(re.search(filter_re, data_part_content) for filter_re in filter_dict[filter_data_type]):
98+
logger.debug("Matching %s filter expression for %s.", filter_type, data_part_content)
99+
return True
100+
101+
if data_part_content:
102+
logger.warning("Not matching any %s filter expression for %s.", filter_type, data_part_content)
103+
else:
104+
logger.warning(
105+
"Not matching any %s filter expression because the notification doesn't contain the expected data_types: %s",
106+
filter_type,
107+
", ".join(filter_dict.keys()),
108+
)
109+
return False
110+
61111
def get_maintenances(self, data: NotificationData) -> Iterable[Maintenance]:
62112
"""Main entry method that will use the defined `_processors` in order to extract the `Maintenances` from data."""
63113
provider_name = self.__class__.__name__
64114
error_message = ""
65115
related_exceptions = []
66116

117+
if self.exclude_filter_check(data) or not self.include_filter_check(data):
118+
logger.debug("Skipping notification %s due filtering policy for %s.", data, self.__class__.__name__)
119+
return []
120+
67121
for processor in self._processors:
68122
try:
69123
return processor.process(data, self.get_extended_data())
@@ -116,6 +170,15 @@ class AquaComms(GenericProvider):
116170
_default_organizer = "[email protected]"
117171

118172

173+
class AWS(GenericProvider):
174+
"""AWS provider custom class."""
175+
176+
_processors: List[GenericProcessor] = [
177+
CombinedProcessor(data_parsers=[EmailDateParser, TextParserAWS1, SubjectParserAWS1]),
178+
]
179+
_default_organizer = "[email protected]"
180+
181+
119182
class Cogent(GenericProvider):
120183
"""Cogent provider custom class."""
121184

@@ -162,6 +225,8 @@ class HGC(GenericProvider):
162225
class Lumen(GenericProvider):
163226
"""Lumen provider custom class."""
164227

228+
_include_filter = {EMAIL_HEADER_SUBJECT: ["Scheduled Maintenance"]}
229+
165230
_processors: List[GenericProcessor] = [
166231
CombinedProcessor(data_parsers=[EmailDateParser, HtmlParserLumen1]),
167232
]

0 commit comments

Comments
 (0)