Skip to content

Commit ce4173c

Browse files
authored
Provider implements an include filter to define relevant notifications (#91)
* Provider implements an include and exclude filter to define relevant notifications
1 parent 78672d0 commit ce4173c

File tree

9 files changed

+146
-23
lines changed

9 files changed

+146
-23
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
# Changelog
22

3+
## v2.0.3
4+
5+
### Added
6+
7+
- #91 - `Provider` now adds `_include_filter` and `_exclude_filter` attributes (using regex) to select which notifications are relevant to be parsed and which are not, avoiding false positives.
8+
39
## v2.0.2 - 2021-09-28
410

511
### Fixed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ Circuit Maintenance Notification #0
184184
circuit-maintenance-parser --data-file "/tmp/___ZAYO TTN-00000000 Planned MAINTENANCE NOTIFICATION___.eml" --data-type email --provider-type zayo
185185
Circuit Maintenance Notification #0
186186
{
187-
"account": "Linode",
187+
"account": "some account",
188188
"circuits": [
189189
{
190190
"circuit_id": "/OGYX/000000/ /ZYO /",
@@ -226,6 +226,7 @@ The project is following Network to Code software development guidelines and is
226226
1. Define the `Parsers`(inheriting from some of the generic `Parsers` or a new one) that will extract the data from the notification, that could contain itself multiple `DataParts`. The `data_type` of the `Parser` and the `DataPart` have to match. The custom `Parsers` will be placed in the `parsers` folder.
227227
2. Update the `unit/test_parsers.py` with the new parsers, providing some data to test and validate the extracted data.
228228
3. Define a new `Provider` inheriting from the `GenericProvider`, defining the `Processors` and the respective `Parsers` to be used. Maybe you can reuse some of the generic `Processors` or maybe you will need to create a custom one. If this is the case, place it in the `processors` folder.
229+
- The `Provider` also supports the definition of an `_include_filter` and an `_exclude_filter` to limit the notifications that are actually processed, avoiding false positive errors for notifications that are not relevant.
229230
4. Update the `unit/test_e2e.py` with the new provider, providing some data to test and validate the final `Maintenances` created.
230231
5. **Expose the new `Provider` class** updating the map `SUPPORTED_PROVIDERS` in `circuit_maintenance_parser/__init__.py` to officially expose the `Provider`.
231232

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
"""Constants used in the library."""
2+
3+
EMAIL_HEADER_SUBJECT = "email-header-subject"
4+
EMAIL_HEADER_DATE = "email-header-date"

circuit_maintenance_parser/data.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
import email
66
from pydantic import BaseModel, Extra
7+
from circuit_maintenance_parser.constants import EMAIL_HEADER_SUBJECT, EMAIL_HEADER_DATE
8+
79

810
logger = logging.getLogger(__name__)
911

@@ -73,9 +75,8 @@ def init_from_emailmessage(cls: Type["NotificationData"], email_message) -> Opti
7375
cls.walk_email(email_message, data_parts)
7476

7577
# Adding extra headers that are interesting to be parsed
76-
data_parts.add(DataPart("email-header-subject", email_message["Subject"].encode()))
77-
# TODO: Date could be used to extend the "Stamp" time of a notification when not available, but we need a parser
78-
data_parts.add(DataPart("email-header-date", email_message["Date"].encode()))
78+
data_parts.add(DataPart(EMAIL_HEADER_SUBJECT, email_message["Subject"].encode()))
79+
data_parts.add(DataPart(EMAIL_HEADER_DATE, email_message["Date"].encode()))
7980
return cls(data_parts=list(data_parts))
8081
except Exception: # pylint: disable=broad-except
8182
logger.exception("Error found initializing data from email message: %s", email_message)

circuit_maintenance_parser/parser.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
from circuit_maintenance_parser.errors import ParserError
1717
from circuit_maintenance_parser.output import Status, Impact, CircuitImpact
18+
from circuit_maintenance_parser.constants import EMAIL_HEADER_SUBJECT, EMAIL_HEADER_DATE
1819

1920
# pylint: disable=no-member
2021

@@ -177,7 +178,7 @@ def clean_line(line):
177178
class EmailDateParser(Parser):
178179
"""Parser for Email Date."""
179180

180-
_data_types = ["email-header-date"]
181+
_data_types = [EMAIL_HEADER_DATE]
181182

182183
def parser_hook(self, raw: bytes):
183184
"""Execute parsing."""
@@ -190,7 +191,7 @@ def parser_hook(self, raw: bytes):
190191
class EmailSubjectParser(Parser):
191192
"""Parse data from subject or email."""
192193

193-
_data_types = ["email-header-subject"]
194+
_data_types = [EMAIL_HEADER_SUBJECT]
194195

195196
def parser_hook(self, raw: bytes):
196197
"""Execute parsing."""

circuit_maintenance_parser/provider.py

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
"""Definition of Provider class as the entry point to the library."""
22
import logging
3+
import re
34
import traceback
45

5-
from typing import Iterable, List
6+
from typing import Iterable, List, Dict
67

78
from pydantic import BaseModel
89

@@ -13,6 +14,7 @@
1314
from circuit_maintenance_parser.parser import ICal, EmailDateParser
1415
from circuit_maintenance_parser.errors import ProcessorError, ProviderError
1516
from circuit_maintenance_parser.processor import CombinedProcessor, SimpleProcessor, GenericProcessor
17+
from circuit_maintenance_parser.constants import EMAIL_HEADER_SUBJECT
1618

1719
from circuit_maintenance_parser.parsers.aquacomms import HtmlParserAquaComms1, SubjectParserAquaComms1
1820
from circuit_maintenance_parser.parsers.aws import SubjectParserAWS1, TextParserAWS1
@@ -50,6 +52,14 @@ class GenericProvider(BaseModel):
5052
that will be used. Default: `[SimpleProcessor(data_parsers=[ICal])]`.
5153
_default_organizer (optional): Defines a default `organizer`, an email address, to be used to create a
5254
`Maintenance` in absence of the information in the original notification.
55+
_include_filter (optional): Dictionary that defines matching regex per data type to take a notification into
56+
account.
57+
_exclude_filter (optional): Dictionary that defines matching regex per data type to NOT take a notification
58+
into account.
59+
60+
Notes:
61+
- If a notification matches both the `_include_filter` and `_exclude_filter`, the exclusion takes precedence and
62+
the notification will be filtered out.
5363
5464
Examples:
5565
>>> GenericProvider()
@@ -59,12 +69,55 @@ class GenericProvider(BaseModel):
5969
_processors: List[GenericProcessor] = [SimpleProcessor(data_parsers=[ICal])]
6070
_default_organizer: str = "unknown"
6171

72+
_include_filter: Dict[str, List[str]] = {}
73+
_exclude_filter: Dict[str, List[str]] = {}
74+
75+
def include_filter_check(self, data: NotificationData) -> bool:
    """Tell whether the notification passes the `_include_filter`.

    Returns True when no `_include_filter` is defined; otherwise returns the result of
    `filter_check` for the "include" filter.
    """
    if not self._include_filter:
        # No include criteria defined: every notification is accepted.
        return True
    return self.filter_check(self._include_filter, data, "include")
80+
81+
def exclude_filter_check(self, data: NotificationData) -> bool:
    """Tell whether the notification matches the `_exclude_filter`.

    Returns False when no `_exclude_filter` is defined; otherwise returns the result of
    `filter_check` for the "exclude" filter.
    """
    if not self._exclude_filter:
        # No exclude criteria defined: nothing is excluded.
        return False
    return self.filter_check(self._exclude_filter, data, "exclude")
86+
87+
@staticmethod
def filter_check(filter_dict: Dict, data: NotificationData, filter_type: str) -> bool:
    """Check whether any data part of the notification matches the given filter.

    Args:
        filter_dict: Maps a data type to the list of regex patterns searched in the data parts of that type.
        data: Notification whose `data_parts` are checked.
        filter_type: Label of the filter (e.g. "include" or "exclude"), only used in log messages.

    Returns:
        True as soon as one pattern is found (via `re.search`) in the decoded content of a data part whose
        type is a key of `filter_dict`; False otherwise.
    """
    data_part_content = None
    for data_part in data.data_parts:
        filter_data_type = data_part.type
        if filter_data_type not in filter_dict:
            continue

        data_part_content = data_part.content.decode()
        if any(re.search(filter_re, data_part_content) for filter_re in filter_dict[filter_data_type]):
            logger.debug("Matching %s filter expression for %s.", filter_type, data_part_content)
            return True

    # Compare against None (not truthiness): a data part of an expected type with empty content
    # must be reported as "not matching", not as a missing data type.
    if data_part_content is not None:
        logger.warning("Not matching any %s filter expression for %s.", filter_type, data_part_content)
    else:
        logger.warning(
            "Not matching any %s filter expression because the notification doesn't contain the expected data_types: %s",
            filter_type,
            ", ".join(filter_dict),
        )
    return False
110+
62111
def get_maintenances(self, data: NotificationData) -> Iterable[Maintenance]:
63112
"""Main entry method that will use the defined `_processors` in order to extract the `Maintenances` from data."""
64113
provider_name = self.__class__.__name__
65114
error_message = ""
66115
related_exceptions = []
67116

117+
if self.exclude_filter_check(data) or not self.include_filter_check(data):
118+
logger.debug("Skipping notification %s due filtering policy for %s.", data, self.__class__.__name__)
119+
return []
120+
68121
for processor in self._processors:
69122
try:
70123
return processor.process(data, self.get_extended_data())
@@ -172,6 +225,8 @@ class HGC(GenericProvider):
172225
class Lumen(GenericProvider):
173226
"""Lumen provider custom class."""
174227

228+
_include_filter = {EMAIL_HEADER_SUBJECT: ["Scheduled Maintenance"]}
229+
175230
_processors: List[GenericProcessor] = [
176231
CombinedProcessor(data_parsers=[EmailDateParser, HtmlParserLumen1]),
177232
]
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Scheduled Maintenance Window

tests/unit/test_e2e.py

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from circuit_maintenance_parser.data import NotificationData
99
from circuit_maintenance_parser.errors import ProviderError
10-
10+
from circuit_maintenance_parser.constants import EMAIL_HEADER_DATE, EMAIL_HEADER_SUBJECT
1111

1212
# pylint: disable=duplicate-code
1313
from circuit_maintenance_parser.provider import (
@@ -65,7 +65,7 @@
6565
Cogent,
6666
[
6767
("html", Path(dir_path, "data", "cogent", "cogent1.html")),
68-
("email-header-date", Path(dir_path, "data", "date", "email_date_1")),
68+
(EMAIL_HEADER_DATE, Path(dir_path, "data", "date", "email_date_1")),
6969
],
7070
[
7171
Path(dir_path, "data", "cogent", "cogent1_result.json"),
@@ -76,7 +76,7 @@
7676
Cogent,
7777
[
7878
("html", Path(dir_path, "data", "cogent", "cogent2.html")),
79-
("email-header-date", Path(dir_path, "data", "date", "email_date_1")),
79+
(EMAIL_HEADER_DATE, Path(dir_path, "data", "date", "email_date_1")),
8080
],
8181
[
8282
Path(dir_path, "data", "cogent", "cogent2_result.json"),
@@ -105,7 +105,8 @@
105105
Lumen,
106106
[
107107
("html", Path(dir_path, "data", "lumen", "lumen1.html")),
108-
("email-header-date", Path(dir_path, "data", "date", "email_date_1")),
108+
(EMAIL_HEADER_DATE, Path(dir_path, "data", "date", "email_date_1")),
109+
(EMAIL_HEADER_SUBJECT, Path(dir_path, "data", "lumen", "subject_work_planned")),
109110
],
110111
[
111112
Path(dir_path, "data", "lumen", "lumen1_result.json"),
@@ -116,7 +117,8 @@
116117
Lumen,
117118
[
118119
("html", Path(dir_path, "data", "lumen", "lumen2.html")),
119-
("email-header-date", Path(dir_path, "data", "date", "email_date_1")),
120+
(EMAIL_HEADER_DATE, Path(dir_path, "data", "date", "email_date_1")),
121+
(EMAIL_HEADER_SUBJECT, Path(dir_path, "data", "lumen", "subject_work_planned")),
120122
],
121123
[
122124
Path(dir_path, "data", "lumen", "lumen2_result.json"),
@@ -127,7 +129,8 @@
127129
Lumen,
128130
[
129131
("html", Path(dir_path, "data", "lumen", "lumen3.html")),
130-
("email-header-date", Path(dir_path, "data", "date", "email_date_1")),
132+
(EMAIL_HEADER_DATE, Path(dir_path, "data", "date", "email_date_1")),
133+
(EMAIL_HEADER_SUBJECT, Path(dir_path, "data", "lumen", "subject_work_planned")),
131134
],
132135
[
133136
Path(dir_path, "data", "lumen", "lumen3_result.json"),
@@ -138,7 +141,8 @@
138141
Lumen,
139142
[
140143
("html", Path(dir_path, "data", "lumen", "lumen4.html")),
141-
("email-header-date", Path(dir_path, "data", "date", "email_date_1")),
144+
(EMAIL_HEADER_DATE, Path(dir_path, "data", "date", "email_date_1")),
145+
(EMAIL_HEADER_SUBJECT, Path(dir_path, "data", "lumen", "subject_work_planned")),
142146
],
143147
[
144148
Path(dir_path, "data", "lumen", "lumen4_result.json"),
@@ -150,7 +154,7 @@
150154
Megaport,
151155
[
152156
("html", Path(dir_path, "data", "megaport", "megaport1.html")),
153-
("email-header-date", Path(dir_path, "data", "date", "email_date_1")),
157+
(EMAIL_HEADER_DATE, Path(dir_path, "data", "date", "email_date_1")),
154158
],
155159
[
156160
Path(dir_path, "data", "megaport", "megaport1_result.json"),
@@ -161,7 +165,7 @@
161165
Megaport,
162166
[
163167
("html", Path(dir_path, "data", "megaport", "megaport2.html")),
164-
("email-header-date", Path(dir_path, "data", "date", "email_date_1")),
168+
(EMAIL_HEADER_DATE, Path(dir_path, "data", "date", "email_date_1")),
165169
],
166170
[
167171
Path(dir_path, "data", "megaport", "megaport2_result.json"),
@@ -221,7 +225,7 @@
221225
Telstra,
222226
[
223227
("html", Path(dir_path, "data", "telstra", "telstra1.html")),
224-
("email-header-date", Path(dir_path, "data", "date", "email_date_1")),
228+
(EMAIL_HEADER_DATE, Path(dir_path, "data", "date", "email_date_1")),
225229
],
226230
[
227231
Path(dir_path, "data", "telstra", "telstra1_result.json"),
@@ -232,7 +236,7 @@
232236
Telstra,
233237
[
234238
("html", Path(dir_path, "data", "telstra", "telstra2.html")),
235-
("email-header-date", Path(dir_path, "data", "date", "email_date_1")),
239+
(EMAIL_HEADER_DATE, Path(dir_path, "data", "date", "email_date_1")),
236240
],
237241
[
238242
Path(dir_path, "data", "telstra", "telstra2_result.json"),
@@ -245,7 +249,7 @@
245249
Turkcell,
246250
[
247251
("html", Path(dir_path, "data", "turkcell", "turkcell1.html")),
248-
("email-header-date", Path(dir_path, "data", "date", "email_date_1")),
252+
(EMAIL_HEADER_DATE, Path(dir_path, "data", "date", "email_date_1")),
249253
],
250254
[
251255
Path(dir_path, "data", "turkcell", "turkcell1_result.json"),
@@ -256,7 +260,7 @@
256260
Turkcell,
257261
[
258262
("html", Path(dir_path, "data", "turkcell", "turkcell2.html")),
259-
("email-header-date", Path(dir_path, "data", "date", "email_date_1")),
263+
(EMAIL_HEADER_DATE, Path(dir_path, "data", "date", "email_date_1")),
260264
],
261265
[
262266
Path(dir_path, "data", "turkcell", "turkcell2_result.json"),
@@ -268,7 +272,7 @@
268272
Verizon,
269273
[
270274
("html", Path(dir_path, "data", "verizon", "verizon1.html")),
271-
("email-header-date", Path(dir_path, "data", "date", "email_date_1")),
275+
(EMAIL_HEADER_DATE, Path(dir_path, "data", "date", "email_date_1")),
272276
],
273277
[
274278
Path(dir_path, "data", "verizon", "verizon1_result.json"),
@@ -279,7 +283,7 @@
279283
Verizon,
280284
[
281285
("html", Path(dir_path, "data", "verizon", "verizon2.html")),
282-
("email-header-date", Path(dir_path, "data", "date", "email_date_1")),
286+
(EMAIL_HEADER_DATE, Path(dir_path, "data", "date", "email_date_1")),
283287
],
284288
[
285289
Path(dir_path, "data", "verizon", "verizon2_result.json"),
@@ -290,7 +294,7 @@
290294
Verizon,
291295
[
292296
("html", Path(dir_path, "data", "verizon", "verizon3.html")),
293-
("email-header-date", Path(dir_path, "data", "date", "email_date_1")),
297+
(EMAIL_HEADER_DATE, Path(dir_path, "data", "date", "email_date_1")),
294298
],
295299
[
296300
Path(dir_path, "data", "verizon", "verizon3_result.json"),

tests/unit/test_providers.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,53 @@ def test_provide_get_maintenances_one_exception(provider_class):
5555
else:
5656
provider.get_maintenances(fake_data)
5757
assert mock_processor.call_count == 2
58+
59+
60+
def test_provider_with_include_filter():
    """Verify that `_include_filter` only lets matching notifications reach the processors."""
    matching_part = fake_data.data_parts[0]

    class ProviderWithIncludeFilter(GenericProvider):
        """Fake Provider."""

        _include_filter = {matching_part.type: [matching_part.content.decode()]}

    # The include filter matches the data, so processing is attempted and raises for this fake data
    with pytest.raises(ProviderError):
        ProviderWithIncludeFilter().get_maintenances(fake_data)

    # Data that does not match the include filter is skipped, producing an empty result
    unrelated_data = NotificationData.init_from_raw("other type", b"other data")
    assert ProviderWithIncludeFilter().get_maintenances(unrelated_data) == []
75+
76+
77+
def test_provider_with_exclude_filter():
    """Tests usage of _exclude_filter."""

    # Named after the filter it actually defines (was a copy-pasted "Include" name).
    class ProviderWithExcludeFilter(GenericProvider):
        """Fake Provider."""

        _exclude_filter = {fake_data.data_parts[0].type: [fake_data.data_parts[0].content.decode()]}

    # Because the exclude filter is matching with the data, we expect that we skip the processing
    assert ProviderWithExcludeFilter().get_maintenances(fake_data) == []

    # With a non matching data to exclude, the notification will not be skipped and will be processed
    other_fake_data = NotificationData.init_from_raw("other type", b"other data")
    with pytest.raises(ProviderError):
        ProviderWithExcludeFilter().get_maintenances(other_fake_data)
92+
93+
94+
def test_provider_with_include_and_exclude_filters():
    """Tests matching of include and exclude filter, where the exclude takes precedence."""
    data = NotificationData.init_from_raw("fake_type", b"fake data")
    data.add_data_part("other_type", b"other data")

    # Named after the filters it actually defines (was a copy-pasted "Include"-only name).
    class ProviderWithIncludeAndExcludeFilters(GenericProvider):
        """Fake Provider."""

        _include_filter = {data.data_parts[0].type: [data.data_parts[0].content.decode()]}
        _exclude_filter = {data.data_parts[1].type: [data.data_parts[1].content.decode()]}

    # Because the exclude filter and the include filter are matching, we expect the exclude to take
    # precedence
    assert ProviderWithIncludeAndExcludeFilters().get_maintenances(data) == []

0 commit comments

Comments
 (0)