Skip to content

Commit e6bcab8

Browse files
jarriagadjarria
andauthored
Cogent text parser (#192)
* cogent parsers mostly done
* cogent text parser working
* cogent text and subject parser, testing included
* formatting changes
* fixes errors noted from pipeline, all tests passing now

Co-authored-by: javier.arriagada <[email protected]>
1 parent 708ca73 commit e6bcab8

File tree

4 files changed

+236
-2
lines changed

4 files changed

+236
-2
lines changed

circuit_maintenance_parser/parsers/cogent.py

Lines changed: 133 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,145 @@
66
from pytz import timezone, UTC
77
from bs4.element import ResultSet # type: ignore
88

9-
from circuit_maintenance_parser.parser import Html, Impact, CircuitImpact, Status
9+
from circuit_maintenance_parser.parser import CircuitImpact, EmailSubjectParser, Html, Impact, Status, Text
1010

1111
logger = logging.getLogger(__name__)
1212

1313
# pylint: disable=too-many-branches
1414

1515

16+
class SubjectParserCogent1(EmailSubjectParser):
    """Subject parser for Cogent notifications."""

    def parse_subject(self, subject: str):
        """Extract the maintenance status and the impacted order ID from the subject.

        Example:
            11/19/2022 Circuit Provider Maintenance - Edina, MN 1-300123456
            Correction 06/11/2021 AB987654321-1 Planned Network Maintenance - San Jose, CA 1-123456789

        Args:
            subject: Subject line of the notification email.

        Returns:
            A single-element list holding a dict with a "status" entry and a
            "circuits" list (left empty when the subject carries no order ID).
        """
        data: Dict = {"circuits": []}

        # Keyword matching below is done case-insensitively.
        subject = subject.lower()

        # The first matching keyword wins.
        # NOTE(review): "completed" is tested after "planned"/"provider"/"emergency",
        # so a subject containing both is reported as CONFIRMED - confirm this
        # precedence is intended.
        if subject.startswith("correction") or "rescheduled" in subject:
            data["status"] = Status("RE-SCHEDULED")
        elif "cancellation" in subject:
            data["status"] = Status("CANCELLED")
        elif "planned" in subject or "provider" in subject or "emergency" in subject:
            data["status"] = Status("CONFIRMED")
        elif "completed" in subject:
            data["status"] = Status("COMPLETED")
        else:
            data["status"] = Status("NO-CHANGE")

        # Take the last space-preceded run of digits/hyphens as the order ID.
        # At least one digit is required so that the " - " separator before the
        # city name is never mistaken for an ID when the subject has none.
        match = re.search(r".* ([\d-]*\d[\d-]*)", subject)
        if match:
            data["circuits"].append(CircuitImpact(impact=Impact("OUTAGE"), circuit_id=match.group(1)))

        return [data]
47+
48+
49+
class TextParserCogent1(Text):
    """Parse text body of Cogent emails."""

    @staticmethod
    def _parse_local_datetime(value):
        """Convert a "<time> <date>" string such as "10:00pm 11/19/2022" to a naive datetime.

        Args:
            value: Time (12-hour clock, with or without a space before am/pm)
                followed by a slash-separated date.

        Returns:
            A naive ``datetime`` (no timezone attached).

        Raises:
            ValueError: If the string matches none of the supported layouts.
        """
        # Month-first is tried before day-first: the sample notification uses
        # dates such as 11/19/2022, which are only valid as month/day/year.
        # Day-first is kept as a fallback so that dates which are only valid as
        # day/month/year keep parsing.
        formats = (
            "%I:%M %p %m/%d/%Y",
            "%I:%M%p %m/%d/%Y",
            "%I:%M %p %d/%m/%Y",
            "%I:%M%p %d/%m/%Y",
        )
        for fmt in formats:
            try:
                return datetime.strptime(value, fmt)
            except ValueError:
                continue
        raise ValueError(f"Unsupported Cogent date/time format: {value!r}")

    def parse_text(self, text):
        """Execute parsing of text.

        Example:
            CIRCUIT PROVIDER MAINTENANCE

            Dear Cogent Customer,

            As a valued customer, Cogent is committed to keeping you informed about any changes in the status of your service with us. This email is to alert you regarding a circuit provider maintenance which will affect your connection to Cogent:

            Start time: 10:00pm CT 11/19/2022
            End time: 5:00am CT 11/20/2022
            Work order number: VN16123
            Order ID(s) impacted: 1-300123456
            Expected Outage/Downtime: 7 hours

            Cogent customers receiving service in Edina, MN will be affected by this outage. This outage has been scheduled by Zayo. The purpose of this maintenance is to repair damaged fiber. Only the Cogent Order ID(s) above will be impacted.

            During this maintenance window, you will experience an interruption in service while Zayo completes the maintenance activities; the interruption is expected to be less than 7 hours; however, due to the complexity of the work, your downtime may be longer.

            Our network operations engineers closely monitor the work and will do everything possible to minimize any inconvenience to you. If you have any problems with your connection after this time, or if you have any questions regarding the maintenance at any point, please call Customer Support at 1-877-7-COGENT and refer to this Maintenance Ticket: VN16123.

        Args:
            text: Plain-text body of the notification, one field per line.

        Returns:
            A single-element list holding a dict with the fields found in the
            body: "summary" always, and "account", "start", "end",
            "maintenance_id" and "circuits" when their lines are present.

        Raises:
            ValueError: If a start/end time is present but in an unsupported layout.
        """
        data: Dict = {"summary": "Cogent circuit maintenance"}

        # Raw "<time> <date>" strings; they are converted only once the affected
        # city (and therefore the timezone) is known.
        start_str = None
        end_str = None

        for line in text.splitlines():
            if line.startswith("Dear"):
                match = re.search(r"Dear (.*),", line)
                if match:
                    data["account"] = match.group(1)
            elif line.startswith("Start time:"):
                match = re.search(r"Start time: ([A-Za-z\d: ]*) [()A-Za-z\s]+ (\d+/\d+/\d+)", line)
                if match:
                    start_str = " ".join(match.groups())
            elif line.startswith("End time:"):
                match = re.search(r"End time: ([A-Za-z\d: ]*) [()A-Za-z\s]+ (\d+/\d+/\d+)", line)
                if match:
                    end_str = " ".join(match.groups())
            elif line.startswith("Cogent customers receiving service"):
                data["summary"] = line
                # Group 1 captures "<City>, <State> ". Note that "[^Cogent]" is a
                # character class (one character that is none of C, o, g, e, n, t),
                # not a negation of the word "Cogent".
                match = re.search(r"[^Cogent].*?((\b[A-Z][a-z\s-]+)+, ([A-Za-z-]+[\s-]))", line)
                if match:
                    city = match.group(1).strip()
                    local_timezone = timezone(self._geolocator.city_timezone(city))

                    # The times in the notification are local to the affected city:
                    # localize them there, then store them as UTC timestamps.
                    if start_str is not None:
                        utc_start = local_timezone.localize(self._parse_local_datetime(start_str)).astimezone(UTC)
                        data["start"] = self.dt2ts(utc_start)
                        logger.info(
                            "Mapped start time %s at %s (%s), to %s (UTC)",
                            start_str,
                            city,
                            local_timezone,
                            utc_start,
                        )
                    if end_str is not None:
                        utc_end = local_timezone.localize(self._parse_local_datetime(end_str)).astimezone(UTC)
                        data["end"] = self.dt2ts(utc_end)
                        logger.info(
                            "Mapped end time %s at %s (%s), to %s (UTC)",
                            end_str,
                            city,
                            local_timezone,
                            utc_end,
                        )
            elif line.startswith("Work order number:"):
                match = re.search("Work order number: (.*)", line)
                if match:
                    data["maintenance_id"] = match.group(1)
            elif line.startswith("Order ID(s) impacted:"):
                data["circuits"] = []
                match = re.search(r"Order ID\(s\) impacted: (.*)", line)
                if match:
                    # Comma-separated list of order IDs; blanks left by stray
                    # commas are skipped.
                    data["circuits"] = [
                        CircuitImpact(impact=Impact("OUTAGE"), circuit_id=circuit_id.strip())
                        for circuit_id in match.group(1).split(",")
                        if circuit_id.strip()
                    ]
            elif line.startswith("During this maintenance"):
                # Takes precedence over the "Cogent customers..." line as summary.
                data["summary"] = line
        return [data]
146+
147+
16148
class HtmlParserCogent1(Html):
17149
"""Notifications Parser for Cogent notifications."""
18150

circuit_maintenance_parser/provider.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from circuit_maintenance_parser.parsers.aquacomms import HtmlParserAquaComms1, SubjectParserAquaComms1
2121
from circuit_maintenance_parser.parsers.aws import SubjectParserAWS1, TextParserAWS1
2222
from circuit_maintenance_parser.parsers.bso import HtmlParserBSO1
23-
from circuit_maintenance_parser.parsers.cogent import HtmlParserCogent1
23+
from circuit_maintenance_parser.parsers.cogent import HtmlParserCogent1, TextParserCogent1, SubjectParserCogent1
2424
from circuit_maintenance_parser.parsers.colt import CsvParserColt1, SubjectParserColt1, SubjectParserColt2
2525
from circuit_maintenance_parser.parsers.equinix import HtmlParserEquinix, SubjectParserEquinix
2626
from circuit_maintenance_parser.parsers.gtt import HtmlParserGTT1
@@ -205,6 +205,7 @@ class Cogent(GenericProvider):
205205

206206
_processors: List[GenericProcessor] = [
207207
CombinedProcessor(data_parsers=[EmailDateParser, HtmlParserCogent1]),
208+
CombinedProcessor(data_parsers=[EmailDateParser, TextParserCogent1, SubjectParserCogent1]),
208209
]
209210
_default_organizer = "[email protected]"
210211

tests/unit/data/cogent/cogent3.eml

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
Received: from PH0PR14MB4247.namprd14.prod.outlook.com (2603:10b6:510:2a::19)
2+
by BN6PR1401MB2083.namprd14.prod.outlook.com with HTTPS; Thu, 3 Nov 2022
3+
11:48:38 +0000
4+
Received: from SN7PR04CA0214.namprd04.prod.outlook.com (2222:10b6:8306:127::9)
5+
by PH0PR14MB4247.namprd14.prod.outlook.com (2222:10b6:510:23a::19) with
6+
Microsoft SMTP Server (version=TLS1_2,
7+
cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.24.5391.22; Thu, 3 Nov
8+
2022 11:48:35 +0000
9+
Received: from SN1NAM02GT0032.eop-nam01.prod.protection.outlook.com
10+
(2444:10v6:806:127:cafe::4a) by SR7PRP4C$0214.outlook.office365.com
11+
(2444:10bj:806:127::9) with Microsoft SMTP Server (version=TLS1_2,
12+
cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.20.5291.22 via Frontend
13+
Transport; Thu, 3 Nov 2022 11:48:35 +0000
14+
Authentication-Results: spf=pass (sender IP is 11.22.33.244)
15+
smtp.mailfrom=cogentco.com; dkim=none (message not signed)
16+
header.d=none;dmarc=bestguesspass action=none
17+
header.from=cogentco.com;compauth=pass reason=109
18+
Received-SPF: Pass (protection.outlook.com: domain of cogentco.com designates
19+
11.22.33.244 as permitted sender) receiver=protection.outlook.com;
20+
client-ip=11.22.33.244; helo=engtools.sys.cogentco.com; pr=C
21+
Received: from engtools.sys.cogentco.com (11.22.33.244) by
22+
SN1NAM02FT0042.mail.protection.outlook.com (10.20.30.140) with Microsoft SMTP
23+
Server id 13.20.5791.20 via Frontend Transport; Thu, 3 Nov 2022 11:48:35
24+
+0000
25+
Message-ID: <[email protected]>
26+
27+
28+
Date: Thu, 03 Nov 2022 11:44:00 +0000
29+
Subject: 11/3/2022 Fiber Provider Maintenance - Minneapolis, MN
30+
1-300123456
31+
Content-Type: text/plain; charset="iso-8859-1"
32+
Content-Transfer-Encoding: quoted-printable
33+
Return-Path: [email protected]
34+
X-MS-Exchange-Organization-ExpirationStartTime: 03 Nov 2022 11:48:35.4396
35+
(UTC)
36+
X-EOPAttributedMessage:
37+
(UTC)
38+
MIME-Version: 1.0
39+
40+
CAUTION: This email originated outside of Cyxtera. Do not click links or op=
41+
en attachments unless you recognize the sender and have verified the conten=
42+
t is safe.
43+
44+
FIBER PROVIDER MAINTENANCE
45+
46+
Dear Cogent Customer,
47+
48+
As a valued customer, Cogent is committed to keeping you informed about any=
49+
changes in the status of your service with us. This email is to alert you =
50+
regarding a fiber provider maintenance which will affect your connection to=
51+
Cogent:
52+
53+
Start time: 10:00pm CT 11/3/2022 (Day 3 of 3)
54+
End time: 5:00am CT 11/4/2022
55+
Work order number: VN12345
56+
Order ID(s) impacted: 1-300123456
57+
Expected Outage/Downtime: 7 hours
58+
59+
Cogent customers receiving service in Minneapolis, MN will be affected by t=
60+
his outage. This outage has been scheduled by Zayo. The purpose of this mai=
61+
ntenance is to replace damaged fiber. Only the Cogent Order ID(s) above wil=
62+
l be impacted.
63+
64+
During this maintenance window, you will experience an interruption in serv=
65+
ice while Zayo completes the maintenance activities; the interruption is ex=
66+
pected to be less than 7 hours; however, due to the complexity of the work,=
67+
your downtime may be longer.
68+
69+
Our network operations engineers closely monitor the work and will do every=
70+
thing possible to minimize any inconvenience to you. If you have any probl=
71+
ems with your connection after this time, or if you have any questions rega=
72+
rding the maintenance at any point, please call Customer Support at 1-877-7=
73+
-COGENT and refer to this Maintenance Ticket: VN15285.
74+
75+
We appreciate your patience during this work and welcome any feedback. Plea=
76+
se send all questions and concerns to mailto:[email protected]
77+
78+
79+
80+
Sincerely,
81+
82+
Customer Support
83+
Cogent Communications
84+
85+
877-7COGENT
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
[
2+
{
3+
"account": "Cogent Customer",
4+
"circuits": [
5+
{
6+
"circuit_id": "1-300123456",
7+
"impact": "OUTAGE"
8+
}
9+
],
10+
"end": 1649671200,
11+
"maintenance_id": "VN12345",
12+
"start": 1667475840,
13+
"status": "CONFIRMED",
14+
"summary": "During this maintenance window, you will experience an interruption in service while Zayo completes the maintenance activities; the interruption is expected to be less than 7 hours; however, due to the complexity of the work, your downtime may be longer."
15+
}
16+
]

0 commit comments

Comments
 (0)