Skip to content

Commit d302083

Browse files
unpapillon and c.radet authored
Update Telstra.py for new notification format (#190)
* Update Telstra.py for new notification format * rename variables * Update telstra.py * add new telstra test files * Update test_e2e for telstra * Update test_parser for Telstra * fix linting for telstra parser * update regex condition * fix useless import * fix mypy lint * keep new arelion instead of Telia * fix indent * fix pylint * fix docstyle * add good result.json * fix json results * fix test_e2e * fix Telstra tests * fix compatibility with old format * fix parser organisation * fix maintenance details * fix black linting * add summary to tests * unit test fixing * improve telsrtra parser and complete tests * all test validated --------- Co-authored-by: c.radet <[email protected]>
1 parent 8251e57 commit d302083

File tree

6 files changed

+995
-3
lines changed

6 files changed

+995
-3
lines changed

circuit_maintenance_parser/parsers/telstra.py

Lines changed: 90 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
"""Telstra parser."""
22
import logging
33
from typing import Dict, List
4-
4+
import re
55
from dateutil import parser
66
from bs4.element import ResultSet # type: ignore
77

88
from circuit_maintenance_parser.parser import Html, Impact, CircuitImpact, Status
99

10+
1011
# pylint: disable=too-many-branches
1112

1213

@@ -73,3 +74,91 @@ def parse_tables(self, tables: ResultSet, data: Dict): # pylint: disable=too-ma
7374
# First sentence contains 'Maintenance Details:' so we skip it
7475
data["summary"] = ". ".join(sentences[1:])
7576
break
77+
78+
79+
class HtmlParserTelstra2(Html):
    """Notifications Parser for Telstra notifications.

    Targets the HTML layout where each field is announced by a ``<strong>`` label
    ("Reference number", "Start time", "End time", "Service/s under maintenance",
    "Maintenance details") and the maintenance status is inferred from the wording
    found in ``<span>`` elements.
    """

    def parse_html(self, soup):
        """Execute parsing.

        Args:
            soup: Parsed HTML document; only its ``<table>`` elements are inspected.

        Returns:
            A single-item list holding the dict of extracted maintenance fields.
        """
        data: Dict = {}
        self.parse_tables(soup.find_all("table"), data)
        return [data]

    @staticmethod
    def _value_element(strong_element):
        """Return the element two siblings after a label, or None when the markup is shorter."""
        first_sibling = strong_element.next_sibling
        if first_sibling is None:
            return None
        return first_sibling.next_sibling

    @staticmethod
    def _text_of(element) -> str:
        """Return the text of ``element``; an empty string when ``element`` is None."""
        if element is None:
            return ""
        text = element.string
        if text is None:
            # ``.string`` is None when a tag has several children; fall back to the joined text.
            text = element.get_text()
        return text

    def _timestamp_or(self, element, fallback: str):
        """Return the ``dt2ts`` value of the date found in ``element``, or ``fallback`` if none is found."""
        match = re.search(r"\d{2}\s[a-zA-Z]{3}\s\d{4}\s\d{2}[:]\d{2}[:]\d{2}", self._text_of(element))
        if match is None:
            return fallback
        return self.dt2ts(parser.parse(match.group()))

    @staticmethod
    def _collect_summary(strong_element) -> List[str]:
        """Collect the distinct, non-blank strings following a label up to "Reference number"."""
        blanks = ("\n", "", "\xa0")
        sentences: List[str] = []
        for element in strong_element.next_elements:
            text = element.string
            if text == "Reference number":
                break
            # A tag and its only child string expose the same ``.string``, hence the duplicate check.
            if text and text not in blanks and text not in sentences:
                sentences.append(text)
        return sentences

    @staticmethod
    def _extract_account(p_text: str) -> str:
        """Return what follows the word "attention" in ``p_text``, or "not Found" when nothing does.

        The previous pattern ``[^attention ].*`` was a negated character class, not a
        prefix skip: it started the match at the first character that is not one of
        ``a t e n i o`` or a space, truncating many names and keeping separators.
        """
        remainder = p_text.partition("attention")[2]
        # Drop whitespace (including non-breaking spaces) and a separator such as ":" after the label.
        remainder = remainder.strip().lstrip(":,;-").strip()
        if not remainder:
            return "not Found"
        # As before, only the first line of the remaining text is kept.
        return remainder.splitlines()[0].strip()

    @staticmethod
    def _status_from_text(span_text: str):
        """Return the ``Status`` announced by ``span_text`` (lower-cased), or None when no phrase matches."""
        # Ordered: the first phrase found decides the status.
        phrases = (
            ("planned maintenance to our network infrastructure", "CONFIRMED"),
            ("emergency maintenance to our network infrastructure", "CONFIRMED"),
            ("has been rescheduled", "RE-SCHEDULED"),
            ("has been completed successfully", "COMPLETED"),
            ("did not proceed", "CANCELLED"),
            ("has been withdrawn", "CANCELLED"),
            ("has been cancelled", "CANCELLED"),
            ("was unsuccessful", "CANCELLED"),
        )
        for phrase, status in phrases:
            if phrase in span_text:
                return Status(status)
        return None

    def add_maintenance_data(self, table: ResultSet, data: Dict):
        """Populate data dict.

        Walks every ``<strong>`` label inside ``table`` and stores the matching value
        under ``maintenance_id``, ``start``, ``end``, ``circuits`` or ``summary``.
        Labels whose value element is missing are tolerated instead of raising.

        Args:
            table: Element whose ``<strong>`` descendants are scanned.
            data: Dict updated in place with the extracted fields.
        """
        for strong_element in table.find_all("strong"):
            if not strong_element.string:
                continue
            label = strong_element.string.strip()

            if label == "Maintenance details":
                sentences = self._collect_summary(strong_element)
                if sentences:
                    # First sentence contains 'Maintenance Details' so we skip it
                    data["summary"] = ". ".join(sentences[1:])
                continue

            # Looked up only for labels that need it, and never assumed to exist.
            value = self._value_element(strong_element)
            if label == "Reference number":
                if value is not None:
                    data["maintenance_id"] = self._text_of(value).strip()
            elif label == "Start time":
                data["start"] = self._timestamp_or(value, "Not defined")
            elif label == "End time":
                data["end"] = self._timestamp_or(value, "is not defined")
            elif label == "Service/s under maintenance" and value is not None:
                # TODO: This split is just an assumption of the multiple service, to be checked with more samples
                data["circuits"] = [
                    CircuitImpact(impact=Impact("OUTAGE"), circuit_id=circuit_id.strip())
                    for circuit_id in value.text.split(", ")
                ]

    def parse_tables(self, tables: ResultSet, data: Dict):
        """Parse Table tag.

        Extracts the account (from ``<p>`` text containing "attention"), the status
        (from the first ``<span>`` with a known phrase) and the maintenance fields.

        Args:
            tables: ``<table>`` elements of the notification; only the first is used.
            data: Dict updated in place with the extracted fields.
        """
        for table in tables:
            for p_element in table.find_all("p"):
                # The account is stored lower-cased, as the text is lower-cased before matching.
                p_text = p_element.text.lower()
                if "attention" in p_text:
                    data["account"] = self._extract_account(p_text)

            # TODO: We should find a more consistent way to parse the status of a maintenance note
            for span_element in table.find_all("span"):
                status = self._status_from_text(span_element.text.lower())
                if status is not None:
                    data["status"] = status
                    break

            self.add_maintenance_data(table, data)
            # Only the first table is processed.
            break

circuit_maintenance_parser/provider.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
SubjectParserSeaborn2,
3636
)
3737
from circuit_maintenance_parser.parsers.sparkle import HtmlParserSparkle1
38-
from circuit_maintenance_parser.parsers.telstra import HtmlParserTelstra1
38+
from circuit_maintenance_parser.parsers.telstra import HtmlParserTelstra1, HtmlParserTelstra2
3939
from circuit_maintenance_parser.parsers.turkcell import HtmlParserTurkcell1
4040
from circuit_maintenance_parser.parsers.verizon import HtmlParserVerizon1
4141
from circuit_maintenance_parser.parsers.zayo import HtmlParserZayo1, SubjectParserZayo1
@@ -330,6 +330,7 @@ class Telstra(GenericProvider):
330330

331331
_processors: List[GenericProcessor] = [
332332
SimpleProcessor(data_parsers=[ICal]),
333+
CombinedProcessor(data_parsers=[EmailDateParser, HtmlParserTelstra2]),
333334
CombinedProcessor(data_parsers=[EmailDateParser, HtmlParserTelstra1]),
334335
]
335336
_default_organizer = "[email protected]"

0 commit comments

Comments
 (0)