|
| 1 | +"""Circuit maintenance parser for PCCW Email notifications.""" |
| 2 | +import re |
| 3 | +from typing import List, Dict, Tuple, Any, ClassVar |
| 4 | +from datetime import datetime |
| 5 | + |
| 6 | +from bs4.element import ResultSet # type: ignore |
| 7 | +from circuit_maintenance_parser.output import Status |
| 8 | +from circuit_maintenance_parser.parser import Html, EmailSubjectParser |
| 9 | + |
| 10 | + |
class HtmlParserPCCW(Html):
    """Custom Parser for HTML portion of PCCW circuit maintenance notifications.

    The customer account is read from the "Customer Name :" text field and the
    maintenance window from the "Date Time :" text field of the message body.
    """

    DATE_TIME_FORMAT: ClassVar[str] = "%d/%m/%Y %H:%M:%S"
    PROVIDER: ClassVar[str] = "PCCW Global"

    def parse_html(self, soup: ResultSet) -> List[Dict]:
        """Parse PCCW circuit maintenance email.

        Args:
            soup: BeautifulSoup ResultSet containing the email HTML content

        Returns:
            List containing a single dictionary with the keys ``circuits``
            (always an empty list here), ``provider``, ``account``, ``start``
            and ``end`` (the latter two converted with ``self.dt2ts``).

        Raises:
            ValueError: If the customer or date/time field is missing, or the
                date/time field does not hold two parsable timestamps.
        """
        data: Dict[str, Any] = {
            "circuits": [],
            "provider": self.PROVIDER,
            "account": self._extract_account(soup),
        }
        start_time, end_time = self._extract_maintenance_window(soup)
        data["start"] = self.dt2ts(start_time)
        data["end"] = self.dt2ts(end_time)

        return [data]

    @staticmethod
    def _find_field_text(soup: ResultSet, label: str) -> str:
        """Return the text that follows ``label`` in the first matching string of ``soup``.

        The label is matched case-insensitively anywhere inside a text node.

        Raises:
            ValueError: If no text node contains ``label``.
        """
        label_pattern = re.compile(re.escape(label), re.IGNORECASE)
        field = soup.find(string=label_pattern)
        # ``find`` returns None when nothing matches; fail with a message that
        # names the missing field instead of an AttributeError on None.
        match = label_pattern.search(str(field)) if field is not None else None
        if match is None:
            raise ValueError(f"'{label}' field not found in PCCW notification")
        return match.string[match.end() :]

    def _extract_account(self, soup: ResultSet) -> str:
        """Extract customer account from soup.

        Everything after the "Customer Name :" label is returned (stripped), so
        a customer name that itself contains a colon is kept intact.
        """
        return self._find_field_text(soup, "Customer Name :").strip()

    def _extract_maintenance_window(self, soup: ResultSet) -> Tuple[datetime, datetime]:
        """Extract start and end times from maintenance window.

        The first two ``DD/MM/YYYY HH:MM:SS`` timestamps that follow the
        "Date Time :" label are taken as start and end. Any surrounding words
        (such as the "to" separator or a trailing "GMT") are ignored, and the
        returned datetimes are naive.

        Raises:
            ValueError: If the field is missing, holds fewer than two
                timestamps, or a timestamp is not a valid date/time.
        """
        window_text = self._find_field_text(soup, "Date Time :")
        # Pull the timestamps out directly rather than splitting on separator
        # words, so stray "-" or "to" characters cannot shift the positions.
        stamps = re.findall(r"\d{1,2}/\d{1,2}/\d{4}\s+\d{1,2}:\d{2}:\d{2}", window_text)
        if len(stamps) < 2:
            raise ValueError(f"Expected start and end timestamps in 'Date Time :' field, got: {window_text.strip()!r}")
        start_time = datetime.strptime(stamps[0], self.DATE_TIME_FORMAT)
        end_time = datetime.strptime(stamps[1], self.DATE_TIME_FORMAT)
        return start_time, end_time
| 51 | + |
| 52 | + |
class SubjectParserPCCW(EmailSubjectParser):
    """Custom Parser for Email subject of PCCW circuit maintenance notifications.

    This parser extracts maintenance ID, status and summary from the email subject line.
    """

    # Only the completion notification doesn't come with ICal. Others, such as planned outage, urgent
    # maintenance, amendment and cancellation notifications, come with ICal. Hence, maintenance status
    # is set to COMPLETED.
    DEFAULT_STATUS: ClassVar[Status] = Status.COMPLETED

    def parse_subject(self, subject: str) -> List[Dict]:
        """Parse PCCW circuit maintenance email subject.

        Args:
            subject: Email subject string to parse

        Returns:
            List containing a dictionary with parsed subject data including:
            - maintenance_id: Text after the last "-" in the subject
            - status: Default COMPLETED status
            - summary: Subject with line breaks and surrounding whitespace removed
        """
        data: Dict[str, Any] = {
            "maintenance_id": self._extract_maintenance_id(subject),
            "status": self.DEFAULT_STATUS,
            "summary": self._clean_summary(subject),
        }

        return [data]

    def _extract_maintenance_id(self, subject: str) -> str:
        """Extract maintenance ID from the end of subject line.

        Returns the stripped text after the last "-"; when the subject has no
        "-" the whole stripped subject is returned.
        """
        return subject.rpartition("-")[2].strip()

    def _clean_summary(self, subject: str) -> str:
        """Clean and format the summary text.

        Removes both carriage returns and line feeds, so a subject folded with
        CRLF does not leave stray "\\r" characters in the summary, then strips
        surrounding whitespace.
        """
        return subject.replace("\r", "").replace("\n", "").strip()
0 commit comments