Skip to content

Commit 79c724a

Browse files
author
Christian Adell
authored
Use local locations DB (#104)
* Use local locations DB
1 parent 68b5386 commit 79c724a

File tree

11 files changed

+41178
-79
lines changed

11 files changed

+41178
-79
lines changed

CHANGELOG.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,11 @@
44

55
### Fixed
66

7-
- #98 - Add handling for `Lumen` notification with Alt Circuit ID
7+
- #98 - Add handling for `Lumen` notification with Alt Circuit ID.
88
- #99 - Extend `Zayo` Html parser to handle different table headers.
9+
- #103 - Add `Equinix` provider.
10+
- #104 - Use a local locations DB to map city to timezone as first option, keeping API as fallback option.
11+
- #105 - Extend `Colt` parser to support multiple `Maintenance` statuses.
912

1013
## v2.0.3 - 2021-10-01
1114

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,3 +307,7 @@ The project is following Network to Code software development guidelines and is
307307

308308
For any questions or comments, please check the [FAQ](FAQ.md) first and feel free to swing by the [Network to Code slack channel](https://networktocode.slack.com/) (channel #networktocode).
309309
Sign up [here](http://slack.networktocode.com/)
310+
311+
## License notes
312+
313+
This library uses the Basic World Cities Database by Pareto Software, LLC, the owner of Simplemaps.com. The provider offers this database free of charge, and it is licensed under the Creative Commons Attribution 4.0 license as described at: https://creativecommons.org/licenses/by/4.0/.

circuit_maintenance_parser/data/worldcities.csv

Lines changed: 41002 additions & 0 deletions
Large diffs are not rendered by default.

circuit_maintenance_parser/parser.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from circuit_maintenance_parser.errors import ParserError
1717
from circuit_maintenance_parser.output import Status, Impact, CircuitImpact
1818
from circuit_maintenance_parser.constants import EMAIL_HEADER_SUBJECT, EMAIL_HEADER_DATE
19+
from circuit_maintenance_parser.utils import Geolocator
1920

2021
# pylint: disable=no-member
2122

@@ -33,6 +34,8 @@ class Parser(BaseModel, extra=Extra.forbid):
3334
# _data_types are used to match the Parser to each type of DataPart
3435
_data_types = ["text/plain", "plain"]
3536

37+
_geolocator = Geolocator()
38+
3639
@classmethod
3740
def get_data_types(cls) -> List[str]:
3841
"""Return the expected data type."""

circuit_maintenance_parser/parsers/cogent.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from bs4.element import ResultSet # type: ignore
88

99
from circuit_maintenance_parser.parser import Html, Impact, CircuitImpact, Status
10-
from circuit_maintenance_parser.utils import city_timezone
1110

1211
logger = logging.getLogger(__name__)
1312

@@ -48,7 +47,7 @@ def parse_div(self, divs: ResultSet, data: Dict): # pylint: disable=too-many-lo
4847
elif line.startswith("Cogent customers receiving service"):
4948
match = re.search(r"[^Cogent].*?((\b[A-Z][a-z\s-]+)+, ([A-Za-z-]+[\s-]))", line)
5049
if match:
51-
parsed_timezone = city_timezone(match.group(1).strip())
50+
parsed_timezone = self._geolocator.city_timezone(match.group(1).strip())
5251
local_timezone = timezone(parsed_timezone)
5352
# set start time using the local city timezone
5453
start = datetime.strptime(start_str, "%I:%M %p %d/%m/%Y")

circuit_maintenance_parser/utils.py

Lines changed: 100 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,113 @@
11
"""Utility functions for the library."""
2+
import os
3+
import logging
4+
from typing import Tuple, Dict, Union
5+
import csv
6+
27
from geopy.exc import GeocoderUnavailable, GeocoderTimedOut, GeocoderServiceError # type: ignore
38
from geopy.geocoders import Nominatim # type: ignore
49
from tzwhere import tzwhere # type: ignore
10+
import backoff # type: ignore
11+
512
from .errors import ParserError
613

14+
logger = logging.getLogger(__name__)
15+
16+
dirname = os.path.dirname(__file__)
17+
18+
19+
class Geolocator:
20+
"""Class to obtain Geo Location coordinates."""
21+
22+
# Cache the local locations DB and the timezone resolver as class attributes
23+
db_location: Dict[Union[Tuple[str, str], str], Tuple[float, float]] = {}
24+
timezone = None
25+
26+
def __init__(self):
27+
"""Initialize instance."""
28+
self.load_db_location()
29+
self.load_timezone()
30+
31+
@classmethod
32+
def load_timezone(cls):
33+
"""Load the timezone resolver."""
34+
if cls.timezone is None:
35+
cls.timezone = tzwhere.tzwhere()
36+
logger.info("Loaded local timezone resolver.")
37+
38+
@classmethod
39+
def load_db_location(cls):
40+
"""Load the locations DB from CSV into a Dict."""
41+
with open(os.path.join(dirname, "data", "worldcities.csv")) as csvfile:
42+
reader = csv.DictReader(csvfile)
43+
for row in reader:
44+
# Index by city and country
45+
cls.db_location[(row["city_ascii"], row["country"])] = (float(row["lat"]), float(row["lng"]))
46+
# Index by city (first entry wins if duplicated names)
47+
if row["city_ascii"] not in cls.db_location:
48+
cls.db_location[row["city_ascii"]] = (float(row["lat"]), float(row["lng"]))
749

8-
def city_timezone(city: str) -> str:
9-
"""Get the timezone for a given city.
50+
def get_location(self, city: str) -> Tuple[float, float]:
51+
"""Get location."""
52+
try:
53+
location_coordinates = self.get_location_from_local_file(city)
54+
except ValueError:
55+
location_coordinates = self.get_location_from_api(city)
1056

11-
Args:
12-
city (str): Geographic location name
13-
"""
14-
try:
57+
logger.debug(
58+
"Resolved city %s to coordinates: lat %s - lon %s", city, location_coordinates[0], location_coordinates[1],
59+
)
60+
return location_coordinates
61+
62+
def get_location_from_local_file(self, city: str) -> Tuple[float, float]:
63+
"""Get location from Local DB."""
64+
city_name = city.split(", ")[0]
65+
country = city.split(", ")[-1]
66+
67+
lat, lng = self.db_location.get((city_name, country), self.db_location.get(city_name, (None, None)))
68+
if lat and lng:
69+
logger.debug("Resolved %s to lat %s, lon %sfrom local locations DB.", city, lat, lng)
70+
return (lat, lng)
71+
72+
logger.debug("City %s was not resolvable in the local locations DB.", city)
73+
raise ValueError
74+
75+
@staticmethod
76+
@backoff.on_exception(
77+
backoff.expo, (GeocoderUnavailable, GeocoderTimedOut, GeocoderServiceError), max_time=10, logger=logger,
78+
)
79+
def get_location_from_api(city: str) -> Tuple[float, float]:
80+
"""Get location from API."""
1581
geolocator = Nominatim(user_agent="circuit_maintenance")
1682
location = geolocator.geocode(city) # API call to OpenStreetMap web service
17-
timezone = (
18-
tzwhere.tzwhere()
19-
) # TODO: Offline loading of timezone location data is quite slow. Look for better alternative
20-
return timezone.tzNameAt(location.latitude, location.longitude)
21-
except (GeocoderUnavailable, GeocoderTimedOut, GeocoderServiceError):
22-
raise ParserError( # pylint: disable=raise-missing-from
23-
"Cannot connect to the remote Geolocator API to determine timezone"
24-
)
83+
logger.debug("Resolved %s to %s from OpenStreetMap webservice.", city, location)
84+
return (location.latitude, location.longitude)
85+
86+
def city_timezone(self, city: str) -> str:
87+
"""Get the timezone for a given city.
88+
89+
Args:
90+
city (str): Geographic location name
91+
"""
92+
if self.timezone is not None:
93+
try:
94+
latitude, longitude = self.get_location(city)
95+
timezone = self.timezone.tzNameAt(latitude, longitude)
96+
if not timezone:
97+
# In some cases, given a latitude and longitude, the tzwhere library returns
98+
# an empty timezone, so we try with the coordinates from the API as an alternative
99+
latitude, longitude = self.get_location_from_api(city)
100+
timezone = self.timezone.tzNameAt(latitude, longitude)
101+
102+
if timezone:
103+
logger.debug("Matched city %s to timezone %s", city, timezone)
104+
return timezone
105+
except Exception as exc:
106+
logger.error("Cannot obtain the timezone for city %s: %s", city, exc)
107+
raise ParserError( # pylint: disable=raise-missing-from
108+
f"Cannot obtain the timezone for city {city}: {exc}"
109+
)
110+
raise ParserError("Timezone resolution not properly initalized.")
25111

26112

27113
def rgetattr(obj, attr):

poetry.lock

Lines changed: 13 additions & 13 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ bs4 = "^0.0.1"
3030
lxml = "^4.6.2"
3131
geopy = "^2.1.0"
3232
tzwhere = "^3.0.3"
33+
backoff = "^1.11.1"
3334

3435
[tool.poetry.dev-dependencies]
3536
pytest = "^6.2.2"

tests/unit/test_e2e.py

Lines changed: 23 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
GenericProvider,
1616
AquaComms,
1717
AWS,
18-
# Cogent,
18+
Cogent,
1919
Colt,
2020
EUNetworks,
2121
HGC,
@@ -62,29 +62,28 @@
6262
[Path(dir_path, "data", "aws", "aws2_result.json"),],
6363
),
6464
# Cogent
65-
# TODO: Recover tests back when issue #101 is fixed
66-
# (
67-
# Cogent,
68-
# [
69-
# ("html", Path(dir_path, "data", "cogent", "cogent1.html")),
70-
# (EMAIL_HEADER_DATE, Path(dir_path, "data", "date", "email_date_1")),
71-
# ],
72-
# [
73-
# Path(dir_path, "data", "cogent", "cogent1_result.json"),
74-
# Path(dir_path, "data", "date", "email_date_1_result.json"),
75-
# ],
76-
# ),
77-
# (
78-
# Cogent,
79-
# [
80-
# ("html", Path(dir_path, "data", "cogent", "cogent2.html")),
81-
# (EMAIL_HEADER_DATE, Path(dir_path, "data", "date", "email_date_1")),
82-
# ],
83-
# [
84-
# Path(dir_path, "data", "cogent", "cogent2_result.json"),
85-
# Path(dir_path, "data", "date", "email_date_1_result.json"),
86-
# ],
87-
# ),
65+
(
66+
Cogent,
67+
[
68+
("html", Path(dir_path, "data", "cogent", "cogent1.html")),
69+
(EMAIL_HEADER_DATE, Path(dir_path, "data", "date", "email_date_1")),
70+
],
71+
[
72+
Path(dir_path, "data", "cogent", "cogent1_result.json"),
73+
Path(dir_path, "data", "date", "email_date_1_result.json"),
74+
],
75+
),
76+
(
77+
Cogent,
78+
[
79+
("html", Path(dir_path, "data", "cogent", "cogent2.html")),
80+
(EMAIL_HEADER_DATE, Path(dir_path, "data", "date", "email_date_1")),
81+
],
82+
[
83+
Path(dir_path, "data", "cogent", "cogent2_result.json"),
84+
Path(dir_path, "data", "date", "email_date_1_result.json"),
85+
],
86+
),
8887
# Colt
8988
(
9089
Colt,

tests/unit/test_parsers.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from circuit_maintenance_parser.parsers.aquacomms import HtmlParserAquaComms1, SubjectParserAquaComms1
1111
from circuit_maintenance_parser.parsers.aws import SubjectParserAWS1, TextParserAWS1
1212

13-
# from circuit_maintenance_parser.parsers.cogent import HtmlParserCogent1
13+
from circuit_maintenance_parser.parsers.cogent import HtmlParserCogent1
1414
from circuit_maintenance_parser.parsers.colt import CsvParserColt1
1515
from circuit_maintenance_parser.parsers.equinix import HtmlParserEquinix, SubjectParserEquinix
1616
from circuit_maintenance_parser.parsers.gtt import HtmlParserGTT1
@@ -76,16 +76,16 @@
7676
Path(dir_path, "data", "aws", "aws2_subject_parser_result.json"),
7777
),
7878
# Cogent
79-
# TODO: Recover testing when issue #101 is fixed # (
80-
# HtmlParserCogent1,
81-
# Path(dir_path, "data", "cogent", "cogent1.html"),
82-
# Path(dir_path, "data", "cogent", "cogent1_result.json"),
83-
# ),
84-
# (
85-
# HtmlParserCogent1,
86-
# Path(dir_path, "data", "cogent", "cogent2.html"),
87-
# Path(dir_path, "data", "cogent", "cogent2_result.json"),
88-
# ),
79+
(
80+
HtmlParserCogent1,
81+
Path(dir_path, "data", "cogent", "cogent1.html"),
82+
Path(dir_path, "data", "cogent", "cogent1_result.json"),
83+
),
84+
(
85+
HtmlParserCogent1,
86+
Path(dir_path, "data", "cogent", "cogent2.html"),
87+
Path(dir_path, "data", "cogent", "cogent2_result.json"),
88+
),
8989
# Colt
9090
(
9191
CsvParserColt1,

0 commit comments

Comments
 (0)