|
1 | | -import copy |
2 | | -from enrichers.utils import ( |
3 | | - clean_facility_name, |
4 | | - NOMINATIM_DELAY, |
5 | | -) |
6 | | -from schemas import enrich_resp_schema |
7 | | -import time |
8 | | -from utils import ( |
9 | | - logger, |
10 | | - session, |
11 | | -) |
| 1 | +from schemas import default_coords |
| 2 | +from enrichers import Enrichment |
| 3 | +from utils import logger |
12 | 4 |
|
13 | 5 |
|
class OpenStreetMap(Enrichment):
    """Enrichment that looks a facility up via the OpenStreetMap Nominatim search endpoint.

    NOTE(review): relies on ``self.search_args``, ``self.resp_info``,
    ``self._req`` and ``self._clean_facility_name``, presumably provided by
    ``Enrichment`` -- confirm against the base class.
    """

    def search(self) -> dict:
        """Query Nominatim for the facility and record the best match in ``self.resp_info``.

        Reads ``facility_name`` (required) and ``address`` (optional) from
        ``self.search_args``.  Without an address, only the cleaned facility
        name is searched.  With an address, up to three queries are tried
        from most to least specific (name + full address, full address,
        locality), stopping at the first one that returns results.

        Every query string, and any failure, is appended to
        ``self.resp_info["search_query_steps"]``.

        Among the returned results, the first whose ``type`` or
        ``display_name`` mentions a detention-related term and that carries
        both ``osm_id`` and ``osm_type`` wins: its OSM object URL, title and
        coordinates are stored.  Otherwise the first result is used and a
        marker URL is built from its latitude/longitude.

        Returns:
            ``self.resp_info``; ``url``, ``title`` and the coordinate details
            are only written when a usable result was found.

        Raises:
            KeyError: if ``facility_name`` is missing from ``self.search_args``
                or a non-empty ``address`` lacks one of ``street``,
                ``locality``, ``administrative_area`` or ``postal_code``.
        """
        facility_name = self.search_args["facility_name"]
        address = self.search_args.get("address", {})
        search_name = self._clean_facility_name(facility_name)
        search_url = "https://nominatim.openstreetmap.org/search"
        self.resp_info["enrichment_type"] = "openstreetmap"

        if not address:
            logger.debug("No address for %s, simply searching for name", facility_name)
            queries = [search_name]
        else:
            full_address = (
                f"{address['street']} {address['locality']}, {address['administrative_area']} {address['postal_code']}"
            )
            locality = f"{address['locality']}, {address['administrative_area']} {address['postal_code']}"
            # Ordered from most to least specific.
            queries = [f"{search_name} {full_address}", full_address, locality]

        data = []
        for query in queries:
            logger.debug("Searching OSM for %s", query)
            self.resp_info["search_query_steps"].append(query)  # type: ignore [attr-defined]
            params = {
                "q": query,
                "format": "json",
                "limit": 5,
                "dedupe": 1,
            }
            try:
                response = self._req(search_url, params=params, timeout=15)
                results = response.json()
            except Exception as e:
                # Best effort: record the failure and fall through to the next, broader query.
                logger.debug(" OSM search error for '%s': %s", facility_name, e)
                self.resp_info["search_query_steps"].append(f"(Failed -> {e})")  # type: ignore [attr-defined]
                continue
            # Only a non-empty JSON array counts as a hit; anything else is treated as "no results".
            if isinstance(results, list) and results:
                data = results
                break

        if not data:
            # Nothing found (or every request failed): return the response without a url/title.
            return self.resp_info

        # when the URL result is a "way" this is usually correct.
        # checks top five results.
        match_terms = ("prison", "detention", "correctional", "jail")
        for result in data:
            osm_type = result.get("type", "").lower()
            title = result.get("display_name", "")
            display_name = title.lower()
            if not any(term in osm_type or term in display_name for term in match_terms):
                continue
            # todo courthouse could be added, or other tags such as "prison:for=migrant" as a clear positive search result.
            osm_id = result.get("osm_id", "")
            osm_type_prefix = result.get("osm_type", "")
            if osm_id and osm_type_prefix:
                self.resp_info["url"] = f"https://www.openstreetmap.org/{osm_type_prefix}/{osm_id}"
                self.resp_info["details"]["latitude"] = result.get("lat", default_coords["latitude"])  # type: ignore [index]
                self.resp_info["details"]["longitude"] = result.get("lon", default_coords["longitude"])  # type: ignore [index]
                self.resp_info["title"] = title
                return self.resp_info

        # fallback to first result
        first_result = data[0]
        logger.debug("Address searches didn't directly find anything, just using the first result: %s", first_result)
        title = first_result.get("display_name", "")
        lat = first_result.get("lat", default_coords["latitude"])
        lon = first_result.get("lon", default_coords["longitude"])
        self.resp_info["search_query_steps"].append(f"{lat}&{lon}")  # type: ignore [attr-defined]
        if lat and lon:
            self.resp_info["url"] = f"https://www.openstreetmap.org/?mlat={lat}&mlon={lon}&zoom=15"
            self.resp_info["details"]["latitude"] = lat  # type: ignore [index]
            self.resp_info["details"]["longitude"] = lon  # type: ignore [index]
            self.resp_info["title"] = title
        return self.resp_info
0 commit comments