Skip to content

Commit d4f21cd

Browse files
committed
query from osm if not available in osmcha
1 parent 88ab2f1 commit d4f21cd

File tree

2 files changed

+63
-12
lines changed

2 files changed

+63
-12
lines changed

mapswipe_workers/mapswipe_workers/definitions.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
LOGGING_FILE_PATH = os.path.join(DATA_PATH, "mapswipe_workers.log")
1414

1515
OHSOME_API_LINK = "https://api.ohsome.org/v1/"
16+
OSM_API_LINK = "https://www.openstreetmap.org/api/0.6/"
1617
OSMCHA_API_LINK = "https://osmcha.org/api/v1/"
1718
OSMCHA_API_KEY = os.environ["OSMCHA_API_KEY"]
1819

mapswipe_workers/mapswipe_workers/utils/api_calls.py

Lines changed: 62 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
1+
from xml.etree import ElementTree
2+
13
import requests
24
from requests.adapters import HTTPAdapter
35
from requests.packages.urllib3.util.retry import Retry
46

57
from mapswipe_workers.definitions import (
68
OHSOME_API_LINK,
9+
OSM_API_LINK,
710
OSMCHA_API_KEY,
811
OSMCHA_API_LINK,
912
CustomError,
@@ -21,13 +24,16 @@ def remove_troublesome_chars(string: str):
2124
return string
2225

2326

24-
def retry_get(url, retries=3, timeout=4, to_osmcha: bool = False):
    """Send a GET request to ``url`` through a session with a retry policy.

    A ``Retry(total=retries)`` policy is mounted for ``https://`` URLs.
    The OSMCHA API key is sent as ``Authorization`` header only when
    ``to_osmcha`` is true; all other requests go out without extra headers.
    """
    request_kwargs = {"timeout": timeout}
    if to_osmcha:
        request_kwargs["headers"] = {"Authorization": OSMCHA_API_KEY}

    adapter = HTTPAdapter(max_retries=Retry(total=retries))
    with requests.Session() as session:
        session.mount("https://", adapter)
        return session.get(url, **request_kwargs)
3137

3238

3339
def geojsonToFeatureCollection(geojson: dict) -> dict:
@@ -54,18 +60,14 @@ def query_osmcha(changeset_ids: list, changeset_results):
5460
id_string = ",".join(map(str, changeset_ids))
5561

5662
url = OSMCHA_API_LINK + f"changesets/?ids={id_string}"
57-
logger.info(url)
58-
logger.info(len(changeset_ids))
59-
response = retry_get(url)
63+
response = retry_get(url, to_osmcha=True)
6064
if response.status_code != 200:
6165
err = f"osmcha request failed: {response.status_code}"
6266
logger.warning(f"{err}")
6367
logger.warning(response.json())
6468
raise CustomError(err)
6569
response = response.json()
66-
logger.info(response)
6770
for feature in response["features"]:
68-
logger.info(feature)
6971
changeset_results[int(feature["id"])] = {
7072
"username": remove_troublesome_chars(feature["properties"]["user"]),
7173
"userid": feature["properties"]["uid"],
@@ -76,9 +78,45 @@ def query_osmcha(changeset_ids: list, changeset_results):
7678
return changeset_results
7779

7880

81+
def query_osm(changeset_ids: list, changeset_results):
    """Fill ``changeset_results`` with changeset metadata from the OSM API.

    Args:
        changeset_ids: Ids of the changesets to request in a single call.
        changeset_results: Dict keyed by integer changeset id. The entry of
            every changeset contained in the response is overwritten in place.

    Returns:
        The same ``changeset_results`` dict, where each returned changeset
        maps to a dict with ``username``, ``userid``, ``comment`` and
        ``editor``.

    Raises:
        CustomError: If the OSM API does not answer with status code 200.
    """
    if not changeset_ids:
        # Nothing to look up; do not send a request with an empty id list.
        return changeset_results

    id_string = ",".join(map(str, changeset_ids))
    url = OSM_API_LINK + f"changesets?changesets={id_string}"
    response = retry_get(url)
    if response.status_code != 200:
        err = f"osm request failed: {response.status_code}"
        logger.warning(f"{err}")
        # The OSM API replies with XML or plain text, so response.json()
        # would raise here and hide the actual request failure.
        logger.warning(response.text)
        raise CustomError(err)
    tree = ElementTree.fromstring(response.content)

    for changeset in tree.iter("changeset"):
        # Collect the changeset tags once; a later duplicate key wins, which
        # matches scanning the tags in document order.
        tags = {tag.get("k"): tag.get("v") for tag in changeset.iter("tag")}

        # user/uid are read with .get() because changesets without user
        # information would otherwise abort the whole batch with a KeyError.
        # NOTE(review): remove_troublesome_chars is expected to pass None
        # through, as it already had to for a missing comment/editor tag.
        changeset_results[int(changeset.attrib["id"])] = {
            "username": remove_troublesome_chars(changeset.get("user")),
            "userid": changeset.get("uid"),
            "comment": remove_troublesome_chars(tags.get("comment")),
            "editor": remove_troublesome_chars(tags.get("created_by")),
        }
    return changeset_results
112+
113+
79114
def remove_noise_and_add_user_info(json: dict) -> dict:
80115
"""Delete unwanted information from properties."""
81116
logger.info("starting filtering and adding extra info")
117+
batch_size = 100
118+
119+
# remove noise
82120
changeset_results = {}
83121

84122
missing_rows = {
@@ -100,20 +138,32 @@ def remove_noise_and_add_user_info(json: dict) -> dict:
100138
changeset_results[new_properties["changesetId"]] = None
101139
feature["properties"] = new_properties
102140

141+
# add info
103142
len_osm = len(changeset_results.keys())
104-
batches = int(len(changeset_results.keys()) / 100) + 1
143+
batches = int(len(changeset_results.keys()) / batch_size) + 1
105144
logger.info(
106-
f"""{len_osm} changesets will be queried in roughly {batches} batches"""
145+
f"""{len_osm} changesets will be queried in roughly {batches} batches from osmCHA""" # noqa E501
107146
)
108-
chunk_list = chunks(list(changeset_results.keys()), 50)
147+
148+
chunk_list = chunks(list(changeset_results.keys()), batch_size)
109149
for i, subset in enumerate(chunk_list):
110150
changeset_results = query_osmcha(subset, changeset_results)
111151
progress = round(100 * ((i + 1) / len(chunk_list)), 1)
112152
logger.info(f"finished query {i+1}/{len(chunk_list)}, {progress}")
113153

154+
missing_ids = [i for i, v in changeset_results.items() if v is None]
155+
chunk_list = chunks(missing_ids, batch_size)
156+
batches = int(len(missing_ids) / batch_size) + 1
157+
logger.info(
158+
f"""{len(missing_ids)} changesets where missing from osmCHA and are now queried via osmAPI in {batches} batches""" # noqa E501
159+
)
160+
for i, subset in enumerate(chunk_list):
161+
changeset_results = query_osm(subset, changeset_results)
162+
progress = round(100 * ((i + 1) / len(chunk_list)), 1)
163+
logger.info(f"finished query {i+1}/{len(chunk_list)}, {progress}")
164+
114165
for feature in json["features"]:
115166
changeset = changeset_results[int(feature["properties"]["changesetId"])]
116-
logger.warn(changeset)
117167
for attribute_name in ["username", "comment", "editor", "userid"]:
118168
feature["properties"][attribute_name] = changeset[attribute_name]
119169

0 commit comments

Comments
 (0)