1+ from xml .etree import ElementTree
2+
13import requests
24from requests .adapters import HTTPAdapter
35from requests .packages .urllib3 .util .retry import Retry
46
57from mapswipe_workers .definitions import (
68 OHSOME_API_LINK ,
9+ OSM_API_LINK ,
710 OSMCHA_API_KEY ,
811 OSMCHA_API_LINK ,
912 CustomError ,
@@ -21,13 +24,16 @@ def remove_troublesome_chars(string: str):
2124 return string
2225
2326
def retry_get(url, retries=3, timeout=4, to_osmcha: bool = False):
    """Send a GET request to *url* with automatic retries.

    Args:
        url: Address to request. The retry policy is mounted for the
            ``https://`` prefix only.
        retries: Total number of retries handed to the ``Retry`` policy.
        timeout: Timeout passed to the request.
        to_osmcha: When true, the osmCha API key is sent in the
            ``Authorization`` header; otherwise no extra header is added.

    Returns:
        The response object returned by ``session.get``.
    """
    # Only osmCha requests carry the API key; ``None`` means "no extra
    # headers", which matches calling ``get`` without the argument.
    request_headers = {"Authorization": OSMCHA_API_KEY} if to_osmcha else None
    retry_policy = Retry(total=retries)
    with requests.Session() as http:
        http.mount("https://", HTTPAdapter(max_retries=retry_policy))
        return http.get(url, timeout=timeout, headers=request_headers)
3137
3238
3339def geojsonToFeatureCollection (geojson : dict ) -> dict :
@@ -54,18 +60,14 @@ def query_osmcha(changeset_ids: list, changeset_results):
5460 id_string = "," .join (map (str , changeset_ids ))
5561
5662 url = OSMCHA_API_LINK + f"changesets/?ids={ id_string } "
57- logger .info (url )
58- logger .info (len (changeset_ids ))
59- response = retry_get (url )
63+ response = retry_get (url , to_osmcha = True )
6064 if response .status_code != 200 :
6165 err = f"osmcha request failed: { response .status_code } "
6266 logger .warning (f"{ err } " )
6367 logger .warning (response .json ())
6468 raise CustomError (err )
6569 response = response .json ()
66- logger .info (response )
6770 for feature in response ["features" ]:
68- logger .info (feature )
6971 changeset_results [int (feature ["id" ])] = {
7072 "username" : remove_troublesome_chars (feature ["properties" ]["user" ]),
7173 "userid" : feature ["properties" ]["uid" ],
@@ -76,9 +78,45 @@ def query_osmcha(changeset_ids: list, changeset_results):
7678 return changeset_results
7779
7880
def query_osm(changeset_ids: list, changeset_results):
    """Fetch changeset metadata from the OSM API into *changeset_results*.

    Args:
        changeset_ids: Changeset ids to request in a single call.
        changeset_results: Dict keyed by integer changeset id. It is updated
            in place with one entry per ``changeset`` element found in the
            response.

    Returns:
        The same ``changeset_results`` object. Each filled entry is a dict
        with the keys ``username``, ``userid``, ``comment`` and ``editor``;
        ``comment`` and ``editor`` come from the changeset's ``comment`` and
        ``created_by`` tags and are passed to ``remove_troublesome_chars``
        as ``None`` when the tag is absent.

    Raises:
        CustomError: If the response status code is not 200.
    """
    if not changeset_ids:
        # Nothing to look up; do not send a request with an empty id list.
        return changeset_results

    id_string = ",".join(map(str, changeset_ids))
    url = OSM_API_LINK + f"changesets?changesets={id_string}"
    response = retry_get(url)
    if response.status_code != 200:
        err = f"osm request failed: {response.status_code}"
        logger.warning(f"{err}")
        # The success payload is parsed as XML below, so the error body is
        # not assumed to be JSON: decoding it could raise and hide the
        # CustomError. Log the raw text instead.
        logger.warning(response.text)
        raise CustomError(err)

    tree = ElementTree.fromstring(response.content)
    for changeset in tree.iter("changeset"):
        attributes = changeset.attrib
        # Later tags with the same key overwrite earlier ones, exactly like
        # a sequential scan that keeps the last match.
        tags = {tag.attrib["k"]: tag.attrib["v"] for tag in changeset.iter("tag")}
        changeset_results[int(attributes["id"])] = {
            "username": remove_troublesome_chars(attributes["user"]),
            "userid": attributes["uid"],
            "comment": remove_troublesome_chars(tags.get("comment")),
            "editor": remove_troublesome_chars(tags.get("created_by")),
        }
    return changeset_results
112+
113+
79114def remove_noise_and_add_user_info (json : dict ) -> dict :
80115 """Delete unwanted information from properties."""
81116 logger .info ("starting filtering and adding extra info" )
117+ batch_size = 100
118+
119+ # remove noise
82120 changeset_results = {}
83121
84122 missing_rows = {
@@ -100,20 +138,32 @@ def remove_noise_and_add_user_info(json: dict) -> dict:
100138 changeset_results [new_properties ["changesetId" ]] = None
101139 feature ["properties" ] = new_properties
102140
141+ # add info
103142 len_osm = len (changeset_results .keys ())
104- batches = int (len (changeset_results .keys ()) / 100 ) + 1
143+ batches = int (len (changeset_results .keys ()) / batch_size ) + 1
105144 logger .info (
106- f"""{ len_osm } changesets will be queried in roughly { batches } batches"""
145+ f"""{ len_osm } changesets will be queried in roughly { batches } batches from osmCHA """ # noqa E501
107146 )
108- chunk_list = chunks (list (changeset_results .keys ()), 50 )
147+
148+ chunk_list = chunks (list (changeset_results .keys ()), batch_size )
109149 for i , subset in enumerate (chunk_list ):
110150 changeset_results = query_osmcha (subset , changeset_results )
111151 progress = round (100 * ((i + 1 ) / len (chunk_list )), 1 )
112152 logger .info (f"finished query { i + 1 } /{ len (chunk_list )} , { progress } " )
113153
154+ missing_ids = [i for i , v in changeset_results .items () if v is None ]
155+ chunk_list = chunks (missing_ids , batch_size )
156+ batches = int (len (missing_ids ) / batch_size ) + 1
157+ logger .info (
158+ f"""{ len (missing_ids )} changesets where missing from osmCHA and are now queried via osmAPI in { batches } batches""" # noqa E501
159+ )
160+ for i , subset in enumerate (chunk_list ):
161+ changeset_results = query_osm (subset , changeset_results )
162+ progress = round (100 * ((i + 1 ) / len (chunk_list )), 1 )
163+ logger .info (f"finished query { i + 1 } /{ len (chunk_list )} , { progress } " )
164+
114165 for feature in json ["features" ]:
115166 changeset = changeset_results [int (feature ["properties" ]["changesetId" ])]
116- logger .warn (changeset )
117167 for attribute_name in ["username" , "comment" , "editor" , "userid" ]:
118168 feature ["properties" ][attribute_name ] = changeset [attribute_name ]
119169
0 commit comments