1313
1414
1515def remove_troublesome_chars (string : str ):
16+ if type (string ) is not str :
17+ return string
1618 troublesome_chars = {'"' : "" , "'" : "" , "\n " : "" }
1719 for k , v in troublesome_chars .items ():
1820 string = string .replace (k , v )
@@ -21,9 +23,9 @@ def remove_troublesome_chars(string: str):
2123
def retry_get(url, retries=3, timeout=4):
    """Send a GET request to ``url`` through a session configured with retries.

    Args:
        url: Address to request.
        retries: Value passed as ``total`` to ``Retry``.
        timeout: Value passed as ``timeout`` to ``session.get``.

    Returns:
        Whatever ``session.get`` returns for the request.
    """
    retry = Retry(total=retries)
    # Use the session as a context manager so it is closed once the request
    # has returned, instead of leaving it open for the garbage collector.
    with requests.Session() as session:
        # The retry-enabled adapter is mounted for the "https://" prefix only.
        session.mount("https://", HTTPAdapter(max_retries=retry))
        return session.get(url, timeout=timeout)
2729
2830
2931def geojsonToFeatureCollection (geojson : dict ) -> dict :
@@ -67,18 +69,15 @@ def query_osm(changeset_ids: list, changeset_results):
6769 comment = created_by = None
6870 for tag in changeset .iter ("tag" ):
6971 if tag .attrib ["k" ] == "comment" :
70- try :
71- comment = remove_troublesome_chars (tag .attrib ["v" ])
72- except AttributeError :
73- pass
72+ comment = tag .attrib ["v" ]
7473 if tag .attrib ["k" ] == "created_by" :
7574 created_by = tag .attrib ["v" ]
7675
7776 changeset_results [int (id )] = {
78- "username" : username ,
77+ "username" : remove_troublesome_chars ( username ) ,
7978 "userid" : userid ,
80- "comment" : comment ,
81- "created_by" : created_by ,
79+ "comment" : remove_troublesome_chars ( comment ) ,
80+ "created_by" : remove_troublesome_chars ( created_by ) ,
8281 }
8382 return changeset_results
8483
@@ -115,22 +114,13 @@ def remove_noise_and_add_user_info(json: dict) -> dict:
115114 chunk_list = chunks (list (changeset_results .keys ()), 100 )
116115 for i , subset in enumerate (chunk_list ):
117116 changeset_results = query_osm (subset , changeset_results )
118- logger .info (
119- f"finished query { i } /{ len (chunk_list )} , { 100 * round (i / len (chunk_list ), 2 )} %"
120- )
117+ progress = round (100 * ((i + 1 ) / len (chunk_list )), 1 )
118+ logger .info (f"finished query { i + 1 } /{ len (chunk_list )} , { progress } " )
121119
122120 for feature in json ["features" ]:
123121 changeset = changeset_results [feature ["properties" ]["changesetId" ]]
124- feature ["properties" ]["userid" ] = changeset ["userid" ]
125- for attribute_name in ["username" , "comment" , "created_by" ]:
126- # we need to replace " as this will cause problems
127- # when importing to postgres
128- try :
129- feature ["properties" ][attribute_name ] = changeset [
130- attribute_name
131- ].replace ('"' , "" )
132- except AttributeError :
133- pass
122+ for attribute_name in ["username" , "comment" , "created_by" , "userid" ]:
123+ feature ["properties" ][attribute_name ] = changeset [attribute_name ]
134124
135125 logger .info ("finished filtering and adding extra info" )
136126 if any (x > 0 for x in missing_rows .values ()):
0 commit comments