66import backoff
77import certifi
88import random
9+ import re
910
1011from tqdm import tqdm
1112from urllib .parse import urlencode
1213from contextlib import suppress
13- from opencage .geocoder import OpenCageGeocode , OpenCageGeocodeError
14+ from opencage .geocoder import OpenCageGeocode , OpenCageGeocodeError , _query_for_reverse_geocoding
1415
1516class OpenCageBatchGeocoder ():
1617 def __init__ (self , options ):
@@ -38,9 +39,11 @@ async def geocode(self, input, output):
3839
3940 queue = asyncio .Queue (maxsize = self .options .limit )
4041
41- await self .read_input (input , queue )
42+ read_warnings = await self .read_input (input , queue )
4243
4344 if self .options .dry_run :
45+ if not read_warnings :
46+ print ('All good.' )
4447 return
4548
4649 if self .options .headers :
@@ -78,19 +81,28 @@ async def test_request(self):
7881 return { 'error' : exc }
7982
async def read_input(self, input, queue):
    """Read rows from *input*, convert each one and enqueue the result.

    Every row is passed to ``self.read_one_line`` together with its
    1-based line number, and the returned item is put on *queue*.
    Reading stops as soon as the queue reports that it is full.

    An empty row is reported through ``self.log`` and then processed as
    a row holding a single empty string, so it still yields an item.

    Returns:
        True if an empty row was seen or any item's ``'warnings'`` entry
        is ``True``; False otherwise.
    """
    saw_warning = False

    for line_number, row in enumerate(input, start=1):
        if len(row) == 0:
            self.log(f"Line {line_number} - Empty line")
            saw_warning = True
            # Substitute a one-column row so the line still produces an item.
            row = ['']

        parsed = await self.read_one_line(row, line_number)
        # Only an exact ``True`` counts as a warning, as in the flag set above.
        saw_warning |= parsed['warnings'] is True
        await queue.put(parsed)

        # Stop reading once the queue has reached its maximum size.
        if queue.full():
            break

    return saw_warning
102+
93103 async def read_one_line (self , row , row_id ):
104+ warnings = False
105+
94106 if self .options .command == 'reverse' :
95107 input_columns = [1 , 2 ]
96108 elif self .options .input_columns :
@@ -105,14 +117,26 @@ async def read_one_line(self, row, row_id):
105117 # input_columns option uses 1-based indexing
106118 address .append (row [column - 1 ])
107119 except IndexError :
108- self .log (f"Missing input column { column } in { row } " )
120+ self .log (f"Line { row_id } - Missing input column { column } in { row } " )
121+ warnings = True
109122 else :
110123 address = row
111124
112- if self .options .command == 'reverse' and len (address ) != 2 :
113- self .log (f"Expected two comma-separated values for reverse geocoding, got { address } " )
125+ if self .options .command == 'reverse' :
114126
115- return { 'row_id' : row_id , 'address' : ',' .join (address ), 'original_columns' : row }
127+ if len (address ) != 2 :
128+ self .log (f"Line { row_id } - Expected two comma-separated values for reverse geocoding, got { address } " )
129+ else :
130+ # _query_for_reverse_geocoding attempts to convert into numbers. We rather have it fail
131+ # now than during the actual geocoding
132+ try :
133+ _query_for_reverse_geocoding (address [0 ], address [1 ])
134+ except :
135+ self .log (f"Line { row_id } - Does not look like latitude and longitude: '{ address [0 ]} ' and '{ address [1 ]} '" )
136+ warnings = True
137+ address = []
138+
139+ return { 'row_id' : row_id , 'address' : ',' .join (address ), 'original_columns' : row , 'warnings' : warnings }
116140
117141 async def worker (self , output , queue , progress ):
118142 while True :
@@ -147,8 +171,9 @@ async def _geocode_one_address():
147171
148172 try :
149173 if self .options .command == 'reverse' :
150- lon , lat = address .split (',' )
151- geocoding_results = await geocoder .reverse_geocode_async (lon , lat , ** params )
174+ if ',' in address :
175+ lon , lat = address .split (',' )
176+ geocoding_results = await geocoder .reverse_geocode_async (lon , lat , ** params )
152177 else :
153178 geocoding_results = await geocoder .geocode_async (address , ** params )
154179 except OpenCageGeocodeError as exc :
@@ -205,6 +230,7 @@ async def write_one_geocoding_result(self, output, row_id, address, geocoding_re
205230 output .writerow (row )
206231 self .write_counter = self .write_counter + 1
207232
233+
def log(self, message):
    """Write *message* followed by a newline to stderr.

    Nothing is written when ``self.options.quiet`` is truthy.
    """
    verbose = not self.options.quiet
    if verbose:
        line = f"{message}\n"
        sys.stderr.write(line)
0 commit comments