1515import pygeoprocessing
1616import pygeoprocessing .geoprocessing
1717import requests
18+ import shapely .prepared
19+ import shapely .wkb
1820import yaml
1921from flask import jsonify
2022from flask import request
2123from flask_cors import CORS
2224from google .cloud import storage
2325from osgeo import gdal
2426from osgeo import osr
27+ from osgeo import ogr
2528
2629app = flask .Flask (__name__ , template_folder = 'templates' )
2730
5861app .logger .info ("WORKSPACE_DIR: %s" , WORKSPACE_DIR )
5962pygeoprocessing .geoprocessing ._LOGGING_PERIOD = 1.0
6063
64+ RASTER = 'raster'
65+ VECTOR = 'vector'
66+
6167
6268def _epsg_to_wkt (epsg_code ):
6369 srs = osr .SpatialReference ()
6470 srs .ImportFromEPSG (int (epsg_code ))
6571 return srs .ExportToWkt ()
6672
6773
74+ def _clip_vector_to_bounding_box (
75+ source_vector_path , target_bounding_box , target_vector_path ,
76+ target_projection_wkt = None ):
77+ """Clip a vector to the intersection of a target bounding box.
78+
79+ Optionally also reproject the vector.
80+
81+ Args:
82+ source_vector_path (str): path to a FlatGeobuf to be clipped.
83+ target_bounding_box (list): list of the form [xmin, ymin, xmax, ymax]
84+ target_vector_path (str): path to a FlatGeobuf to store the clipped
85+ vector.
86+ target_projection_wkt=None (str): target projection in wkt. Can be
87+ none to indicate no reprojection is required.
88+
89+ Returns:
90+ None
91+ """
92+ shapely_mask = shapely .prepared .prep (
93+ shapely .geometry .box (* target_bounding_box ))
94+
95+ app .logger .debug ("Opening base vector..." )
96+ base_vector = gdal .OpenEx (source_vector_path , gdal .OF_VECTOR )
97+ base_layer = base_vector .GetLayer ()
98+ base_layer .SetSpatialFilterRect (* target_bounding_box )
99+ base_layer_defn = base_layer .GetLayerDefn ()
100+ base_geom_type = base_layer .GetGeomType ()
101+ base_srs = base_layer .GetSpatialRef ()
102+
103+ if target_projection_wkt is not None :
104+ target_srs = osr .SpatialReference ()
105+ target_srs .ImportFromWkt (target_projection_wkt )
106+ coord_trans = osr .CreateCoordinateTransformation (base_srs , target_srs )
107+ else :
108+ target_srs = base_srs
109+
110+ app .logger .debug ("Setting up target..." )
111+ target_driver = gdal .GetDriverByName ('FlatGeobuf' )
112+ target_vector = target_driver .Create (
113+ target_vector_path , 0 , 0 , 0 , gdal .GDT_Unknown )
114+ target_layer = target_vector .CreateLayer (
115+ base_layer_defn .GetName (), target_srs , base_geom_type )
116+ target_layer .CreateFields (base_layer .schema )
117+
118+ app .logger .debug ("Clipping vector..." )
119+ target_layer .StartTransaction ()
120+ invalid_feature_count = 0
121+ n_processed = 0
122+ last_log_msg_time = time .time ()
123+ for feature in base_layer :
124+ now = time .time ()
125+ if now >= last_log_msg_time + 2.0 :
126+ app .logger .debug (f"Processed { n_processed } features so far" )
127+ last_log_msg_time = now
128+ n_processed += 1
129+
130+ invalid = False
131+ geometry = feature .GetGeometryRef ()
132+ try :
133+ shapely_geom = shapely .wkb .loads (bytes (geometry .ExportToWkb ()))
134+ # Catch invalid geometries that cannot be loaded by Shapely;
135+ # e.g. polygons with too few points for their type
136+ except shapely .errors .ShapelyError :
137+ invalid = True
138+ else :
139+ if shapely_geom .is_valid :
140+ # Check for intersection rather than use gdal.Layer.Clip()
141+ # to preserve the shape of the polygons
142+ if shapely_mask .intersects (shapely_geom ):
143+ # This appears to use the network WAY less
144+ new_feature = ogr .Feature .Clone (feature )
145+
146+ # If we need to, transform the geometry
147+ if target_projection_wkt is not None :
148+ geometry .Transform (coord_trans )
149+ new_feature .SetGeometry (geometry )
150+
151+ target_layer .CreateFeature (new_feature )
152+ else :
153+ invalid = True
154+ finally :
155+ if invalid :
156+ invalid_feature_count += 1
157+ app .logger .warning (
158+ f"The geometry at feature { feature .GetFID ()} is invalid "
159+ "and will be skipped." )
160+
161+ target_layer .CommitTransaction ()
162+
163+ if invalid_feature_count :
164+ app .logger .warning (
165+ f"{ invalid_feature_count } features in { source_vector_path } "
166+ "were found to be invalid during clipping and were skipped." )
167+
168+ base_layer = None
169+ base_vector = None
170+ target_layer = None
171+ target_vector = None
172+
173+
68174@functools .lru_cache
69- def cached_raster_info (vsi_raster_path ):
70- try :
71- return pygeoprocessing .get_raster_info (vsi_raster_path )
72- except Exception :
73- app .logger .error ("Failed to read raster info for %s" , vsi_raster_path )
74- raise
175+ def cached_file_info (vsi_file_path , file_type ):
176+ app .logger .info (f"Getting file info for { file_type } at { vsi_file_path } " )
177+ if file_type == RASTER :
178+ try :
179+ return pygeoprocessing .get_raster_info (vsi_file_path )
180+ except Exception :
181+ app .logger .error ("Failed to read raster info for %s" , vsi_file_path )
182+ raise
183+ elif file_type == VECTOR :
184+ try :
185+ return pygeoprocessing .get_vector_info (vsi_file_path )
186+ except Exception :
187+ app .logger .error ("Failed to read vector info for %s" , vsi_file_path )
188+ raise
75189
76190
77191def _align_bbox (bbox , raster_info ):
@@ -123,19 +237,19 @@ def epsg_info():
123237
124238@app .route ('/metadata' , methods = ['GET' ])
125239def metadata ():
126- cog_url = request .args .get ('cog_url ' )
240+ file_url = request .args .get ('file_url ' )
127241
128242 yaml_data = yaml .load (
129- requests .get (f'{ cog_url } .yml' , stream = True ).content ,
243+ requests .get (f'{ file_url } .yml' , stream = True ).content ,
130244 Loader = yaml .Loader )
131245 return yaml_data
132246
133247
134248@app .route ('/info' , methods = ['GET' ])
135249@functools .lru_cache
136250def info ():
137- cog_url = request .args .get ("cog_url " )
138- result = gdal .Info (f'/vsicurl/{ cog_url } ' , options = ['-json' ])
251+ file_url = request .args .get ("file_url " )
252+ result = gdal .Info (f'/vsicurl/{ file_url } ' , options = ['-json' ])
139253
140254 return jsonify ({
141255 'status' : 'success' ,
@@ -155,93 +269,108 @@ def clip_app():
155269
156270@app .route ("/clip" , methods = ['POST' ])
157271def clip ():
158- #data = flask.request.json
159- #source_raster = data['cog_url']
160- #target_bbox = data['target_bbox']
161- #target_epsg = data['target_epsg']
162-
163- #SOURCE_LOGGER.info(f"Clipping {source_raster} to {target_bbox}")
164-
165- #for i in range(11):
166- # time.sleep(1)
167- # print(f"logging progress {i*10}%")
168- # SOURCE_LOGGER.info(f"Progress: {i*10}%")
169- #SOURCE_LOGGER.info("Done!")
170-
171- #return jsonify({
172- # 'status': 'success',
173- # 'message': 'Clipping finished'
174- #})
175-
176272 parameters = request .get_json ()
177273 app .logger .info (parameters )
178274
179- if not parameters ['cog_url' ].startswith (TRUSTED_BUCKET ):
180- app .logger .error ("Invalid source raster, not in the known bucket: %s" ,
181- parameters ['cog_url' ])
182- raise ValueError ("Invalid COG provided." )
275+ if not parameters ['file_url' ].startswith (TRUSTED_BUCKET ):
276+ app .logger .error ("Invalid source file, not in the known bucket: %s" ,
277+ parameters ['file_url' ])
278+ raise ValueError ("Invalid source file provided." )
279+
280+ source_file_type = parameters ['layer_type' ]
281+ if source_file_type not in [RASTER , VECTOR ]:
282+ raise ValueError ("Invalid file type." )
183283
184284 target_bbox = parameters ["target_bbox" ]
185285
186- # align the bounding box to a raster grid
187- source_raster_path = f'/vsicurl/{ parameters ["cog_url" ]} '
188- source_raster_info = cached_raster_info (source_raster_path )
286+ # align the bounding box
287+ source_file_path = f'/vsicurl/{ parameters ["file_url" ]} '
288+ if source_file_type == VECTOR :
289+ # source_file_path originates in `mappreview` extra; currently always .mvt,
290+ # but leaving this flexible in case we support .geojson again in the future
291+ source_file_path = os .path .splitext (source_file_path )[0 ] + '.fgb'
189292
190- warping_kwargs = {}
191- try :
192- warping_kwargs ['target_projection_wkt' ] = _epsg_to_wkt (
193- parameters ["target_epsg" ])
194- aligned_target_bbox = pygeoprocessing .transform_bounding_box (
195- target_bbox , source_raster_info ['projection_wkt' ],
196- warping_kwargs ['target_projection_wkt' ])
197- except KeyError :
198- # If we're keeping the same project, just align the requested bounding
199- # box to the raster's grid.
200- aligned_target_bbox = _align_bbox (target_bbox , source_raster_info )
293+ source_file_info = cached_file_info (source_file_path , source_file_type )
201294
202- try :
203- # Make sure pixel sizes are floats.
204- target_cellsize = list (map (float , parameters ["target_cellsize" ]))
205- except KeyError :
206- target_cellsize = source_raster_info ['pixel_size' ]
295+ if source_file_type == RASTER :
296+ app .logger .info ("CLIPPING RASTER..." )
297+ warping_kwargs = {}
298+ try :
299+ warping_kwargs ['target_projection_wkt' ] = _epsg_to_wkt (
300+ parameters ["target_epsg" ])
301+ aligned_target_bbox = pygeoprocessing .transform_bounding_box (
302+ target_bbox , source_file_info ['projection_wkt' ],
303+ warping_kwargs ['target_projection_wkt' ])
304+ except KeyError :
305+ # If we're keeping the same projection, just align the requested bounding
306+ # box to the raster's grid.
307+ aligned_target_bbox = _align_bbox (target_bbox , source_file_info )
308+
309+ try :
310+ # Make sure pixel sizes are floats.
311+ target_cellsize = list (map (float , parameters ["target_cellsize" ]))
312+ except KeyError :
313+ target_cellsize = source_file_info ['pixel_size' ]
207314
208- # make sure the target cell's height is negative
209- if not target_cellsize [1 ] < 0 :
210- target_cellsize [1 ] *= - 1
315+ # make sure the target cell's height is negative
316+ if not target_cellsize [1 ] < 0 :
317+ target_cellsize [1 ] *= - 1
211318
212- try :
213- # do the clipping
214- target_basename = os .path .splitext (
215- os .path .basename (parameters ["cog_url" ]))[0 ]
216- target_raster_path = os .path .join (
217- WORKSPACE_DIR , f'{ target_basename } --{ uuid .uuid4 ()} .tif' )
218- pygeoprocessing .warp_raster (
219- source_raster_path , target_cellsize , target_raster_path , 'near' ,
220- target_bb = aligned_target_bbox , ** warping_kwargs )
221- except Exception :
222- app .logger .exception ("Failed to warp raster; aborting" )
223- os .remove (target_raster_path )
224- raise
319+ try :
320+ # do the clipping
321+ target_basename = os .path .splitext (
322+ os .path .basename (parameters ["file_url" ]))[0 ]
323+ target_file_path = os .path .join (
324+ WORKSPACE_DIR , f'{ target_basename } --{ uuid .uuid4 ()} .tif' )
325+ pygeoprocessing .warp_raster (
326+ source_file_path , target_cellsize , target_file_path , 'near' ,
327+ target_bb = aligned_target_bbox , ** warping_kwargs )
328+ except Exception :
329+ app .logger .exception ("Failed to warp raster; aborting" )
330+ os .remove (target_file_path )
331+ raise
332+
333+ elif source_file_type == VECTOR :
334+ app .logger .info ("CLIPPING VECTOR..." )
335+ try :
336+ target_projection_wkt = _epsg_to_wkt (
337+ parameters ["target_epsg" ])
338+ except KeyError :
339+ target_projection_wkt = None
340+
341+ try :
342+ # do the clipping
343+ target_basename = os .path .splitext (
344+ os .path .basename (parameters ["file_url" ]))[0 ]
345+ target_file_path = os .path .join (
346+ WORKSPACE_DIR , f'{ target_basename } --{ uuid .uuid4 ()} .fgb' )
347+ _clip_vector_to_bounding_box (
348+ source_file_path , target_bbox , target_file_path ,
349+ target_projection_wkt )
350+ except Exception :
351+ app .logger .exception ("Failed to clip vector; aborting" )
352+ os .remove (target_file_path )
353+ raise
225354
226355 try :
227- filesize = humanize .naturalsize (os .path .getsize (target_raster_path ))
356+ filesize = humanize .naturalsize (os .path .getsize (target_file_path ))
228357
229358 today = datetime .datetime .now ().strftime ('%Y-%m-%d' )
230- bucket_filename = f"{ today } --{ os .path .basename (target_raster_path )} "
359+ bucket_filename = f"{ today } --{ os .path .basename (target_file_path )} "
231360
232361 app .logger .info (f"Uploading to bucket: { bucket_filename } " )
233362 bucketname = re .sub ('^gs://' , '' , TARGET_FILE_BUCKET )
234363 storage_client = storage .Client ()
235364 bucket = storage_client .bucket (bucketname )
236365 blob = bucket .blob (bucket_filename )
237- blob .upload_from_filename (target_raster_path )
366+ blob .upload_from_filename (target_file_path )
238367 except Exception :
239368 app .logger .exception ("Falling back to cmdline gsutil" )
240- subprocess .run (["gsutil" , "cp" , source_raster_path ,
369+ subprocess .run (["gsutil" , "cp" , source_file_path ,
241370 f'{ TARGET_FILE_BUCKET } /{ bucket_filename } ' ])
242371 finally :
243- app .logger .info (f"Deleting local file { target_raster_path } " )
244- os .remove (target_raster_path )
372+ app .logger .info (f"Deleting local file { target_file_path } " )
373+ os .remove (target_file_path )
245374
246375 downloadable_raster_path = f"{ TARGET_BUCKET_URL } /{ bucket_filename } "
247376 app .logger .info ("Returning URL: %s" , downloadable_raster_path )
0 commit comments