Skip to content

Commit c62e085

Browse files
authored
Merge pull request #179 from natcap/feature/86-vector-clipping
Vector Clipping
2 parents 68d66c6 + 6c39eee commit c62e085

File tree

4 files changed

+233
-98
lines changed

4 files changed

+233
-98
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ env-clip:
3838
@echo ">> make clipping-service"
3939

4040
clipping-service:
41-
python -m gunicorn --chdir ./clipping-service/app app:app --reload
41+
python -m gunicorn --chdir ./clipping-service/app app:app --timeout 180 --reload
4242

4343
ckan-dev:
4444
docker compose -f docker-compose.dev.yml build

clipping-service/app/app.py

Lines changed: 202 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,16 @@
1515
import pygeoprocessing
1616
import pygeoprocessing.geoprocessing
1717
import requests
18+
import shapely.prepared
19+
import shapely.wkb
1820
import yaml
1921
from flask import jsonify
2022
from flask import request
2123
from flask_cors import CORS
2224
from google.cloud import storage
2325
from osgeo import gdal
2426
from osgeo import osr
27+
from osgeo import ogr
2528

2629
app = flask.Flask(__name__, template_folder='templates')
2730

@@ -58,20 +61,131 @@
5861
app.logger.info("WORKSPACE_DIR: %s", WORKSPACE_DIR)
5962
pygeoprocessing.geoprocessing._LOGGING_PERIOD = 1.0
6063

64+
RASTER = 'raster'
65+
VECTOR = 'vector'
66+
6167

6268
def _epsg_to_wkt(epsg_code):
6369
srs = osr.SpatialReference()
6470
srs.ImportFromEPSG(int(epsg_code))
6571
return srs.ExportToWkt()
6672

6773

74+
def _clip_vector_to_bounding_box(
75+
source_vector_path, target_bounding_box, target_vector_path,
76+
target_projection_wkt=None):
77+
"""Clip a vector to the intersection of a target bounding box.
78+
79+
Optionally also reproject the vector.
80+
81+
Args:
82+
source_vector_path (str): path to a FlatGeobuf to be clipped.
83+
target_bounding_box (list): list of the form [xmin, ymin, xmax, ymax]
84+
target_vector_path (str): path to a FlatGeobuf to store the clipped
85+
vector.
86+
target_projection_wkt=None (str): target projection in wkt. Can be
87+
none to indicate no reprojection is required.
88+
89+
Returns:
90+
None
91+
"""
92+
shapely_mask = shapely.prepared.prep(
93+
shapely.geometry.box(*target_bounding_box))
94+
95+
app.logger.debug("Opening base vector...")
96+
base_vector = gdal.OpenEx(source_vector_path, gdal.OF_VECTOR)
97+
base_layer = base_vector.GetLayer()
98+
base_layer.SetSpatialFilterRect(*target_bounding_box)
99+
base_layer_defn = base_layer.GetLayerDefn()
100+
base_geom_type = base_layer.GetGeomType()
101+
base_srs = base_layer.GetSpatialRef()
102+
103+
if target_projection_wkt is not None:
104+
target_srs = osr.SpatialReference()
105+
target_srs.ImportFromWkt(target_projection_wkt)
106+
coord_trans = osr.CreateCoordinateTransformation(base_srs, target_srs)
107+
else:
108+
target_srs = base_srs
109+
110+
app.logger.debug("Setting up target...")
111+
target_driver = gdal.GetDriverByName('FlatGeobuf')
112+
target_vector = target_driver.Create(
113+
target_vector_path, 0, 0, 0, gdal.GDT_Unknown)
114+
target_layer = target_vector.CreateLayer(
115+
base_layer_defn.GetName(), target_srs, base_geom_type)
116+
target_layer.CreateFields(base_layer.schema)
117+
118+
app.logger.debug("Clipping vector...")
119+
target_layer.StartTransaction()
120+
invalid_feature_count = 0
121+
n_processed = 0
122+
last_log_msg_time = time.time()
123+
for feature in base_layer:
124+
now = time.time()
125+
if now >= last_log_msg_time+2.0:
126+
app.logger.debug(f"Processed {n_processed} features so far")
127+
last_log_msg_time = now
128+
n_processed += 1
129+
130+
invalid = False
131+
geometry = feature.GetGeometryRef()
132+
try:
133+
shapely_geom = shapely.wkb.loads(bytes(geometry.ExportToWkb()))
134+
# Catch invalid geometries that cannot be loaded by Shapely;
135+
# e.g. polygons with too few points for their type
136+
except shapely.errors.ShapelyError:
137+
invalid = True
138+
else:
139+
if shapely_geom.is_valid:
140+
# Check for intersection rather than use gdal.Layer.Clip()
141+
# to preserve the shape of the polygons
142+
if shapely_mask.intersects(shapely_geom):
143+
# This appears to use the network WAY less
144+
new_feature = ogr.Feature.Clone(feature)
145+
146+
# If we need to, transform the geometry
147+
if target_projection_wkt is not None:
148+
geometry.Transform(coord_trans)
149+
new_feature.SetGeometry(geometry)
150+
151+
target_layer.CreateFeature(new_feature)
152+
else:
153+
invalid = True
154+
finally:
155+
if invalid:
156+
invalid_feature_count += 1
157+
app.logger.warning(
158+
f"The geometry at feature {feature.GetFID()} is invalid "
159+
"and will be skipped.")
160+
161+
target_layer.CommitTransaction()
162+
163+
if invalid_feature_count:
164+
app.logger.warning(
165+
f"{invalid_feature_count} features in {source_vector_path} "
166+
"were found to be invalid during clipping and were skipped.")
167+
168+
base_layer = None
169+
base_vector = None
170+
target_layer = None
171+
target_vector = None
172+
173+
68174
@functools.lru_cache
69-
def cached_raster_info(vsi_raster_path):
70-
try:
71-
return pygeoprocessing.get_raster_info(vsi_raster_path)
72-
except Exception:
73-
app.logger.error("Failed to read raster info for %s", vsi_raster_path)
74-
raise
175+
def cached_file_info(vsi_file_path, file_type):
176+
app.logger.info(f"Getting file info for {file_type} at {vsi_file_path}")
177+
if file_type == RASTER:
178+
try:
179+
return pygeoprocessing.get_raster_info(vsi_file_path)
180+
except Exception:
181+
app.logger.error("Failed to read raster info for %s", vsi_file_path)
182+
raise
183+
elif file_type == VECTOR:
184+
try:
185+
return pygeoprocessing.get_vector_info(vsi_file_path)
186+
except Exception:
187+
app.logger.error("Failed to read vector info for %s", vsi_file_path)
188+
raise
75189

76190

77191
def _align_bbox(bbox, raster_info):
@@ -123,19 +237,19 @@ def epsg_info():
123237

124238
@app.route('/metadata', methods=['GET'])
125239
def metadata():
126-
cog_url = request.args.get('cog_url')
240+
file_url = request.args.get('file_url')
127241

128242
yaml_data = yaml.load(
129-
requests.get(f'{cog_url}.yml', stream=True).content,
243+
requests.get(f'{file_url}.yml', stream=True).content,
130244
Loader=yaml.Loader)
131245
return yaml_data
132246

133247

134248
@app.route('/info', methods=['GET'])
135249
@functools.lru_cache
136250
def info():
137-
cog_url = request.args.get("cog_url")
138-
result = gdal.Info(f'/vsicurl/{cog_url}', options=['-json'])
251+
file_url = request.args.get("file_url")
252+
result = gdal.Info(f'/vsicurl/{file_url}', options=['-json'])
139253

140254
return jsonify({
141255
'status': 'success',
@@ -155,93 +269,108 @@ def clip_app():
155269

156270
@app.route("/clip", methods=['POST'])
157271
def clip():
158-
#data = flask.request.json
159-
#source_raster = data['cog_url']
160-
#target_bbox = data['target_bbox']
161-
#target_epsg = data['target_epsg']
162-
163-
#SOURCE_LOGGER.info(f"Clipping {source_raster} to {target_bbox}")
164-
165-
#for i in range(11):
166-
# time.sleep(1)
167-
# print(f"logging progress {i*10}%")
168-
# SOURCE_LOGGER.info(f"Progress: {i*10}%")
169-
#SOURCE_LOGGER.info("Done!")
170-
171-
#return jsonify({
172-
# 'status': 'success',
173-
# 'message': 'Clipping finished'
174-
#})
175-
176272
parameters = request.get_json()
177273
app.logger.info(parameters)
178274

179-
if not parameters['cog_url'].startswith(TRUSTED_BUCKET):
180-
app.logger.error("Invalid source raster, not in the known bucket: %s",
181-
parameters['cog_url'])
182-
raise ValueError("Invalid COG provided.")
275+
if not parameters['file_url'].startswith(TRUSTED_BUCKET):
276+
app.logger.error("Invalid source file, not in the known bucket: %s",
277+
parameters['file_url'])
278+
raise ValueError("Invalid source file provided.")
279+
280+
source_file_type = parameters['layer_type']
281+
if source_file_type not in [RASTER, VECTOR]:
282+
raise ValueError("Invalid file type.")
183283

184284
target_bbox = parameters["target_bbox"]
185285

186-
# align the bounding box to a raster grid
187-
source_raster_path = f'/vsicurl/{parameters["cog_url"]}'
188-
source_raster_info = cached_raster_info(source_raster_path)
286+
# align the bounding box
287+
source_file_path = f'/vsicurl/{parameters["file_url"]}'
288+
if source_file_type == VECTOR:
289+
# source_file_path originates in `mappreview` extra; currently always .mvt,
290+
# but leaving this flexible in case we support .geojson again in the future
291+
source_file_path = os.path.splitext(source_file_path)[0] + '.fgb'
189292

190-
warping_kwargs = {}
191-
try:
192-
warping_kwargs['target_projection_wkt'] = _epsg_to_wkt(
193-
parameters["target_epsg"])
194-
aligned_target_bbox = pygeoprocessing.transform_bounding_box(
195-
target_bbox, source_raster_info['projection_wkt'],
196-
warping_kwargs['target_projection_wkt'])
197-
except KeyError:
198-
# If we're keeping the same project, just align the requested bounding
199-
# box to the raster's grid.
200-
aligned_target_bbox = _align_bbox(target_bbox, source_raster_info)
293+
source_file_info = cached_file_info(source_file_path, source_file_type)
201294

202-
try:
203-
# Make sure pixel sizes are floats.
204-
target_cellsize = list(map(float, parameters["target_cellsize"]))
205-
except KeyError:
206-
target_cellsize = source_raster_info['pixel_size']
295+
if source_file_type == RASTER:
296+
app.logger.info("CLIPPING RASTER...")
297+
warping_kwargs = {}
298+
try:
299+
warping_kwargs['target_projection_wkt'] = _epsg_to_wkt(
300+
parameters["target_epsg"])
301+
aligned_target_bbox = pygeoprocessing.transform_bounding_box(
302+
target_bbox, source_file_info['projection_wkt'],
303+
warping_kwargs['target_projection_wkt'])
304+
except KeyError:
305+
# If we're keeping the same projection, just align the requested bounding
306+
# box to the raster's grid.
307+
aligned_target_bbox = _align_bbox(target_bbox, source_file_info)
308+
309+
try:
310+
# Make sure pixel sizes are floats.
311+
target_cellsize = list(map(float, parameters["target_cellsize"]))
312+
except KeyError:
313+
target_cellsize = source_file_info['pixel_size']
207314

208-
# make sure the target cell's height is negative
209-
if not target_cellsize[1] < 0:
210-
target_cellsize[1] *= -1
315+
# make sure the target cell's height is negative
316+
if not target_cellsize[1] < 0:
317+
target_cellsize[1] *= -1
211318

212-
try:
213-
# do the clipping
214-
target_basename = os.path.splitext(
215-
os.path.basename(parameters["cog_url"]))[0]
216-
target_raster_path = os.path.join(
217-
WORKSPACE_DIR, f'{target_basename}--{uuid.uuid4()}.tif')
218-
pygeoprocessing.warp_raster(
219-
source_raster_path, target_cellsize, target_raster_path, 'near',
220-
target_bb=aligned_target_bbox, **warping_kwargs)
221-
except Exception:
222-
app.logger.exception("Failed to warp raster; aborting")
223-
os.remove(target_raster_path)
224-
raise
319+
try:
320+
# do the clipping
321+
target_basename = os.path.splitext(
322+
os.path.basename(parameters["file_url"]))[0]
323+
target_file_path = os.path.join(
324+
WORKSPACE_DIR, f'{target_basename}--{uuid.uuid4()}.tif')
325+
pygeoprocessing.warp_raster(
326+
source_file_path, target_cellsize, target_file_path, 'near',
327+
target_bb=aligned_target_bbox, **warping_kwargs)
328+
except Exception:
329+
app.logger.exception("Failed to warp raster; aborting")
330+
os.remove(target_file_path)
331+
raise
332+
333+
elif source_file_type == VECTOR:
334+
app.logger.info("CLIPPING VECTOR...")
335+
try:
336+
target_projection_wkt = _epsg_to_wkt(
337+
parameters["target_epsg"])
338+
except KeyError:
339+
target_projection_wkt = None
340+
341+
try:
342+
# do the clipping
343+
target_basename = os.path.splitext(
344+
os.path.basename(parameters["file_url"]))[0]
345+
target_file_path = os.path.join(
346+
WORKSPACE_DIR, f'{target_basename}--{uuid.uuid4()}.fgb')
347+
_clip_vector_to_bounding_box(
348+
source_file_path, target_bbox, target_file_path,
349+
target_projection_wkt)
350+
except Exception:
351+
app.logger.exception("Failed to clip vector; aborting")
352+
os.remove(target_file_path)
353+
raise
225354

226355
try:
227-
filesize = humanize.naturalsize(os.path.getsize(target_raster_path))
356+
filesize = humanize.naturalsize(os.path.getsize(target_file_path))
228357

229358
today = datetime.datetime.now().strftime('%Y-%m-%d')
230-
bucket_filename = f"{today}--{os.path.basename(target_raster_path)}"
359+
bucket_filename = f"{today}--{os.path.basename(target_file_path)}"
231360

232361
app.logger.info(f"Uploading to bucket: {bucket_filename}")
233362
bucketname = re.sub('^gs://', '', TARGET_FILE_BUCKET)
234363
storage_client = storage.Client()
235364
bucket = storage_client.bucket(bucketname)
236365
blob = bucket.blob(bucket_filename)
237-
blob.upload_from_filename(target_raster_path)
366+
blob.upload_from_filename(target_file_path)
238367
except Exception:
239368
app.logger.exception("Falling back to cmdline gsutil")
240-
subprocess.run(["gsutil", "cp", source_raster_path,
369+
subprocess.run(["gsutil", "cp", source_file_path,
241370
f'{TARGET_FILE_BUCKET}/{bucket_filename}'])
242371
finally:
243-
app.logger.info(f"Deleting local file {target_raster_path}")
244-
os.remove(target_raster_path)
372+
app.logger.info(f"Deleting local file {target_file_path}")
373+
os.remove(target_file_path)
245374

246375
downloadable_raster_path = f"{TARGET_BUCKET_URL}/{bucket_filename}"
247376
app.logger.info("Returning URL: %s", downloadable_raster_path)

0 commit comments

Comments
 (0)