11"""Methods related to sampling and smoothing elevations."""
22import time
3-
3+ from pathlib import Path
4+ import time
45import numpy as np
5-
6+ import pandas as pd
7+ import geopandas as gpd
8+ import rasterio
9+ from rasterstats import zonal_stats
10+ from shapely.geometry import LineString, Point
11+ from gisutils import get_authority_crs, project
612from sfrmaker.routing import get_nextupsegs, get_upsegs, make_graph
713
814
@@ -150,3 +156,226 @@ def reset_elevations(seg):
150156 if start_elevations is not None:
151157 return elevations, elevmax
152158 return elevations
159+
160+
161+ def sample_reach_elevations(sfr_reach_data, dem,
162+ sfrmaker_modelgrid=None,
163+ model_crs=None,
164+ method='buffers',
165+ buffer_distance=100,
166+ smooth=True,
167+ elevation_data=None,
168+ elevation_data_crs=None,
169+ elevation_data_layer=None,
170+ elevation_data_errors='raise',
171+ ):
172+ """Computes zonal statistics on a raster for SFR reaches, using
173+ either buffer polygons around the reach LineStrings, or the model
174+ grid cell polygons containing each reach.
175+
176+ Parameters
177+ ----------
178+ sfr_reach_data : pd.DataFrame
179+ Table of SFR reach information, equivalent to the SFRData.reach_data attribute.
180+ dem : path to valid raster dataset
181+ Must be in same Coordinate Reference System as model grid.
182+ sfrmaker_modelgrid : sfrmaker.grid class instance
183+ model_crs : obj
184+ Coordinate reference system for the model.
185+ Only needed if sfrmaker_modelgrid does not have an valid `crs` attribute.
186+ A Python int, dict, str, or :class:`pyproj.crs.CRS` instance
187+ passed to :meth:`pyproj.crs.CRS.from_user_input`
188+
189+ Can be any of:
190+ - PROJ string
191+ - Dictionary of PROJ parameters
192+ - PROJ keyword arguments for parameters
193+ - JSON string with PROJ parameters
194+ - CRS WKT string
195+ - An authority string [i.e. 'epsg:4326']
196+ - An EPSG integer code [i.e. 4326]
197+ - A tuple of ("auth_name": "auth_code") [i.e ('epsg', '4326')]
198+ - An object with a `to_wkt` method.
199+ - A :class:`pyproj.crs.CRS` class
200+ method : str; 'buffers' or 'cell polygons'
201+ If 'buffers', buffers (with flat caps; cap_style=2 in LineString.buffer())
202+ will be created around the reach LineStrings (geometry column in reach_data).
203+ buffer_distance : float
204+ Buffer distance to apply around reach LineStrings, in crs_units.
205+ smooth : bool
206+ Run sfrmaker.elevations.smooth_elevations on sampled elevations
207+ to ensure that they decrease monotonically in the downstream direction
208+ (default=True).
209+ elevation_data : str/pathlike or DataFrame
210+ Optional table of streambed top elevations at x, y locations. These will be mapped to
211+ individual reaches using the selected method, replacing the DEM-derived elevations.
212+ The points must fall within the selected buffer distance or cell polygon, otherwise
213+ an error will be raised. Points can be specified in a shapefile, geopackage, CSV file or
214+ (Geo)DataFrame, which must contain the following fields:
215+
216+ ========= ===============================================================================
217+ x x-coordinate. If a CSV or regular DataFrame is provided,
218+ this must be in the CRS of the model or an elevation_data_crs must be provided.
219+ y y-coordinate.
220+ elevation Elevation in the model units.
221+ ========= ================================================================================
222+
223+ By default, None.
224+ elevation_data_layer : None
225+ Layer name for the elevation data, if the data are being supplied in a multi-layer GeoPackage.
226+ by default, None.
227+ elevation_data_crs : obj
228+ Coordinate reference system of the x,y points in elevation-data.
229+ Only needed if the data do not have a valid ESRI projection (.prj) file.
230+ A Python int, dict, str, or :class:`pyproj.crs.CRS` instance
231+ passed to :meth:`pyproj.crs.CRS.from_user_input`
232+
233+ Can be any of:
234+ - PROJ string
235+ - Dictionary of PROJ parameters
236+ - PROJ keyword arguments for parameters
237+ - JSON string with PROJ parameters
238+ - CRS WKT string
239+ - An authority string [i.e. 'epsg:4326']
240+ - An EPSG integer code [i.e. 4326]
241+ - A tuple of ("auth_name": "auth_code") [i.e ('epsg', '4326')]
242+ - An object with a `to_wkt` method.
243+ - A :class:`pyproj.crs.CRS` class
244+ elevation_data_errors : {‘ignore’, ‘raise’}
245+ If ‘ignore’, suppress error when one or more points in elevation data is
246+ not associated with a stream reach (is out of geographic proximity).
247+ By default, ‘raise’.
248+
249+ Returns
250+ -------
251+ streambed_tops : dict of sampled elevations keyed by reach number
252+ """
253+ reach_data = sfr_reach_data.copy()
254+ reach_data.index = reach_data['rno']
255+
256+ if model_crs is None:
257+ if sfrmaker_modelgrid is not None:
258+ model_crs = sfrmaker_modelgrid.crs
259+ else:
260+ raise ValueError("sample_reach_elevations requires a valid model_crs "
261+ "or .crs attribute attached to sfrmaker_modelgrid")
262+
263+ # get the CRS and pixel size for the DEM
264+ with rasterio.open(dem) as src:
265+ raster_crs = get_authority_crs(src.crs)
266+
267+ # make sure buffer is large enough for DEM pixel size
268+ buffer_distance = np.max([np.sqrt(src.res[0] *
269+ src.res[1]) * 1.01,
270+ buffer_distance])
271+
272+ if method == 'buffers':
273+ assert isinstance(reach_data.geometry.values[0], LineString), \
274+ "Need LineString geometries in reach_data.geometry column to use buffer option."
275+ features = [g.buffer(buffer_distance, join_style='mitre', cap_style='flat')
276+ for g in reach_data.geometry]
277+ feature_descr = 'buffered LineStrings'
278+ elif method == 'cell polygons':
279+ assert sfrmaker_modelgrid is not None, \
280+ "Need an attached sfrmaker.Grid instance to use cell polygons option."
281+ # TODO: refactor this to use a Flopy modelgrid
282+ features = sfrmaker_modelgrid.df.loc[reach_data.node, 'geometry'].tolist()
283+ feature_descr = method
284+
285+ # to_crs features if they're not in the same crs
286+ if raster_crs != model_crs:
287+ features = project(features,
288+ model_crs,
289+ raster_crs)
290+
291+ print(f'running rasterstats.zonal_stats on {feature_descr}...')
292+ t0 = time.time()
293+ def get_min(x):
294+ return np.min(x[x > -1e38])
295+ results = zonal_stats(features,
296+ dem,
297+ add_stats={'nanmin': get_min},
298+ all_touched=True
299+ )
300+ sampled_elevations = [r['nanmin'] for r in results]
301+ print("finished in {:.2f}s\n".format(time.time() - t0))
302+
303+ if all(v is None for v in sampled_elevations):
304+ raise Exception(f'No {feature_descr} intersected with {dem}. Check projections.')
305+ if any(v is None for v in sampled_elevations):
306+ raise Exception(f'Some {feature_descr} not intersected with {dem}. '
307+ 'Check that DEM covers the area of the stream network.'
308+ )
309+ if elevation_data is not None:
310+ if isinstance(elevation_data, str) or isinstance(elevation_data, Path):
311+ measured_elevations = gpd.read_file(elevation_data, layer=elevation_data_layer)
312+ elif isinstance(elevation_data, pd.DataFrame):
313+ measured_elevations = elevation_data.copy()
314+ else:
315+ raise ValueError(f'Unrecognized input for elevation_data:\n{elevation_data}')
316+
317+ if isinstance(measured_elevations, gpd.GeoDataFrame):
318+ #x = [g.x for g in measured_elevations.geometry]
319+ #y = [g.y for g in measured_elevations.geometry]
320+ pass
321+ elif {'x', 'y'}.difference(measured_elevations.columns):
322+ raise ValueError(f'elevation_data input {elevation_data} needs to be '
323+ 'a shapefile, geopackage or GeoDataFrame with valid point features,\n'
324+ "or 'x' and 'y' columns are needed.")
325+ else:
326+ x_coords = measured_elevations['x'].astype(float)
327+ y_coords = measured_elevations['y'].astype(float)
328+ measured_elevations['geometry'] = [Point(x, y) for x, y in zip(x_coords, y_coords)]
329+ measured_elevations = gpd.GeoDataFrame(measured_elevations, crs=elevation_data_crs)
330+ measured_elevations['elevation'] = measured_elevations['elevation'].astype(float)
331+
332+ if measured_elevations.crs is None:
333+ if elevation_data_crs is not None:
334+ measured_elevations.crs = elevation_data_crs
335+ else:
336+ measured_elevations.crs = model_crs
337+ if measured_elevations.crs != model_crs:
338+ measured_elevations.to_crs(model_crs, inplace=True)
339+
340+ # Get stream buffers or model cell polygons containing each point
341+ sfr_reach_buffers = gpd.GeoDataFrame({
342+ 'rno': reach_data['rno'].values},
343+ geometry=features, crs=raster_crs)
344+ sfr_reach_buffers.to_crs(model_crs, inplace=True)
345+ # avoid potential geopandas error
346+ measured_elevations.drop('index_right', axis=1, errors='ignore', inplace=True)
347+ joined = sfr_reach_buffers.sjoin(measured_elevations, how='right')
348+ not_joined = ~measured_elevations['geometry'].isin(joined.dropna(subset='rno')['geometry'])
349+ if not_joined.any():
350+ if isinstance(elevation_data, str) or isinstance(elevation_data, Path):
351+ outpath = Path(elevation_data).parent
352+ else:
353+ outpath = Path('.')
354+ outfile = outpath / 'dropped_elevation_points.gpkg'
355+ measured_elevations.loc[not_joined].to_file(outfile, index=False)
356+ print(f"wrote {outfile}")
357+ message = ("The following measured elevations did not intersect an SFR cell, "
358+ f"or were not within the buffer distance of {buffer_distance}:\n"
359+ f"{measured_elevations.loc[not_joined]}\n"
360+ f"see {outfile}")
361+ if elevation_data_errors == 'raise':
362+ raise ValueError(message)
363+ else:
364+ print(message)
365+ joined.dropna(subset='rno', axis=0, inplace=True)
366+ # consolidate multiple elevations within an SFR reach by taking the minimum
367+ joined['rno'] = joined['rno'].astype(int)
368+ joined_consolidated = joined.groupby('rno').first()
369+ joined_consolidated['elevation'] = joined.groupby('rno')['elevation'].min()
370+
371+ sampled_elevations = pd.Series(sampled_elevations, index=reach_data['rno'])
372+ sampled_elevations.update(joined_consolidated['elevation'])
373+ sampled_elevations = sampled_elevations.tolist()
374+
375+ if smooth:
376+ streambed_tops = smooth_elevations(reach_data.rno.tolist(),
377+ reach_data.outreach.tolist(),
378+ sampled_elevations)
379+ else:
380+ streambed_tops = dict(zip(reach_data.rno, sampled_elevations))
381+ return streambed_tops
0 commit comments