Skip to content

Commit 036ea3e

Browse files
committed
refactor(NHDPlus HR waterbody preprocessing (preprocessing.py::preprocess_nhdplus_hr_waterbodies)): represent NHDPlusIDs as strings, consistent with the rest of SFRmaker.
* retain waterbodies whose area equals the min_areasqkm value (compare with '>=' instead of '>') when culling waterbodies
1 parent ad8cb97 commit 036ea3e

File tree

2 files changed

+15
-8
lines changed

2 files changed

+15
-8
lines changed

sfrmaker/preprocessing.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1957,7 +1957,7 @@ def preprocess_nhdplus_hr_waterbodies(nhdplus_path, active_area,
19571957
df = gpd.read_file(f, **kwargs)
19581958
dfs.append(df)
19591959
df = pd.concat(dfs)
1960-
wb_crs = df.crs
1960+
df['NHDPlusID'] = df['NHDPlusID'].astype(int).astype(str)
19611961
if dest_crs is None:
19621962
dest_crs = df.crs
19631963
else:
@@ -1969,9 +1969,9 @@ def preprocess_nhdplus_hr_waterbodies(nhdplus_path, active_area,
19691969
extent_poly = read_polygon_feature(
19701970
active_area, dest_crs=dest_crs)
19711971
intersects = np.array([g.intersects(extent_poly) for g in df.geometry])
1972-
loc = intersects & ~df['NHDPlusID'].isin(drop_waterbodies) & (df['AreaSqKm'] > min_areasqkm)
1972+
loc = intersects & ~df['NHDPlusID'].isin(drop_waterbodies) & (df['AreaSqKm'] >= min_areasqkm)
19731973
df = df.loc[loc].copy()
1974-
df['NHDPlusID'] = df['NHDPlusID'].astype(int)
1974+
19751975
df['FDate'] = pd.to_datetime(df['FDate']).dt.strftime('%Y-%m-%d')
19761976
df.to_file(outfile)
19771977
print(f'wrote {outfile}')

sfrmaker/test/test_preprocessing.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -420,15 +420,22 @@ def test_preprocess_nhdplus_hr_waterbodies(project_root_path, outdir):
420420
outfile = outdir / 'preprocessed_waterbodies.shp'
421421

422422
# drop these waterbodies, regardless of size
423-
drop_waterbodies = set()
423+
drop_waterbodies = {'75004400013339'}
424+
425+
expected_lakes = {#'75004400013854',
426+
'75004400011923',
427+
'75004400012773'}
424428

425429
preprocess_nhdplus_hr_waterbodies(nhdplus_path,
426-
active_area=(-151.00350, 60.64855, -150.96778, 60.67559),
430+
active_area=(-151.02, 60.64855, -150.96778, 60.67559),
427431
drop_waterbodies=drop_waterbodies,
428432
min_areasqkm=0.05,
429433
dest_crs=26905, outfile=outfile)
430434
df = gpd.read_file(outfile)
431435
df.crs == 26905
432-
assert 'Beaver Lake' in df['GNIS_Name'].values
433-
# the next line shouldn't be there either
434-
assert 75004400012864 not in df['NHDPlusID'].values
436+
df['NHDPlusID'] = df['NHDPlusID'].astype(int).astype(str)
437+
assert set(df['NHDPlusID']) == expected_lakes
438+
# this lake is < 0.05 km2; should have been culled
439+
assert '75004400012864' not in df['NHDPlusID'].values
440+
for nhdplusid in drop_waterbodies:
441+
assert int(nhdplusid) not in df['NHDPlusID'].values

0 commit comments

Comments
 (0)