Skip to content

Commit fe281f1

Browse files
committed
👌 Subset dataframe properly to within lake polygon
Ice volume displacements (a proxy for subglacial lake volume discharge) did not seem right, and it's because the geographical subsetting was done using a bounding box rather than an actual polygon. Using geopandas (CPU-based) for this since there are only a few hundred points involved. Also decided I might as well randomly commit an enhancement to the point_in_polygon_gpu code, with a new poly_limit parameter for conserving GPU memory.
1 parent cbb88f8 commit fe281f1

File tree

5 files changed

+31
-11
lines changed

5 files changed

+31
-11
lines changed

atlxi_xover.ipynb

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@
116116
"outputs": [],
117117
"source": [
118118
"# Choose one Antarctic active subglacial lake polygon with EPSG:3031 coordinates\n",
119-
"lake_name: str = \"Subglacial Lake Conway\"\n",
119+
"lake_name: str = \"Whillans 12\"\n",
120120
"lake_catalog = deepicedrain.catalog.subglacial_lakes()\n",
121121
"lake_ids: list = (\n",
122122
" pd.json_normalize(lake_catalog.metadata[\"lakedict\"])\n",
@@ -147,7 +147,11 @@
147147
"source": [
148148
"# Subset data to lake of interest\n",
149149
"placename: str = region.name.lower().replace(\" \", \"_\")\n",
150-
"df_lake: pd.DataFrame = region.subset(data=df_dhdt)"
150+
"df_lake: cudf.DataFrame = region.subset(data=df_dhdt) # bbox subset\n",
151+
"gdf_lake = gpd.GeoDataFrame(\n",
152+
" df_lake, geometry=gpd.points_from_xy(x=df_lake.x, y=df_lake.y, crs=3031)\n",
153+
")\n",
154+
"df_lake: pd.DataFrame = df_lake.loc[gdf_lake.within(lake.geometry)] # polygon subset"
151155
]
152156
},
153157
{

atlxi_xover.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@
8484

8585
# %%
8686
# Choose one Antarctic active subglacial lake polygon with EPSG:3031 coordinates
87-
lake_name: str = "Subglacial Lake Conway"
87+
lake_name: str = "Whillans 12"
8888
lake_catalog = deepicedrain.catalog.subglacial_lakes()
8989
lake_ids: list = (
9090
pd.json_normalize(lake_catalog.metadata["lakedict"])
@@ -107,8 +107,11 @@
107107
# %%
108108
# Subset data to lake of interest
109109
placename: str = region.name.lower().replace(" ", "_")
110-
df_lake: pd.DataFrame = region.subset(data=df_dhdt)
111-
110+
df_lake: cudf.DataFrame = region.subset(data=df_dhdt) # bbox subset
111+
gdf_lake = gpd.GeoDataFrame(
112+
df_lake, geometry=gpd.points_from_xy(x=df_lake.x, y=df_lake.y, crs=3031)
113+
)
114+
df_lake: pd.DataFrame = df_lake.loc[gdf_lake.within(lake.geometry)] # polygon subset
112115

113116
# %%
114117
# Run crossover analysis on all tracks

deepicedrain/features/subglacial_lakes.feature

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,6 @@ Feature: Mapping Antarctic subglacial lakes
5959
Then we see a trend of active subglacial lake surfaces changing over time
6060

6161
Examples:
62-
| lake_name | location |
63-
| Whillans 7 | whillans_upstream |
64-
| Whillans 12 | whillans_downstream |
62+
| lake_name | location |
63+
| Whillans 7 | whillans_upstream |
64+
| Whillans 12 | whillans_downstream |

deepicedrain/spatiotemporal.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,7 @@ def point_in_polygon_gpu(
222222
points_x_col: str = "x",
223223
points_y_col: str = "y",
224224
poly_label_col: str = None,
225+
poly_limit: int = 32,
225226
):
226227
"""
227228
Find polygon labels for each of the input points.
@@ -244,6 +245,11 @@ def point_in_polygon_gpu(
244245
e.g. "placename". Default is to automatically use the first column
245246
unless otherwise specified.
246247
248+
poly_limit : int
249+
Number of polygons to check in each loop of the point in polygon
250+
algorithm, workaround for a limitation in cuspatial. Default is 32
251+
(maximum), adjust to lower value (e.g. 16) if hitting MemoryError.
252+
247253
Returns
248254
-------
249255
point_labels : cudf.Series
@@ -278,11 +284,11 @@ def point_in_polygon_gpu(
278284
)
279285

280286
# Run the actual point in polygon algorithm!
281-
# Note that cuspatial's point_in_polygon function has a 31 polygon limit,
287+
# Note that cuspatial's point_in_polygon function has a 32 polygon limit,
282288
# hence the for-loop code below. See also
283289
# https://github.com/rapidsai/cuspatial/blob/branch-0.15/notebooks/nyc_taxi_years_correlation.ipynb
284290
num_poly: int = len(poly_df_)
285-
point_in_poly_iter: list = list(np.arange(0, num_poly, 31)) + [num_poly]
291+
point_in_poly_iter: list = list(np.arange(0, num_poly, poly_limit - 1)) + [num_poly]
286292
for i in range(len(point_in_poly_iter) - 1):
287293
start, end = point_in_poly_iter[i], point_in_poly_iter[i + 1]
288294
poly_labels: cudf.DataFrame = cuspatial.point_in_polygon(

deepicedrain/tests/conftest.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import os
55

66
import fsspec
7+
import geopandas as gpd
78
import numpy as np
89
import pandas as pd
910
import pytest
@@ -77,7 +78,13 @@ def lake_altimetry_data(lake_name: str, location: str, context) -> pd.DataFrame:
7778

7879
# Subset data to lake of interest
7980
context.placename: str = context.lake_name.lower().replace(" ", "_")
80-
df_lake: pd.DataFrame = context.region.subset(data=dataframe)
81+
df_lake: cudf.DataFrame = context.region.subset(data=dataframe) # bbox subset
82+
gdf_lake = gpd.GeoDataFrame(
83+
df_lake, geometry=gpd.points_from_xy(x=df_lake.x, y=df_lake.y, crs=3031)
84+
)
85+
df_lake: pd.DataFrame = df_lake.loc[
86+
gdf_lake.within(context.lake.geometry)
87+
] # polygon subset
8188

8289
# Save lake outline to OGR GMT file format
8390
os.makedirs(name=f"figures/{context.placename}", exist_ok=True)

0 commit comments

Comments
 (0)