Skip to content

Commit 846c0c3

Browse files
committed
feat: get_collection_gdf, several fixes, improvements and docs
1 parent 8970755 commit 846c0c3

File tree

12 files changed

+2021
-91
lines changed

12 files changed

+2021
-91
lines changed

README.md

Lines changed: 342 additions & 16 deletions
Large diffs are not rendered by default.

docs/api.md

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,31 @@
11
# API
22

3+
## STAC client
4+
5+
```{eval-rst}
6+
.. automodule:: swisstopopy.stac
7+
:members:
8+
```
9+
10+
## Geospatial utilities
11+
12+
### Buildings
13+
14+
```{eval-rst}
15+
.. automodule:: swisstopopy.buildings
16+
:members:
17+
```
18+
19+
### Digital elevation model (DEM)
20+
21+
```{eval-rst}
22+
.. automodule:: swisstopopy.dem
23+
:members:
24+
```
25+
26+
### Tree canopy raster
27+
328
```{eval-rst}
4-
.. automodule:: swisstopopy
29+
.. automodule:: swisstopopy.tree_canopy
530
:members:
631
```

docs/index.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ hidden:
44
maxdepth: 1
55
---
66
7+
overview
78
api
89
contributing
910
changelog

docs/overview.ipynb

Lines changed: 1413 additions & 0 deletions
Large diffs are not rendered by default.

figures/tiles.png

164 KB
Loading

figures/tree-canopy.png

201 KB
Loading

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ test = [
6464
[project.urls]
6565
Repository = "https://github.com/martibosch/swisstopopy"
6666

67+
[tool.codespell]
68+
skip = "docs/overview.ipynb"
69+
6770
[tool.commitizen]
6871
major_version_zero = true
6972
name = "cz_conventional_commits"

swisstopopy/buildings.py

Lines changed: 44 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
Raster and swissALTI3D products provided by swisstopo's STAC API.
55
"""
66

7+
import warnings
8+
79
import geopandas as gpd
810
import osmnx as ox
911
import pandas as pd
@@ -73,9 +75,7 @@ def get_bldg_gdf(
7375
region: RegionType,
7476
*,
7577
region_crs: CRSType = None,
76-
surface3d_datetime: DatetimeLike | None = None,
77-
alti3d_datetime: DatetimeLike | None = None,
78-
alti3d_res: float = 0.5,
78+
item_datetime: DatetimeLike | None = None,
7979
**pooch_retrieve_kwargs: utils.KwargsType,
8080
) -> gpd.GeoDataFrame:
8181
"""Get buildings geo-data frame with height information.
@@ -88,12 +88,10 @@ def get_bldg_gdf(
8888
Coordinate reference system (CRS) of the region. Required if `region` is a naive
8989
geometry or a list of bounding box coordinates. Ignored if `region` already has
9090
a CRS.
91-
surface3d_datetime, alti3d_datetime : datetime-like, optional
92-
Datetime to filter swissSURFACE3D and swissALTI3D data respectively, forwarded
93-
to `pystac_client.Client.search`. If None, the latest data for each tile is
94-
used.
95-
alti3d_res : {0.5, 2}, default 2
96-
Resolution of the swissALTI3D data to get, can be 0.5 or 2 (meters).
91+
item_datetime : datetime-like, optional
92+
Datetime to filter swissSURFACE3D Raster and swissALTI3D data to use (must be
93+
the same for both collections), forwarded to `pystac_client.Client.search`.
94+
If None, the latest data for each tile is used.
9795
pooch_retrieve_kwargs : mapping, optional
9896
Additional keyword arguments to pass to `pooch.retrieve`.
9997
@@ -106,42 +104,59 @@ def get_bldg_gdf(
106104
# 1. we first need to project the region to OSM CRS (EPSG:4326) to query via osmnx
107105
# 2. we drop the "node" column to keep only the "way" and "relation" columns that
108106
# correspond to polygon geometries
109-
region_gser = RegionMixin._process_region_arg(region, crs=region_crs)
107+
region_gser = RegionMixin._process_region_arg(region, crs=region_crs)["geometry"]
110108
bldg_gdf = (
111109
ox.features_from_polygon(
112110
region_gser.to_crs(ox.settings.default_crs).iloc[0],
113111
tags=OSMNX_TAGS,
114112
)
115113
.to_crs(stac.CH_CRS)
116-
.drop("node")
114+
.drop("node", errors="ignore")
117115
)
118116

119117
# use the STAC API to get building heights from swissSURFACE3D and swissALTI3D
120118
client = stac.SwissTopoClient(region_gser)
121119

120+
def _get_and_process_gdf(collection_id):
121+
gdf = client.get_collection_gdf(
122+
collection_id,
123+
datetime=item_datetime,
124+
)
125+
if gdf.empty:
126+
# warn and return gdf without heights
127+
warnings.warn(
128+
f"No '{collection_id}' data available for the specified region and "
129+
"datetime."
130+
)
131+
return gdf
132+
# filter to get tiff images only
133+
gdf = gdf[gdf["assets.href"].str.endswith(".tif")]
134+
# filter to get the resolution data at the specified resolution
135+
# AFAIK, there is no swissSURFACE3D Raster collection with 2 m resolution, so we
136+
# can only set the resolution to 0.5 m
137+
gdf = gdf[gdf["assets.eo:gsd"] == 0.5]
138+
# if no datetime specified, get the latest data for each tile (location)
139+
if item_datetime is None:
140+
gdf = stac.get_latest(gdf)
141+
return gdf
142+
122143
# surface3d-raster (raster dsm)
123-
surface3d_gdf = client.gdf_from_collection(
124-
stac.SWISSSURFACE3D_RASTER_COLLECTION_ID,
125-
datetime=surface3d_datetime,
144+
surface3d_raster_gdf = _get_and_process_gdf(
145+
stac.SWISSSURFACE3D_RASTER_COLLECTION_ID
126146
)
127-
# filter to get tiff images only
128-
surface3d_gdf = surface3d_gdf[surface3d_gdf["assets.href"].str.endswith(".tif")]
129-
# if no datetime specified, get the latest data for each tile (location)
130-
if surface3d_datetime is None:
131-
surface3d_gdf = stac.get_latest(surface3d_gdf)
132147

133148
# alti3d (raster dem)
134-
alti3d_gdf = client.gdf_from_collection(
149+
alti3d_gdf = _get_and_process_gdf(
135150
stac.SWISSALTI3D_COLLECTION_ID,
136-
datetime=alti3d_datetime,
137151
)
138-
# filter to get tiff images only
139-
alti3d_gdf = alti3d_gdf[alti3d_gdf["assets.href"].str.endswith(".tif")]
140-
# filter to get the resolution data at the specified resolution
141-
alti3d_gdf = alti3d_gdf[alti3d_gdf["assets.eo:gsd"] == alti3d_res]
142-
# if no datetime specified, get the latest data for each tile (location)
143-
if alti3d_datetime is None:
144-
alti3d_gdf = stac.get_latest(alti3d_gdf)
152+
153+
# both collections need to have items for the selected datetimes
154+
if surface3d_raster_gdf.empty or alti3d_gdf.empty:
155+
# warn and return gdf without heights
156+
# note that warnings will have already been raised above, so this follows from
157+
# there
158+
warnings.warn("Returning building footprints without height information.")
159+
return bldg_gdf
145160

146161
# compute the building heights as zonal statistics. To that end, we first compute
147162
# a "building height raster" as the difference between the swissSURFACE3D (surface
@@ -155,7 +170,7 @@ def get_bldg_gdf(
155170

156171
# we need to project the gdf of tiles to the same CRS as the actual swissSURFACE3D
157172
# and swissALTI3D products (again, EPSG:2056)
158-
tile_gdf = surface3d_gdf.sjoin(
173+
tile_gdf = surface3d_raster_gdf.sjoin(
159174
alti3d_gdf, how="inner", predicate="contains"
160175
).to_crs(stac.CH_CRS)
161176

swisstopopy/dem.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def get_dem_raster(
2020
*,
2121
region_crs: CRSType = None,
2222
alti3d_datetime: DatetimeLike | None = None,
23-
alti3d_res: float = 2,
23+
alti3d_res: float = 0.5,
2424
pooch_retrieve_kwargs: utils.KwargsType = None,
2525
rio_merge_kwargs: utils.KwargsType = None,
2626
) -> None:
@@ -39,7 +39,7 @@ def get_dem_raster(
3939
alti3d_datetime : datetime-like, optional
4040
Datetime to filter swissALTI3D data, forwarded to `pystac_client.Client.search`.
4141
If None, the latest data for each tile is used.
42-
alti3d_res : {0.5, 2}, default 2
42+
alti3d_res : {0.5, 2}, default 0.5
4343
Resolution of the swissALTI3D data to get, can be 0.5 or 2 (meters).
4444
pooch_retrieve_kwargs, rio_merge_kwargs : mapping, optional
4545
Additional keyword arguments to respectively pass to `pooch.retrieve` and
@@ -48,10 +48,15 @@ def get_dem_raster(
4848
"""
4949
# use the STAC API to get the DEM from swissALTI3D
5050
client = stac.SwissTopoClient(region, region_crs=region_crs)
51-
alti3d_gdf = client.gdf_from_collection(
51+
alti3d_gdf = client.get_collection_gdf(
5252
stac.SWISSALTI3D_COLLECTION_ID,
5353
datetime=alti3d_datetime,
5454
)
55+
if alti3d_gdf.empty:
56+
raise ValueError(
57+
"Cannot compute DEM raster: no data available for the specified region and "
58+
"datetime."
59+
)
5560

5661
# filter to get tiff images only
5762
alti3d_gdf = alti3d_gdf[alti3d_gdf["assets.href"].str.endswith(".tif")]

swisstopopy/stac.py

Lines changed: 62 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,14 @@
22

33
import warnings
44
from collections.abc import Iterator
5-
from copy import deepcopy
65

76
import geopandas as gpd
7+
import numpy as np
88
import pandas as pd
99
import pystac_client
1010
from pyregeon import CRSType, RegionMixin, RegionType
1111
from pystac_client.item_search import DatetimeLike
12-
from shapely.geometry import shape
12+
from shapely import box
1313

1414
# import all constants too
1515
# __all__ = ["get_latest", "SwissTopoClient"]
@@ -45,17 +45,16 @@
4545
# see pystac-client.readthedocs.io/en/stable/tutorials/stac-metadata-viz.html#GeoPandas
4646
def _items_to_gdf(items: Iterator) -> gpd.GeoDataFrame:
4747
"""Convert a list of STAC Items into a geo-data frame."""
48-
_items = []
49-
for i in items:
50-
_i = deepcopy(i)
51-
_i["geometry"] = shape(_i["geometry"])
52-
_items.append(_i)
53-
gdf = gpd.GeoDataFrame(pd.json_normalize(_items))
54-
for field in ["properties.datetime", "properties.created", "properties.updated"]:
55-
if field in gdf:
56-
gdf[field] = pd.to_datetime(gdf[field])
57-
# gdf.set_index("properties.datetime", inplace=True)
58-
return gdf
48+
# TODO: use polars or stacrs to improve performance
49+
gdf = pd.json_normalize(list(items))
50+
if gdf.empty:
51+
# there is no data for the specified datetime, warn and return empty gdf
52+
warnings.warn(
53+
"No data available for the specified datetime and collection. Returning an "
54+
"empty data frame."
55+
)
56+
return gdf
57+
return gpd.GeoDataFrame(gdf, geometry=box(*np.array(gdf["bbox"].tolist()).T))
5958

6059

6160
def _postprocess_items_gdf(items_gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
@@ -96,28 +95,56 @@ def expand_row(row):
9695
axis="columns",
9796
)
9897

99-
return pd.concat(
98+
gdf = pd.concat(
10099
[expand_row(row) for _, row in items_gdf.iterrows()], ignore_index=True
101100
)
102101

102+
# set datetime columns to datetime
103+
for field in ["properties.datetime", "properties.created", "properties.updated"]:
104+
if field in gdf:
105+
gdf[field] = pd.to_datetime(gdf[field])
106+
107+
return gdf
108+
103109

104110
def get_latest(
105-
gdf: gpd.GeoDataFrame,
111+
collection_gdf: gpd.GeoDataFrame,
106112
*,
107-
id_col: str = "id",
113+
tile_id_col: str = "id",
108114
datetime_col: str = "properties.datetime",
109115
) -> gpd.GeoDataFrame:
110-
"""Get the latest item for each tile."""
116+
"""Get the latest item for each tile and file metadata.
117+
118+
Parameters
119+
----------
120+
collection_gdf : geopandas.GeoDataFrame
121+
Collection geo-data frame.
122+
tile_id_col : str, default "id"
123+
Column name for the tile ID.
124+
datetime_col : str, default "properties.datetime"
125+
Column name for the datetime.
126+
127+
Returns
128+
-------
129+
collection_gdf : geopandas.GeoDataFrame
130+
Collection geo-data frame with the latest item for each tile and file metadata.
131+
"""
132+
by = [
133+
collection_gdf[tile_id_col].str.split("_").str[-1],
134+
collection_gdf["assets.type"],
135+
]
136+
if "assets.eo:gsd" in collection_gdf:
137+
by.append(collection_gdf["assets.eo:gsd"])
111138
return (
112-
gdf.sort_values(
139+
collection_gdf.sort_values(
113140
datetime_col,
114141
ascending=False,
115142
)
116-
.groupby(gdf[id_col].str.split("_").str[-1])
143+
.groupby(by)
117144
.first()
118-
.rename_axis(index={id_col: "tile_id"})
119-
.reset_index()
120-
.set_crs(gdf.crs)
145+
.rename_axis(index={tile_id_col: "tile_id"})
146+
.reset_index(drop=True)
147+
.set_crs(collection_gdf.crs)
121148
)
122149

123150

@@ -162,7 +189,7 @@ def __init__(
162189
# keyword argument in `pystac_client.client.Search`.
163190
self.region = None
164191

165-
def gdf_from_collection(
192+
def get_collection_gdf(
166193
self,
167194
collection_id: str,
168195
*,
@@ -182,12 +209,21 @@ def gdf_from_collection(
182209
Coordinate reference system (CRS) of the returned geo-data frame. If None,
183210
the CRS of the collection tiles will be used - note that this is not
184211
(necessarily) the same as the CRS of the tile data itself.
212+
213+
Returns
214+
-------
215+
collection_gdf : geopandas.GeoDataFrame
216+
Geo-data frame of the collection tiles.
185217
"""
186218
if dst_crs is None:
187219
dst_crs = self._client.get_collection(collection_id).extra_fields["crs"][0]
188220
search = self._client.search(
189221
collections=[collection_id], intersects=self.region, datetime=datetime
190222
)
191-
return gpd.GeoDataFrame(
192-
_postprocess_items_gdf(_items_to_gdf(search.items_as_dicts()))
193-
).set_crs(dst_crs)
223+
gdf = _items_to_gdf(search.items_as_dicts())
224+
if gdf.empty:
225+
# the warning has already been raised in `_items_to_gdf`, do not raise it
226+
# again, just return an empty data frame
227+
# TODO: best approach to handle this? i.e., where to raise the warning?
228+
return gdf
229+
return gpd.GeoDataFrame(_postprocess_items_gdf(gdf)).set_crs(dst_crs)

0 commit comments

Comments
 (0)