Skip to content

Commit 77c27ba

Browse files
authored
Merge pull request #11 from mapchete/guess_geometry_empty_area
* `array.convert.to_masked_array()`: add `out_dtype` kwarg * `base.EODataCube`: enable reading area subsets of input cubes (added `read_mask` kwarg to read functions) * `base.EODataCube`: add `area_pixelbuffer` * `io.levelled_cubes.read_levelled_cube_to_np_array()`: add `read_mask` kwarg * `io.products.merge_products()`: fix `fill_value` on output array * `sort`: add `CloudCoverSort` * `search.stac_search`: lazy load `pystac_client.Client`
2 parents 24306f9 + e29bf5a commit 77c27ba

File tree

20 files changed

+286
-87
lines changed

20 files changed

+286
-87
lines changed

mapchete_eo/array/convert.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import numpy as np
44
import numpy.ma as ma
5+
from numpy.typing import DTypeLike
56
import xarray as xr
67
from mapchete.types import NodataVal
78

@@ -19,7 +20,9 @@
1920

2021

2122
def to_masked_array(
22-
xarr: Union[xr.Dataset, xr.DataArray], copy: bool = False
23+
xarr: Union[xr.Dataset, xr.DataArray],
24+
copy: bool = False,
25+
out_dtype: Optional[DTypeLike] = None,
2326
) -> ma.MaskedArray:
2427
"""Convert xr.DataArray to ma.MaskedArray."""
2528
if isinstance(xarr, xr.Dataset):
@@ -31,6 +34,9 @@ def to_masked_array(
3134
"Cannot create masked_array because DataArray fill value is None"
3235
)
3336

37+
if out_dtype:
38+
xarr = xarr.astype(out_dtype, copy=False)
39+
3440
if xarr.dtype in _NUMPY_FLOAT_DTYPES:
3541
return ma.masked_values(xarr, fill_value, copy=copy, shrink=False)
3642
else:

mapchete_eo/base.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import croniter
88
from mapchete import Bounds
9+
import numpy as np
910
import numpy.ma as ma
1011
import xarray as xr
1112
from dateutil.tz import tzutc
@@ -18,6 +19,8 @@
1819
from mapchete.types import MPathLike, NodataVal, NodataVals
1920
from pydantic import BaseModel
2021
from rasterio.enums import Resampling
22+
from rasterio.features import geometry_mask
23+
from shapely.geometry import mapping
2124
from shapely.geometry.base import BaseGeometry
2225

2326
from mapchete_eo.archives.base import Archive
@@ -62,6 +65,7 @@ class EODataCube(base.InputTile):
6265
eo_bands: dict
6366
time: List[TimeRange]
6467
area: BaseGeometry
68+
area_pixelbuffer: int = 0
6569

6670
def __init__(
6771
self,
@@ -367,6 +371,29 @@ def default_read_values(
367371
nodatavals=nodatavals,
368372
merge_products_by=merge_products_by,
369373
merge_method=merge_method,
374+
read_mask=self.get_read_mask(),
375+
)
376+
377+
def get_read_mask(self) -> np.ndarray:
378+
"""
379+
Determine read mask according to input area.
380+
381+
This will generate a numpy array where pixel overlapping the input area
382+
are set True and thus will get filled by the read function. Pixel outside
383+
of the area are not considered for reading.
384+
385+
On staged reading, i.e. first checking the product masks to assess valid
386+
pixels, this will avoid reading product bands in cases the product only covers
387+
pixels outside of the intended reading area.
388+
"""
389+
area = self.area.buffer(self.area_pixelbuffer * self.tile.pixel_x_size)
390+
if area.is_empty:
391+
return np.zeros((self.tile.shape), dtype=bool)
392+
return geometry_mask(
393+
geometries=[mapping(area)],
394+
out_shape=self.tile.shape,
395+
transform=self.tile.transform,
396+
invert=True,
370397
)
371398

372399

@@ -443,8 +470,9 @@ def _init_area(self, input_params: dict) -> BaseGeometry:
443470
input_params.get("delimiters", {}).get("bounds"),
444471
crs=getattr(input_params.get("pyramid"), "crs"),
445472
),
473+
raise_if_empty=False,
446474
)
447-
return process_area.intersection(
475+
process_area = process_area.intersection(
448476
reproject_geometry(
449477
configured_area,
450478
src_crs=configured_area_crs or self.crs,

mapchete_eo/io/items.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def item_to_np_array(
5656
return out
5757

5858

59-
def expand_params(param, length):
59+
def expand_params(param: Any, length: int) -> List[Any]:
6060
"""
6161
Expand parameters if they are not a list.
6262
"""
@@ -104,8 +104,10 @@ def get_item_property(
104104
| ``collection`` | The collection ID of an Item's collection. |
105105
+--------------------+--------------------------------------------------------+
106106
"""
107-
if property in ["year", "month", "day", "date", "datetime"]:
108-
if item.datetime is None:
107+
if property == "id":
108+
return item.id
109+
elif property in ["year", "month", "day", "date", "datetime"]:
110+
if item.datetime is None: # pragma: no cover
109111
raise ValueError(
110112
f"STAC item has no datetime attached, thus cannot get property {property}"
111113
)

mapchete_eo/io/levelled_cubes.py

Lines changed: 66 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -40,27 +40,50 @@ def read_levelled_cube_to_np_array(
4040
raise_empty: bool = True,
4141
out_dtype: DTypeLike = np.uint16,
4242
out_fill_value: NodataVal = 0,
43+
read_mask: Optional[np.ndarray] = None,
4344
) -> ma.MaskedArray:
4445
"""
4546
Read products as slices into a cube by filling up nodata gaps with next slice.
47+
48+
If a read_mask is provided, only the pixels marked True are considered to be read.
4649
"""
47-
if len(products) == 0:
50+
if len(products) == 0: # pragma: no cover
4851
raise NoSourceProducts("no products to read")
49-
5052
bands = assets or eo_bands
51-
if bands is None:
53+
if bands is None: # pragma: no cover
5254
raise ValueError("either assets or eo_bands have to be set")
53-
5455
out_shape = (target_height, len(bands), *grid.shape)
56+
57+
# 2D read_mask shape
58+
if read_mask is None:
59+
read_mask = np.ones(grid.shape, dtype=bool)
60+
elif read_mask.ndim != 2: # pragma: no cover
61+
raise ValueError(
62+
"read_mask must be 2-dimensional, not %s-dimensional",
63+
read_mask.ndim,
64+
)
5565
out: ma.MaskedArray = ma.masked_array(
56-
data=np.zeros(out_shape, dtype=out_dtype),
57-
mask=np.ones(out_shape, dtype=out_dtype),
66+
data=np.full(out_shape, out_fill_value, dtype=out_dtype),
67+
mask=np.ones(out_shape, dtype=bool),
5868
fill_value=out_fill_value,
5969
)
70+
71+
if not read_mask.any():
72+
logger.debug("nothing to read")
73+
return out
74+
75+
# extrude mask to match each layer
76+
layer_read_mask = np.stack([read_mask for _ in bands])
77+
78+
def _cube_read_mask() -> np.ndarray:
79+
# This is only needed for debug output, thus there is no need to materialize always
80+
return np.stack([layer_read_mask for _ in range(target_height)])
81+
6082
logger.debug(
61-
"empty cube with shape %s has %s",
83+
"empty cube with shape %s has %s and %s pixels to be filled",
6284
out.shape,
6385
pretty_bytes(out.size * out.itemsize),
86+
_cube_read_mask().sum(),
6487
)
6588

6689
logger.debug("sort products into slices ...")
@@ -76,25 +99,25 @@ def read_levelled_cube_to_np_array(
7699
slices_read_count, slices_skip_count = 0, 0
77100

78101
# pick slices one by one
79-
for slice_count, slice in enumerate(slices, 1):
102+
for slice_count, slice_ in enumerate(slices, 1):
80103
# all filled up? let's get outta here!
81104
if not out.mask.any():
82-
logger.debug("cube is full, quitting!")
105+
logger.debug("cube has no pixels to be filled, quitting!")
83106
break
84107

85108
# generate 2D mask of holes to be filled in output cube
86-
cube_nodata_mask = out.mask.any(axis=0).any(axis=0)
109+
cube_nodata_mask = np.logical_and(out.mask.any(axis=0).any(axis=0), read_mask)
87110

88111
# read slice
89112
try:
90113
logger.debug(
91114
"see if slice %s %s has some of the %s unmasked pixels for cube",
92115
slice_count,
93-
slice,
116+
slice_,
94117
cube_nodata_mask.sum(),
95118
)
96-
with slice.cached():
97-
slice_array = slice.read(
119+
with slice_.cached():
120+
slice_array = slice_.read(
98121
merge_method=merge_method,
99122
product_read_kwargs=dict(
100123
product_read_kwargs,
@@ -104,17 +127,18 @@ def read_levelled_cube_to_np_array(
104127
resampling=resampling,
105128
nodatavals=nodatavals,
106129
raise_empty=raise_empty,
107-
target_mask=~cube_nodata_mask.copy(),
130+
read_mask=cube_nodata_mask.copy(),
131+
out_dtype=out_dtype,
108132
),
109133
)
110134
slices_read_count += 1
111135
except (EmptySliceException, CorruptedSlice) as exc:
112-
logger.debug("skipped slice %s: %s", slice, str(exc))
136+
logger.debug("skipped slice %s: %s", slice_, str(exc))
113137
slices_skip_count += 1
114138
continue
115139

116140
# if slice was not empty, fill pixels into cube
117-
logger.debug("add slice %s array to cube", slice)
141+
logger.debug("add slice %s array to cube", slice_)
118142

119143
# iterate through layers of cube
120144
for layer_index in range(target_height):
@@ -124,34 +148,35 @@ def read_levelled_cube_to_np_array(
124148
continue
125149

126150
# determine empty patches of current layer
127-
empty_patches = out[layer_index].mask.copy()
128-
pixels_for_layer = (~slice_array[empty_patches].mask).sum()
151+
empty_patches = np.logical_and(out[layer_index].mask, layer_read_mask)
152+
remaining_pixels_for_layer = (~slice_array[empty_patches].mask).sum()
129153

130154
# when slice has nothing to offer for this layer, skip
131-
if pixels_for_layer == 0:
155+
if remaining_pixels_for_layer == 0:
132156
logger.debug(
133157
"layer %s: slice has no pixels for this layer, jump to next",
134158
layer_index,
135159
)
136160
continue
137161

162+
# insert slice data into empty patches of layer
138163
logger.debug(
139164
"layer %s: fill with %s pixels ...",
140165
layer_index,
141-
pixels_for_layer,
166+
remaining_pixels_for_layer,
142167
)
143-
# insert slice data into empty patches of layer
144168
out[layer_index][empty_patches] = slice_array[empty_patches]
145-
masked_pixels = out[layer_index].mask.sum()
146-
total_pixels = out[layer_index].size
147-
percent_full = round(
148-
100 * ((total_pixels - masked_pixels) / total_pixels), 2
149-
)
169+
170+
# report on layer fill status
150171
logger.debug(
151-
"layer %s: %s%% filled (%s empty pixels remaining)",
172+
"layer %s: %s",
152173
layer_index,
153-
percent_full,
154-
out[layer_index].mask.sum(),
174+
_percent_full(
175+
remaining=np.logical_and(
176+
out[layer_index].mask, layer_read_mask
177+
).sum(),
178+
total=layer_read_mask.sum(),
179+
),
155180
)
156181

157182
# remove slice values which were just inserted for next layer
@@ -161,13 +186,13 @@ def read_levelled_cube_to_np_array(
161186
logger.debug("slice fully inserted into cube, skipping")
162187
break
163188

164-
masked_pixels = out.mask.sum()
165-
total_pixels = out.size
166-
percent_full = round(100 * ((total_pixels - masked_pixels) / total_pixels), 2)
189+
# report on layer fill status
167190
logger.debug(
168-
"cube is %s%% filled (%s empty pixels remaining)",
169-
percent_full,
170-
masked_pixels,
191+
"cube is %s",
192+
_percent_full(
193+
remaining=np.logical_and(out.mask, _cube_read_mask()).sum(),
194+
total=_cube_read_mask().sum(),
195+
),
171196
)
172197

173198
logger.debug(
@@ -197,6 +222,7 @@ def read_levelled_cube_to_xarray(
197222
band_axis_name: str = "bands",
198223
x_axis_name: str = "x",
199224
y_axis_name: str = "y",
225+
read_mask: Optional[np.ndarray] = None,
200226
) -> xr.Dataset:
201227
"""
202228
Read products as slices into a cube by filling up nodata gaps with next slice.
@@ -218,6 +244,7 @@ def read_levelled_cube_to_xarray(
218244
sort=sort,
219245
product_read_kwargs=product_read_kwargs,
220246
raise_empty=raise_empty,
247+
read_mask=read_mask,
221248
),
222249
slice_names=[f"layer-{ii}" for ii in range(target_height)],
223250
band_names=variables,
@@ -226,3 +253,7 @@ def read_levelled_cube_to_xarray(
226253
x_axis_name=x_axis_name,
227254
y_axis_name=y_axis_name,
228255
)
256+
257+
258+
def _percent_full(remaining: int, total: int, ndigits: int = 2) -> str:
259+
return f"{round(100 * (total - remaining) / total, ndigits=ndigits)}% full ({remaining} remaining emtpy pixels)"

0 commit comments

Comments
 (0)