Skip to content

Commit c343d64

Browse files
committed
Refactored Sentinel-2 preprocessing (openeo) to be useful for S3
1 parent 84e628d commit c343d64

File tree

7 files changed

+128
-64
lines changed

7 files changed

+128
-64
lines changed

efast/openeo/__init__.py

Whitespace-only changes.
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
from .general import connect, TestArea
2+
from .general import extract_mask, distance_to_clouds
3+
from . import s2, s3
4+
5+
__all__ = ["TestArea", "connect", "extract_mask", "s2", "s3", "distance_to_clouds"]
Lines changed: 70 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,16 @@
1-
from typing import Mapping, Sequence
1+
import operator
2+
from typing import Tuple
23
import openeo
34
import numpy as np
45
import scipy
6+
from collections.abc import Callable, Mapping, Iterable
57

68

79
def connect():
810
return openeo.connect("https://openeo.dataspace.copernicus.eu/")
911

1012

11-
# TODO move somewhere else, not specific to sentinel-2
13+
1214
class TestArea:
1315
# aoi_wkt = "POINT (-15.432283 15.402828)"
1416
directions = ["west", "south", "east", "north"]
@@ -19,14 +21,14 @@ def __init__(
1921
self,
2022
*,
2123
bbox: Mapping[str, float] = bbox,
22-
s2_bands: Sequence[str] = ["B02", "B03", "B04", "B8A", "SCL"],
23-
s3_bands: Sequence[str] = [
24+
s2_bands: Iterable[str] = ["B02", "B03", "B04", "B8A", "SCL"],
25+
s3_bands: Iterable[str] = [
2426
"Syn_Oa04_reflectance",
2527
"Syn_Oa06_reflectance",
2628
"Syn_Oa08_reflectance",
2729
"Syn_Oa17_reflectance",
2830
],
29-
temporal_extent: Sequence[str] = ["2022-06-01", "2022-06-30"],
31+
temporal_extent: Iterable[str] = ["2022-06-01", "2022-06-30"],
3032
) -> None:
3133
self.bbox = bbox
3234
self.s2_bands = s2_bands
@@ -50,39 +52,6 @@ def get_s3_cube(self, connection):
5052
)
5153

5254

53-
def extract_cloud_mask(cube: openeo.DataCube):
54-
scl = cube.filter_bands(["SCL"])
55-
# 0: No data
56-
# 3: Cloud shadow
57-
# 8-10: Clouds
58-
# 11: Snow or ice
59-
60-
mask = (scl == 0) | (scl == 3) | (scl > 7)
61-
return mask
62-
63-
64-
def calculate_large_grid_cloud_mask(cube: openeo.DataCube, tolerance_percentage: float = 0.05, grid_side_length: int=300):
65-
cloud_mask = extract_cloud_mask(cube)
66-
# FIXME check also if there is negative or zero data, otherwise results will differ
67-
68-
# TODO this could better be resample_cube_spatial, because we are matching to a sentinel-3 cube
69-
cloud_mask = cloud_mask * 1.0 # convert to float
70-
cloud_mask_resampled = cloud_mask.resample_spatial(grid_side_length, method="average") # resample to sentinel-3 size
71-
72-
# TODO extract UDF to file
73-
# UDF to apply an element-wise less than operation. Normal "<" does not properly work on openEO datacubes
74-
udf = openeo.UDF(f"""
75-
import numpy as np
76-
import xarray as xr
77-
def apply_datacube(cube: XarrayDataCube, context: dict) -> XarrayDataCube:
78-
array = cube.get_array()
79-
array = array < {tolerance_percentage}
80-
#return XarrayDataCube(xr.DataArray(array, dims=["t", "x", "y", "bands"]))
81-
return XarrayDataCube(xr.DataArray(array, dims=["bands", "x", "y"]))
82-
""")
83-
return cloud_mask_resampled.apply(process=udf)
84-
85-
8655
def distance_to_clouds(
8756
cube: openeo.DataCube, tolerance_percentage=0.05, ratio=30, max_distance=255
8857
):
@@ -116,3 +85,66 @@ def _distance_to_clouds_udf(
11685
overlap=[],
11786
)
11887
return dtc
88+
89+
90+
def extract_mask(
91+
cube: openeo.DataCube,
92+
mask_values: Mapping[str, Iterable[int]],
93+
*,
94+
operations: Mapping[Tuple[str, int], Callable],
95+
) -> openeo.DataCube:
96+
"""
97+
Generic method to extract a mask from a data cube.
98+
Generate a mask that has a value of ``True`` whereever the band specified
99+
as a key in ``mask_values`` is equal to one of the values speicified
100+
as a value. Operations other than equality comparison can be specified
101+
via ``operations``.
102+
103+
import operator
104+
extract_mask(
105+
cube,
106+
mask_values = {
107+
"SCL": (0, 3, 7),
108+
},
109+
operations = {
110+
# use ``>`` instead of ``==``
111+
# to compare band ``"SCL"`` to value ``7``
112+
("SCL", 7): operator.gt,
113+
}
114+
)
115+
116+
# scl = cube.band("SCL")
117+
# mask = (scl == 0) | (scl == 3) | (scl > 7)
118+
119+
120+
:param cube: The data cube containing at least the band(s) used for masking
121+
:param mask_values: Mapping of band names to the values that should be masked.
122+
:param operations: Used to specify a comparison operation different to ``==``
123+
when comparing bands and values. Operations are applied as ``op(band, value)``
124+
125+
"""
126+
127+
assert(len(mask_values) > 0), "'mask_values' cannot be empty."
128+
def reduce_single_band(band_name, values_to_mask, mask=None):
129+
band = cube.band(band_name)
130+
vm_iter = iter(values_to_mask)
131+
132+
if mask is None:
133+
vm_first = next(vm_iter)
134+
mask = band == vm_first
135+
136+
for vm in vm_iter:
137+
op = operations.get((band_name, vm), operator.eq)
138+
mask |= op(band, vm)
139+
140+
return mask
141+
142+
first_band_name, *bands = mask_values.keys()
143+
first_vm, *values_to_mask = mask_values.values()
144+
145+
mask = reduce_single_band(first_band_name, first_vm, mask=None)
146+
147+
for band_name, values_to_mask in zip(bands, values_to_mask):
148+
mask |= reduce_single_band(band_name, values_to_mask, mask)
149+
150+
return mask

efast/openeo/preprocessing/s2.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import operator
2+
3+
import openeo
4+
5+
from . import extract_mask
6+
7+
8+
def extract_cloud_mask_s2(cube: openeo.DataCube) -> openeo.DataCube:
9+
return extract_mask(
10+
cube,
11+
{"SCL": [0, 3, 7]},
12+
operations={
13+
("SCL", 7): operator.gt,
14+
},
15+
)
16+
17+
def calculate_large_grid_cloud_mask(cube: openeo.DataCube, tolerance_percentage: float = 0.05, grid_side_length: int=300):
18+
cloud_mask = extract_cloud_mask_s2(cube)
19+
# FIXME check also if there is negative or zero data, otherwise results will differ
20+
21+
# TODO this could better be resample_cube_spatial, because we are matching to a sentinel-3 cube
22+
cloud_mask = cloud_mask * 1.0 # convert to float
23+
cloud_mask_resampled = cloud_mask.resample_spatial(grid_side_length, method="average") # resample to sentinel-3 size
24+
return cloud_mask_resampled < tolerance_percentage

efast/openeo/preprocessing/s3.py

Whitespace-only changes.

efast/s3_processing_openeo.py

Lines changed: 0 additions & 7 deletions
This file was deleted.

tests/test_preprocessing_openeo.py

Lines changed: 29 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from collections.abc import Generator
22
import shutil
33
import time
4+
import pytest
45

56
from contextlib import contextmanager
67
from pathlib import Path
@@ -17,7 +18,11 @@
1718

1819
from openeo.udf import execute_local_udf
1920

20-
from efast import s2_processing, s2_processing_openeo, s3_processing
21+
from efast.openeo import preprocessing
22+
# TODO this should live somewhere else
23+
from efast.openeo.preprocessing import connect
24+
from efast import s2_processing, s3_processing
25+
2126

2227
TEST_DATA_ROOT = Path(__file__).parent.parent / "test_data"
2328
TEST_DATA_S2 = TEST_DATA_ROOT / "S2"
@@ -33,7 +38,8 @@
3338
TEST_DATE = "20220618"
3439
TEST_DATE_DASH = "2022-06-18"
3540

36-
SKIP_LOCAL = False
41+
SKIP_LOCAL = True
42+
DOWNLOAD_INTERMEDIATE_RESULTS = True
3743
SMALL_AREA = True
3844

3945

@@ -57,6 +63,7 @@ def create_temp_dir_and_copy_files(
5763
############################## S2 Processing ##############################
5864

5965

66+
@pytest.mark.skip
6067
def test_distance_to_cloud():
6168
with create_temp_dir_and_copy_files(TEST_DATA_S2, sub="raw/", pattern=None) as tmp:
6269
tmp_path = Path(tmp.name)
@@ -86,39 +93,41 @@ def test_distance_to_cloud():
8693

8794
# openeo
8895

89-
conn = s2_processing_openeo.connect()
96+
conn = connect()
9097
conn.authenticate_oidc()
9198

92-
test_area = s2_processing_openeo.TestArea(
99+
test_area = preprocessing.TestArea(
93100
bbox=bounds,
94101
s2_bands=["SCL"],
95102
temporal_extent=(TEST_DATE_DASH, TEST_DATE_DASH),
96103
)
97104
cube = test_area.get_s2_cube(conn)
98105

99-
dtc_input = s2_processing_openeo.calculate_large_grid_cloud_mask(
106+
dtc_input = preprocessing.s2.calculate_large_grid_cloud_mask(
100107
cube, tolerance_percentage=0.05, grid_side_length=300
101108
)
102-
dtc = s2_processing_openeo.distance_to_clouds(
109+
dtc = preprocessing.distance_to_clouds(
103110
dtc_input, tolerance_percentage=0.05, ratio=30
104111
)
105112
download_path = tmp_path / "test_distance_to_cloud.tif"
106113

107114
print("openEO execution")
108-
109-
# intermediate results for debugging
110-
BASE_DIR = Path("openeo_results")
111-
# BASE_DIR.mkdir(exist_ok=True)
112-
# cloud_mask.download(BASE_DIR / "cloud_mask.tif")
113-
# cloud_mask_resampled.download(BASE_DIR / "cloud_mask_resampled.tif")
114-
# dtc_input.download(BASE_DIR / "dtc_input.tif")
115-
116115
before = time.perf_counter()
117-
dtc.download(download_path)
118-
shutil.copy(download_path, BASE_DIR)
116+
#dtc.download(download_path)
119117
elapsed = time.perf_counter() - before
120118
print(f"executed and downloaded in {elapsed:.2f}s")
121119

120+
if DOWNLOAD_INTERMEDIATE_RESULTS:
121+
BASE_DIR = Path("openeo_results")
122+
BASE_DIR.mkdir(exist_ok=True)
123+
print("downloading input") # TMP
124+
(dtc_input * 1.0).download(BASE_DIR / "dtc_input.tif")
125+
#shutil.copy(download_path, BASE_DIR)
126+
print("downloading result") # TMP
127+
dtc.download(download_path) # TMP
128+
shutil.copy(download_path, BASE_DIR) # TMP
129+
130+
122131
with rasterio.open(download_path, "r") as ds:
123132
dtc_openeo = ds.read(1)
124133

@@ -140,7 +149,8 @@ def test_distance_to_cloud_synthetic_cube():
140149
.mean(1)
141150
) < tolerance
142151
cube = xr.DataArray(cube, dims=["x", "y"])
143-
cube_resampled = xr.DataArray(cube_resampled, dims=["x", "y"])
152+
#cube = cube.add_dimension(name="bands", label="mask", type="bands")
153+
cube_resampled = xr.DataArray(cube_resampled[np.newaxis, np.newaxis], dims=["bands", "t", "x", "y"])
144154

145155
udf = openeo.UDF.from_file("efast/distance_transform_udf.py")
146156
dtc_local_udf = (
@@ -245,7 +255,7 @@ def inner_data_acquisition_s3(tmpdir):
245255

246256
# openeo
247257

248-
conn = s2_processing_openeo.connect()
258+
conn = connect()
249259
conn.authenticate_oidc()
250260

251261
bands = [
@@ -256,7 +266,7 @@ def inner_data_acquisition_s3(tmpdir):
256266
"CLOUD_flags",
257267
"OLC_flags",
258268
]
259-
test_area = s2_processing_openeo.TestArea(
269+
test_area = preprocessing.TestArea(
260270
bbox=bounds, s3_bands=bands, temporal_extent=(TEST_DATE_DASH, TEST_DATE_DASH)
261271
)
262272
cube = test_area.get_s3_cube(conn)

0 commit comments

Comments
 (0)