Commit 6aa7da0

Implement functional EnMAP reader
The backend can now actually read EnMAP archives. Some additional functionality (e.g. guess_can_open) remains to be implemented, and the code needs tidying up, but it's been tested successfully on EnMAP data.
1 parent 1973cba · commit 6aa7da0
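
For orientation, a minimal usage sketch (the archive filename is hypothetical, and the backend class is passed to xarray directly, since this commit does not register an engine name):

import xarray as xr

from xrenmap.xrenmap import EnmapEntrypoint

# Hypothetical path to an EnMAP L2A product archive.
ds = xr.open_dataset("enmap-product.tar.gz", engine=EnmapEntrypoint)
print(ds["reflectance"].shape)
ds.close()  # also removes the temporary extraction directory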

File tree

3 files changed (+205, -4 lines)

environment.yml

Lines changed: 2 additions & 1 deletion

@@ -2,5 +2,6 @@ name: xrenmap
 channels:
   - conda-forge
 dependencies:
-  - netcdf4
+  - rioxarray
+  - shapely
   - xarray
pyproject.toml

Lines changed: 2 additions & 1 deletion

@@ -20,7 +20,8 @@ readme = {file = "README.md", content-type = "text/markdown"}
 license = {text = "MIT"}
 requires-python = ">=3.10"
 dependencies = [
-    "netcdf4",
+    "rioxarray",
+    "shapely",
     "xarray",
 ]
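
Neither hunk registers the backend with xarray, so opening by engine name will not work yet. For that, pyproject.toml would also need an xarray entry point along these lines (a sketch, assuming the engine is to be named "enmap"):

[project.entry-points."xarray.backends"]
enmap = "xrenmap.xrenmap:EnmapEntrypoint"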

xrenmap/xrenmap.py

Lines changed: 201 additions & 2 deletions
@@ -1,15 +1,214 @@
-import os
 from collections.abc import Iterable
+import logging
+import os
+import pathlib
+import rioxarray
+import shutil
+import tarfile
+import tempfile
 from typing import Any
+import xml.etree.ElementTree
+import zipfile
 
+import shapely
 import xarray as xr
 
+
+LOGGER = logging.getLogger(__name__)
+
+# Map dataset variable names to the GeoTIFF file stems inside an
+# EnMAP product directory.
+VAR_MAP = dict(
+    reflectance="SPECTRAL_IMAGE",
+    mask="QL_PIXELMASK",
+    cirrus="QL_QUALITY_CIRRUS",
+    classes="QL_QUALITY_CLASSES",
+    cloudshadow="QL_QUALITY_CLOUDSHADOW",
+    cloud="QL_QUALITY_CLOUD",
+    haze="QL_QUALITY_HAZE",
+    snow="QL_QUALITY_SNOW",
+    testflags="QL_QUALITY_TESTFLAGS",
+    # We omit the quicklook files QL_SWIR and QL_VNIR.
+)
+
+
 class EnmapEntrypoint(xr.backends.BackendEntrypoint):
 
+    temp_dir = None
+
     def open_dataset(
         self,
         filename_or_obj: str | os.PathLike[Any],
         *,
         drop_variables: str | Iterable[str] | None = None,
     ) -> xr.Dataset:
-        return xr.open_dataset(filename_or_obj)
+        # Extract into a temporary directory that lives as long as the
+        # dataset; close() below removes it.
+        self.temp_dir = tempfile.mkdtemp(prefix="xrenmap-")
+        ds = process(filename_or_obj, self.temp_dir)
+        ds.set_close(self.close)
+        return ds
+
+    def close(self):
+        if self.temp_dir:
+            shutil.rmtree(self.temp_dir)
+
+
+def process(
+    input_filename: str,
+    temp_dir: str,
+):
+    return convert(input_filename, temp_dir)
+
+
+def convert(input_filename: str, temp_dir: str) -> xr.Dataset:
+    data_dirs = list(extract_archives(input_filename, temp_dir))
+    if len(data_dirs) > 1:
+        LOGGER.warning("Multiple data archives found; reading the first.")
+    return read_dataset_from_directory(data_dirs[0])
+
+
+def read_dataset_from_directory(data_dir):
+    LOGGER.info(f"Processing {data_dir}")
+    arrays = {
+        name: rioxarray.open_rasterio(
+            data_dir / (filename + ".TIF")
+        ).squeeze()
+        for name, filename in VAR_MAP.items()
+    }
+    ds = xr.Dataset(arrays)
+    add_metadata(ds, data_dir)
+    return ds
+
+
+def add_metadata(ds: xr.Dataset, data_dir: pathlib.Path):
+    root = xml.etree.ElementTree.parse(data_dir / "METADATA.XML").getroot()
+
+    points = root.findall("base/spatialCoverage/boundingPolygon/point")
+    bounds = shapely.Polygon(
+        [float(p.find("longitude").text), float(p.find("latitude").text)]
+        for p in points
+        if p.find("frame").text != "center"
+    )
+    bbox = bounds.bounds
+
+    def text(xpath):
+        return root.find(xpath).text
+
+    global_attrs = {
+        "id": text("product/image/merge/name").removesuffix(
+            "-SPECTRAL_IMAGE.TIF"
+        ),
+        "title": text("metadata/comment"),
+        "summary": text("metadata/citation"),
+        "keywords": "EnMAP,hyperspectral,remote sensing",
+        "Conventions": "ACDD-1.3,CF-1.8",
+        "naming_authority": "de.dlr",
+        "processing_level": "2A",
+        "geospatial_bounds": shapely.to_wkt(bounds),
+        "geospatial_bounds_crs": "EPSG:4326",
+        "geospatial_lat_min": bbox[1],
+        "geospatial_lat_max": bbox[3],
+        "geospatial_lon_min": bbox[0],
+        "geospatial_lon_max": bbox[2],
+        "time_coverage_start": text("base/temporalCoverage/startTime"),
+        "time_coverage_end": text("base/temporalCoverage/stopTime"),
+    }
+    ds.attrs.update(global_attrs)
+
+    # Per-variable (long_name, standard_name, units,
+    # coverage_content_type[, extra attributes]) tuples.
+    var_attrs: dict[str, tuple] = {
+        "reflectance": (
+            "reflectance",
+            "surface_bidirectional_reflectance",
+            1,
+            "physicalMeasurement",
+        ),
+        "cirrus": (
+            "cirrus mask",
+            "cirrus",
+            1,
+            "qualityInformation",
+        ),
+        "classes": (
+            "area type",
+            "area_type",
+            1,
+            "qualityInformation",
+            {
+                "flag_values": [1, 2, 3],
+                "flag_meanings": ["Land", "Water", "Background"],
+            },
+        ),
+        "cloud": ("cloud mask", "cloud_binary_mask", 1, "qualityInformation"),
+        "cloudshadow": (
+            "cloud shadow",
+            "cloud_shadow",
+            1,
+            "qualityInformation",
+        ),
+        "haze": ("haze mask", "haze", 1, "qualityInformation"),
+        "mask": ("pixel mask", "mask", 1, "qualityInformation"),
+        "snow": (
+            "snow mask",
+            "surface_snow_binary_mask",
+            1,
+            "qualityInformation",
+        ),
+        "testflags": ("test flags", "test_flags", 1, "qualityInformation"),
+    }
+
+    for var, values in var_attrs.items():
+        attrs = {
+            "long_name": values[0],
+            "standard_name": values[1],
+            "units": values[2],
+            "coverage_content_type": values[3],
+        }
+        if len(values) > 4:
+            attrs.update(values[4])
+        ds[var].attrs.update(attrs)
+
+
+def extract_archives(
+    archive_path: os.PathLike | str, dest_dir: os.PathLike | str
+) -> Iterable[pathlib.Path]:
+    dest_path = pathlib.Path(dest_dir)
+    archive_path = pathlib.Path(archive_path)
+    if archive_path.name.endswith(".tar.gz"):
+        # An EnMAP tgz usually contains one or more zip archives
+        # containing the actual data files.
+        outer_path = dest_path / "outer-archive"
+        LOGGER.info(f"Extracting {archive_path.name}")
+        with tarfile.open(archive_path) as tgz_file:
+            tgz_file.extractall(path=outer_path, filter="data")
+    else:
+        # Assume it's a zip and skip the outer archive
+        # extraction step.
+        LOGGER.info(f"Assuming {archive_path} is an inner zipfile")
+        outer_path = archive_path.parent
+    inner_path = dest_path / "inner-archive"
+
+    data_paths = []
+    final_path = dest_path / "data"
+    os.mkdir(final_path)
+    for index, path_to_zip_file in enumerate(find_zips(outer_path)):
+        LOGGER.info(f"Extracting {path_to_zip_file.name}")
+        extract_path = inner_path / str(index)
+        with zipfile.ZipFile(path_to_zip_file, "r") as zip_ref:
+            zip_ref.extractall(extract_path)
+        input_data_path = list(extract_path.iterdir())[0]
+        input_data_dir = input_data_path.name
+        output_data_path = final_path / input_data_dir
+        data_paths.append(output_data_path)
+        # Filenames inside the product directory repeat the directory
+        # name plus a separator; strip that prefix when moving them.
+        prefix_length = len(input_data_path.name) + 1
+        os.mkdir(output_data_path)
+        for filepath in input_data_path.iterdir():
+            os.rename(
+                filepath, output_data_path / filepath.name[prefix_length:]
+            )
+    return data_paths
+
+
+def find_zips(root: os.PathLike):
+    root_path = pathlib.Path(root)
+    # Use os.walk for Python 3.10 compatibility; pathlib.Path.walk
+    # requires Python >= 3.12.
+    for parent, dirs, files in os.walk(root_path, onerror=print):
+        for filename in files:
+            if filename.endswith(".ZIP"):
+                yield pathlib.Path(parent, filename)
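
For the guess_can_open that the commit message defers, one possible shape is sketched below; the "ENMAP" filename heuristic is an assumption, not part of this commit:

import os
import pathlib
from typing import Any

import xarray as xr


class EnmapEntrypoint(xr.backends.BackendEntrypoint):
    # ... open_dataset and close as in the diff above ...

    def guess_can_open(self, filename_or_obj: str | os.PathLike[Any]) -> bool:
        try:
            name = pathlib.Path(filename_or_obj).name
        except TypeError:
            # Not a path-like object (e.g. an open file), so decline it.
            return False
        # Hypothetical heuristic: accept paths that look like EnMAP
        # product archives.
        return name.startswith("ENMAP") and name.endswith((".tar.gz", ".ZIP"))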
