Skip to content

Commit 39a092d

Browse files
committed
Support opening of unzipped directories
Since xarray-enmap renames the data files when extracting from zips, the directory reading functionality was failing when run on a directory containing unmodified, unzipped archive contents. Addresses #3.
1 parent a094ddb commit 39a092d

File tree

1 file changed

+29
-13
lines changed

1 file changed

+29
-13
lines changed

xarray_enmap/xarray_enmap.py

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Copyright (c) 2025 by Brockmann Consult GmbH
22
# Permissions are hereby granted under the terms of the MIT License:
33
# https://opensource.org/licenses/MIT.
4-
4+
import re
55
from collections.abc import Iterable
66
import logging
77
import os
@@ -10,7 +10,7 @@
1010
import shutil
1111
import tarfile
1212
import tempfile
13-
from typing import Any
13+
from typing import Any, Mapping
1414
import xml.etree
1515
import zipfile
1616

@@ -73,30 +73,43 @@ def read_dataset_from_archive(
7373
return read_dataset_from_directory(data_dirs[0])
7474

7575

76-
def read_dataset_from_directory(data_dir):
77-
LOGGER.info(f"Processing {data_dir}")
76+
def read_dataset_from_directory(data_dir: str | os.PathLike[Any]):
77+
data_path = pathlib.Path(data_dir)
78+
LOGGER.info(f"Processing {data_path}")
7879
arrays = {
79-
name: rioxarray.open_rasterio(
80-
str(data_dir) + "/" + (filename + ".TIF")
81-
).squeeze()
82-
for name, filename in VAR_MAP.items()
80+
name: rioxarray.open_rasterio(filename).squeeze()
81+
for name, filename in find_datafiles(data_path).items()
8382
}
8483
ds = xr.Dataset(arrays)
85-
add_metadata(ds, data_dir)
84+
add_metadata(ds, data_path)
8685
return ds
8786

8887

88+
def find_datafiles(data_path: pathlib.Path) -> Mapping[str, pathlib.Path]:
    """Locate the TIFF file for every variable in a product directory.

    Each value of ``VAR_MAP`` is treated as a file basename. A file in
    ``data_path`` is accepted for a variable if its name is exactly
    ``<basename>.TIF``, optionally preceded by a prefix that starts with
    ``ENMAP``. This covers both directories whose files had their prefix
    stripped on extraction and directories holding unmodified, unzipped
    archive contents.

    Args:
        data_path: Directory to search (non-recursively) for ``*.TIF`` files.

    Returns:
        A mapping from each key of ``VAR_MAP`` to the path of its TIFF file.

    Raises:
        NotADirectoryError: If ``data_path`` is not a directory.
        FileNotFoundError: If no TIFF file matches one of the variables.
        ValueError: If more than one TIFF file matches one of the variables.
    """
    # Explicit exceptions rather than ``assert``: assertions are stripped
    # under ``python -O``, which would turn a missing file into an IndexError
    # and let an ambiguous match pass silently.
    if not data_path.is_dir():
        raise NotADirectoryError(f"{data_path} is not a directory")
    tiffs = list(data_path.glob("*.TIF"))
    result = {}
    for name, basename in VAR_MAP.items():
        # Escape the basename and the dot, and match the whole filename, so
        # that names which merely start with the expected pattern (e.g.
        # "QL_VNIR.TIF.bak.TIF") are not counted as matches.
        pattern = re.compile(rf"(ENMAP.*)?{re.escape(basename)}\.TIF")
        matches = [tiff for tiff in tiffs if pattern.fullmatch(tiff.name)]
        if not matches:
            raise FileNotFoundError(f"Can't find TIFF for {name}")
        if len(matches) > 1:
            raise ValueError(f"Too many TIFFs for {name}")
        result[name] = matches[0]
    return result
99+
100+
89101
def add_metadata(ds: xr.Dataset, data_dir: pathlib.Path):
102+
metadata_paths = list(data_dir.glob("*METADATA.XML"))
103+
assert len(metadata_paths) == 1
104+
metadata_path = metadata_paths[0]
90105
if str(data_dir).startswith("s3://"):
91106
import fsspec
92107

93108
fs = fsspec.filesystem("s3")
94-
with fs.open(str(data_dir) + "/" + "METADATA.XML") as fh:
109+
with fs.open(metadata_path) as fh:
95110
root = xml.etree.ElementTree.parse(fh).getroot()
96111
else:
97-
root = xml.etree.ElementTree.parse(
98-
str(data_dir) + "/" + "METADATA.XML"
99-
).getroot()
112+
root = xml.etree.ElementTree.parse(metadata_path).getroot()
100113
points = root.findall("base/spatialCoverage/boundingPolygon/point")
101114
bounds = shapely.Polygon(
102115
[float(p.find("longitude").text), p.find("latitude").text]
@@ -232,6 +245,9 @@ def extract_zip(
232245
output_data_path = final_path / input_data_dir
233246
prefix_length = len(input_data_path.name) + 1
234247
os.mkdir(output_data_path)
248+
# Strip the long, redundant prefix from the filenames. The renaming is not
249+
# visible when reading through the xarray plugin, but it is convenient when
250+
# this function is used as a standalone archive extractor.
235251
for filepath in input_data_path.iterdir():
236252
os.rename(filepath, output_data_path / filepath.name[prefix_length:])
237253
return output_data_path

0 commit comments

Comments
 (0)