Skip to content

Commit 65ff684

Browse files
committed
Support unpacked "outer" directories
That is, an unpacked download archive from the EnMAP portal, containing one or more Zip sub-archives.
1 parent c06d18a commit 65ff684

File tree

1 file changed

+32
-13
lines changed

1 file changed

+32
-13
lines changed

xarray_enmap/xarray_enmap.py

Lines changed: 32 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,9 @@ def open_dataset(
4949
if path.is_file():
5050
ds = read_dataset_from_archive(filename_or_obj, self.temp_dir)
5151
elif path.is_dir():
52-
ds = read_dataset_from_directory(path)
52+
ds = read_dataset_from_unknown_directory(path, self.temp_dir)
5353
elif filename_or_obj.startswith("s3://"):
54-
ds = read_dataset_from_directory(filename_or_obj)
54+
ds = read_dataset_from_inner_directory(filename_or_obj)
5555
else:
5656
raise ValueError(
5757
f"{filename_or_obj} is neither a path nor a directory."
@@ -65,15 +65,31 @@ def close(self):
6565

6666

6767
def read_dataset_from_archive(
    input_filename: str | os.PathLike[Any], temp_dir: str
) -> xr.Dataset:
    """Read an EnMAP dataset from an archive file.

    The archive is extracted into ``temp_dir``. If extraction yields more
    than one data directory, only the first is read and a warning is logged.

    Args:
        input_filename: path to the archive file (e.g. a ``.tar.gz``
            or ``.zip`` downloaded from the EnMAP portal)
        temp_dir: directory into which the archive contents are extracted

    Returns:
        the dataset read from the first extracted data directory

    Raises:
        ValueError: if no data directories were found in the archive
    """
    data_dirs = list(extract_archives(input_filename, temp_dir))
    if not data_dirs:
        # Fail with a clear message rather than an opaque IndexError
        # on the subscript below.
        raise ValueError(f"No data archives found in {input_filename}")
    if len(data_dirs) > 1:
        LOGGER.warning("Multiple data archives found; reading the first.")
    return read_dataset_from_inner_directory(data_dirs[0])
7474

7575

76-
def read_dataset_from_directory(data_dir: str | os.PathLike[Any]):
76+
def read_dataset_from_unknown_directory(
    data_dir: str | os.PathLike[Any], temp_dir: str
):
    """Read an EnMAP dataset from a directory of unknown kind.

    Decide, from the presence of a ``*METADATA.XML`` file, whether
    ``data_dir`` is an "inner" data directory or an "outer" directory
    (an unpacked portal download containing zipped sub-archives), and
    dispatch to the appropriate reader.

    Args:
        data_dir: directory to read (inner or outer)
        temp_dir: directory used when extracting any sub-archives

    Raises:
        RuntimeError: if more than one ``*METADATA.XML`` file is present
    """
    dir_path = pathlib.Path(data_dir)
    n_metadata = len(list(dir_path.glob("*METADATA.XML")))
    if n_metadata == 0:
        # No metadata file at this level: treat as an outer directory.
        return read_dataset_from_archive(dir_path, temp_dir)
    if n_metadata == 1:
        # Exactly one metadata file: treat as an inner data directory.
        return read_dataset_from_inner_directory(dir_path)
    raise RuntimeError("Too many METADATA.XML files")
90+
91+
92+
def read_dataset_from_inner_directory(data_dir: str | os.PathLike[Any]):
7793
data_path = pathlib.Path(data_dir)
7894
LOGGER.info(f"Processing {data_path}")
7995
arrays = {
@@ -203,13 +219,16 @@ def extract_archives(
203219
final_path = dest_path / "data"
204220
os.mkdir(final_path)
205221
archive_path = pathlib.Path(archive_path)
206-
if archive_path.name.endswith(".tar.gz"):
207-
# An EnMAP tgz usually contains one or more zip archives containing
208-
# the actual data files.
209-
outer_path = dest_path / "outer-archive"
210-
LOGGER.info(f"Extracting {archive_path.name}")
211-
with tarfile.open(archive_path) as tgz_file:
212-
tgz_file.extractall(path=outer_path, filter="data")
222+
if archive_path.name.endswith(".tar.gz") or archive_path.is_dir():
223+
if archive_path.is_dir():
224+
outer_path = archive_path
225+
else:
226+
# An EnMAP tgz usually contains one or more zip archives containing
227+
# the actual data files.
228+
outer_path = dest_path / "outer-archive"
229+
LOGGER.info(f"Extracting {archive_path.name}")
230+
with tarfile.open(archive_path) as tgz_file:
231+
tgz_file.extractall(path=outer_path, filter="data")
213232
data_paths = []
214233
for index, path_to_zip_file in enumerate(find_zips(outer_path)):
215234
data_paths.append(
@@ -219,7 +238,7 @@ def extract_archives(
219238
else:
220239
# Assume it's a zip and skip the outer archive extraction step.
221240
LOGGER.info(f"Assuming {archive_path} is an inner zipfile")
222-
return [(extract_zip(final_path, 0, inner_path, archive_path))]
241+
return [extract_zip(final_path, 0, inner_path, archive_path)]
223242

224243

225244
def find_zips(root: os.PathLike):

0 commit comments

Comments
 (0)