@@ -49,9 +49,9 @@ def open_dataset(
         if path.is_file():
             ds = read_dataset_from_archive(filename_or_obj, self.temp_dir)
         elif path.is_dir():
-            ds = read_dataset_from_directory(path)
+            ds = read_dataset_from_unknown_directory(path, self.temp_dir)
         elif filename_or_obj.startswith("s3://"):
-            ds = read_dataset_from_directory(filename_or_obj)
+            ds = read_dataset_from_inner_directory(filename_or_obj)
         else:
             raise ValueError(
                 f"{filename_or_obj} is neither a path nor a directory."
@@ -65,15 +65,31 @@ def close(self):


 def read_dataset_from_archive(
-    input_filename: str, temp_dir: str
+    input_filename: str | os.PathLike[Any], temp_dir: str
 ) -> xr.Dataset:
     data_dirs = list(extract_archives(input_filename, temp_dir))
     if len(data_dirs) > 1:
         LOGGER.warning("Multiple data archives found; reading the first.")
-    return read_dataset_from_directory(data_dirs[0])
+    return read_dataset_from_inner_directory(data_dirs[0])


-def read_dataset_from_directory(data_dir: str | os.PathLike[Any]):
+def read_dataset_from_unknown_directory(
+    data_dir: str | os.PathLike[Any], temp_dir: str
+):
+    data_path = pathlib.Path(data_dir)
+    metadata_files = list(data_path.glob("*METADATA.XML"))
+    match len(metadata_files):
+        case 0:
+            # assume outer directory
+            return read_dataset_from_archive(data_path, temp_dir)
+        case 1:
+            # assume inner directory
+            return read_dataset_from_inner_directory(data_path)
+        case _:
+            raise RuntimeError("Too many METADATA.XML files")
+
+
+def read_dataset_from_inner_directory(data_dir: str | os.PathLike[Any]):
     data_path = pathlib.Path(data_dir)
     LOGGER.info(f"Processing {data_path}")
     arrays = {
@@ -203,13 +219,16 @@ def extract_archives(
     final_path = dest_path / "data"
     os.mkdir(final_path)
     archive_path = pathlib.Path(archive_path)
-    if archive_path.name.endswith(".tar.gz"):
-        # An EnMAP tgz usually contains one or more zip archives containing
-        # the actual data files.
-        outer_path = dest_path / "outer-archive"
-        LOGGER.info(f"Extracting {archive_path.name}")
-        with tarfile.open(archive_path) as tgz_file:
-            tgz_file.extractall(path=outer_path, filter="data")
+    if archive_path.name.endswith(".tar.gz") or archive_path.is_dir():
+        if archive_path.is_dir():
+            outer_path = archive_path
+        else:
+            # An EnMAP tgz usually contains one or more zip archives containing
+            # the actual data files.
+            outer_path = dest_path / "outer-archive"
+            LOGGER.info(f"Extracting {archive_path.name}")
+            with tarfile.open(archive_path) as tgz_file:
+                tgz_file.extractall(path=outer_path, filter="data")
         data_paths = []
         for index, path_to_zip_file in enumerate(find_zips(outer_path)):
             data_paths.append(
@@ -219,7 +238,7 @@ def extract_archives(
     else:
         # Assume it's a zip and skip the outer archive extraction step.
         LOGGER.info(f"Assuming {archive_path} is an inner zipfile")
-        return [(extract_zip(final_path, 0, inner_path, archive_path))]
+        return [extract_zip(final_path, 0, inner_path, archive_path)]


 def find_zips(root: os.PathLike):