@@ -53,7 +53,9 @@ def open_dataset(
5353 elif filename_or_obj .startswith ("s3://" ):
5454 ds = read_dataset_from_directory (filename_or_obj )
5555 else :
56- raise ValueError (f"{ filename_or_obj } is neither a path nor a directory." )
56+ raise ValueError (
57+ f"{ filename_or_obj } is neither a path nor a directory."
58+ )
5759 ds .set_close (self .close )
5860 return ds
5961
@@ -74,7 +76,9 @@ def read_dataset_from_archive(
7476def read_dataset_from_directory (data_dir ):
7577 LOGGER .info (f"Processing { data_dir } " )
7678 arrays = {
77- name : rioxarray .open_rasterio (str (data_dir ) + "/" + (filename + ".TIF" )).squeeze ()
79+ name : rioxarray .open_rasterio (
80+ str (data_dir ) + "/" + (filename + ".TIF" )
81+ ).squeeze ()
7882 for name , filename in VAR_MAP .items ()
7983 }
8084 ds = xr .Dataset (arrays )
@@ -85,11 +89,14 @@ def read_dataset_from_directory(data_dir):
8589def add_metadata (ds : xr .Dataset , data_dir : pathlib .Path ):
8690 if str (data_dir ).startswith ("s3://" ):
8791 import fsspec
92+
8893 fs = fsspec .filesystem ("s3" )
8994 with fs .open (str (data_dir ) + "/" + "METADATA.XML" ) as fh :
9095 root = xml .etree .ElementTree .parse (fh ).getroot ()
9196 else :
92- root = xml .etree .ElementTree .parse (str (data_dir ) + "/" + "METADATA.XML" ).getroot ()
97+ root = xml .etree .ElementTree .parse (
98+ str (data_dir ) + "/" + "METADATA.XML"
99+ ).getroot ()
93100 points = root .findall ("base/spatialCoverage/boundingPolygon/point" )
94101 bounds = shapely .Polygon (
95102 [float (p .find ("longitude" ).text ), p .find ("latitude" ).text ]
@@ -179,40 +186,27 @@ def extract_archives(
179186 archive_path : os .PathLike | str , dest_dir : os .PathLike | str
180187) -> Iterable [pathlib .Path ]:
181188 dest_path = pathlib .Path (dest_dir )
189+ inner_path = dest_path / "inner-archive"
190+ final_path = dest_path / "data"
191+ os .mkdir (final_path )
182192 archive_path = pathlib .Path (archive_path )
183193 if archive_path .name .endswith (".tar.gz" ):
184- # An EnMAP tgz usually contains one or more zip archives
185- # containing the actual data files.
194+ # An EnMAP tgz usually contains one or more zip archives containing
195+ # the actual data files.
186196 outer_path = dest_path / "outer-archive"
187197 LOGGER .info (f"Extracting { archive_path .name } " )
188198 with tarfile .open (archive_path ) as tgz_file :
189199 tgz_file .extractall (path = outer_path , filter = "data" )
200+ data_paths = []
201+ for index , path_to_zip_file in enumerate (find_zips (outer_path )):
202+ data_paths .append (
203+ extract_zip (final_path , index , inner_path , path_to_zip_file )
204+ )
205+ return data_paths
190206 else :
191- # Assume it's a zip and skip the outer archive
192- # extraction step.
207+ # Assume it's a zip and skip the outer archive extraction step.
193208 LOGGER .info (f"Assuming { archive_path } is an inner zipfile" )
194- outer_path = archive_path .parent
195- inner_path = dest_path / "inner-archive"
196-
197- data_paths = []
198- final_path = dest_path / "data"
199- os .mkdir (final_path )
200- for index , path_to_zip_file in enumerate (find_zips (outer_path )):
201- LOGGER .info (f"Extracting { path_to_zip_file .name } " )
202- extract_path = inner_path / str (index )
203- with zipfile .ZipFile (path_to_zip_file , "r" ) as zip_ref :
204- zip_ref .extractall (extract_path )
205- input_data_path = list (extract_path .iterdir ())[0 ]
206- input_data_dir = input_data_path .name
207- output_data_path = final_path / input_data_dir
208- data_paths .append (output_data_path )
209- prefix_length = len (input_data_path .name ) + 1
210- os .mkdir (output_data_path )
211- for filepath in input_data_path .iterdir ():
212- os .rename (
213- filepath , output_data_path / filepath .name [prefix_length :]
214- )
215- return data_paths
209+ return [(extract_zip (final_path , 0 , inner_path , archive_path ))]
216210
217211
218212def find_zips (root : os .PathLike ):
@@ -221,3 +215,23 @@ def find_zips(root: os.PathLike):
221215 for filename in files :
222216 if filename .endswith (".ZIP" ):
223217 yield pathlib .Path (parent , filename )
218+
219+
def extract_zip(
    final_path: pathlib.Path,
    index: int,
    inner_path: pathlib.Path,
    path_to_zip_file: pathlib.Path,
) -> pathlib.Path:
    """Unpack one zip archive and move its data files below *final_path*.

    The archive is extracted into ``inner_path / str(index)``. The
    top-level directory found there is re-created (by name) inside
    *final_path*, and each of its entries is moved into that new directory.
    Entries whose name starts with the directory name lose that prefix plus
    the single separator character following it; all other entries keep
    their name unchanged.

    Args:
        final_path: Existing directory that receives the output directory.
        index: Number used to name the scratch extraction directory, so
            that several archives can be extracted side by side.
        inner_path: Parent directory for the scratch extraction directories.
        path_to_zip_file: The zip archive to extract.

    Returns:
        Path of the newly created output directory inside *final_path*.

    Raises:
        ValueError: If the archive contains no top-level directory.
        FileExistsError: If the output directory already exists.
    """
    LOGGER.info(f"Extracting {path_to_zip_file.name}")
    extract_path = inner_path / str(index)
    with zipfile.ZipFile(path_to_zip_file, "r") as zip_ref:
        zip_ref.extractall(extract_path)

    # The extraction directory may not exist when the archive is empty, and
    # iterdir() yields entries in arbitrary order, so select the data
    # directory deterministically and ignore stray top-level files.
    top_level_dirs = (
        sorted(entry for entry in extract_path.iterdir() if entry.is_dir())
        if extract_path.is_dir()
        else []
    )
    if not top_level_dirs:
        raise ValueError(
            f"{path_to_zip_file} does not contain a top-level data directory."
        )
    input_data_path = top_level_dirs[0]

    prefix = input_data_path.name
    # The "+ 1" also drops the separator character that follows the prefix.
    prefix_length = len(prefix) + 1
    output_data_path = final_path / prefix
    os.mkdir(output_data_path)
    for filepath in input_data_path.iterdir():
        stripped_name = filepath.name[prefix_length:]
        # Only strip when the prefix is really there and something remains;
        # otherwise slicing would mangle the name or leave it empty.
        if filepath.name.startswith(prefix) and stripped_name:
            target_name = stripped_name
        else:
            target_name = filepath.name
        os.rename(filepath, output_data_path / target_name)
    return output_data_path
0 commit comments