11import os
22import shutil
3+ from dataclasses import dataclass
34from tempfile import TemporaryDirectory
4- from typing import Any , Dict
5+ from typing import Any , Dict , Optional , Union
56from zipfile import ZipFile
67
78import fsspec
89import requests
910
10- # TODO make external data a dataclass
1111
12- # example external data:
13- # {
14- # 'AST_L1T_00305032000040446_20150409135350_78838.hdf': {
15- # 'url':
16- # ('https://ai4epublictestdata.blob.core.windows.net/'
17- # 'stactools/aster/AST_L1T_00305032000040446_20150409135350_78838.zip'),
18- # 'compress':
19- # 'zip'
20- # }
21- # }
12+ @dataclass
13+ class ExternalData :
14+ """External data configurations for fetching and storing remote files.
2215
16+ Args:
17+ url (str): URL at which the external data is found.
18+ compress (str): Compression method that has been used on external data.
19+ If provided, data is extracted after it is fetched.
20+ Only zip is supported. Defaults to None.
21+ s3 (Dict[str, Any]): Dictionary containing keyword arguments to use
22+ when instantiating ``s3fs.S3FileSystem``. Defaults to None.
23+ planetary_computer (bool): Whether external data is on planetary computer
24+ and needs to be signed. Defaults to False.
25+
26+ """
2327
28+ url : str
29+ compress : Optional [str ] = None
30+ s3 : Optional [Dict [str , Any ]] = None
31+ planetary_computer : bool = False
32+
33+
34+ @dataclass
2435class TestData :
2536 """A structure for getting paths to test data files, and fetching external
2637 data for local testing.
2738
28- Initialize this from, e.g., ``tests/__init__.py``:
39+ Initializing this from, e.g., ``/home/user/my-package/tests/__init__.py``:
2940
3041 .. code-block:: python
3142
3243 test_data = TestData(__file__)
3344
34- The external data dictionary should look something like this:
45+ Means that ``get_path`` will be relative to ``/home/user/my-package/tests``.
46+
47+ .. code-block:: python
48+
49+ test_data.get_path("data-files/basic")
50+ # "/home/user/my-package/tests/data-files/basic"
51+
52+ When caching external data, ``test_data.external_subpath`` (which defaults to
53+ 'data-files/external') is appended to that base path.
54+
55+ For instance, with the following external data configuration, the external
56+ data file will be fetched from the URL, extracted from its zip file, and
57+ stored locally at:
58+ ``/home/user/my-package/tests/data-files/external/AST_L1T_00305032000040446_20150409135350_78838.hdf``
3559
3660 .. code-block:: python
3761
38- {
62+ test_data.external_data = {
3963 'AST_L1T_00305032000040446_20150409135350_78838.hdf': {
4064 'url':
4165 ('https://ai4epublictestdata.blob.core.windows.net/'
4266 'stactools/aster/AST_L1T_00305032000040446_20150409135350_78838.zip'),
4367 'compress': 'zip'
4468 }
4569 }
70+ test_data.get_external_data("AST_L1T_00305032000040446_20150409135350_78838.hdf")
4671
4772 Args:
48- path (str): The path to the file at the root of the test data directory.
49- external_data (dict[str, Any]):
50- External data configurations. These dictionaries can be used to
51- configure files that are fetched from remote locations and stored
52- locally for testing.
73+ path (str): The path to any file in the directory where data is
74+ (or will be) stored. The directory information is taken from this
75+ path and used as the base for relative paths for the local data. It
76+ is stored on the instance as ``self.base_path``.
77+ external_data (Dict[str, ExternalData]):
78+ External data configurations for fetching and storing remote files.
79+ This is defined as a dictionary with the following structure: the
80+ key is the relative path (relative to
81+ ``self.base_path / self.external_subpath``) for cached data
82+ after it is fetched from remote and the value is the configuration
83+ as defined in :class:`ExternalData`.
84+ external_subpath (str): The subpath under ``self.base_path`` that is
85+ used for storing external data files. Defaults to 'data-files/external'.
5386 """
5487
5588 __test__ = False
5689
57- def __init__ (self , path : str , external_data : Dict [str , Any ] = {}) -> None :
58- self .path = path
90+ def __init__ (
91+ self ,
92+ path : str ,
93+ external_data : Dict [str , Union [Dict [str , Any ], ExternalData ]] = {},
94+ external_subpath : str = "data-files/external" ,
95+ ) -> None :
96+ self .base_path = os .path .abspath (os .path .dirname (path ))
97+ self .external_subpath = external_subpath
5998 self .external_data = external_data
6099
61100 def get_path (self , rel_path : str ) -> str :
62- """Returns an absolute path to a local test file.
101+ """Returns an absolute path to a local data file.
63102
64103 Args:
65104 rel_path (str):
66105 The relative path to the test data file. The path is
67- assumed to be relative to the directory containing ``self.path ``.
106+ assumed to be relative to ``self.base_path``.
68107
69108 Returns:
70- str: An absolute path.
109+ str: The absolute path joining ``self.base_path`` and ``rel_path``
71110 """
72- return os .path .abspath ( os . path . join (os . path . dirname ( self .path ) , rel_path ) )
111+ return os .path .join (self .base_path , rel_path )
73112
74113 def get_external_data (self , rel_path : str ) -> str :
75- """Returns an absolute path to a local test file after downloading it
76- from an external source.
114+ """Returns the path to the local cached version of the external data.
115+
116+ If data is not yet cached, this method fetches it, caches it, then returns
117+ the path to the local cached version.
77118
78119 Args:
79- rel_path (str): The key to the external data, as configured in class
80- instantiation.
120+ rel_path (str): This is both the filename that the local data will be
121+ stored at _and_ a key in the ``external_data`` dictionary where the
122+ corresponding value is the configuration information for the external
123+ data.
81124
82125 Returns:
83- str: The absolute path to the external data file.
126+ str: The absolute path to the local cached version of the
127+ external data file.
84128 """
85- path = self .get_path (os .path .join ("data-files/external" , rel_path ))
129+ path = self .get_path (os .path .join (self . external_subpath , rel_path ))
86130 if not os .path .exists (path ):
87- entry = self .external_data .get (rel_path )
88- if entry is None :
131+ config = self .external_data .get (rel_path )
132+ if config is None :
89133 raise Exception (
90- "Path { } does not exist and there is no entry "
91- "for external test data {}." . format ( path , rel_path )
134+ f"Local path { path } does not exist and there is no key "
135+ f"in ``external_data`` that matches { rel_path } "
92136 )
93137
94- print ("Downloading external test data {}..." . format ( rel_path ) )
138+ print (f "Downloading external test data { rel_path } ..." )
95139 os .makedirs (os .path .dirname (path ), exist_ok = True )
96140
97- s3_config = entry .get ("s3" )
98- is_pc = entry .get ("planetary_computer" ) # True if from PC, needs signing
99- if s3_config :
141+ if not isinstance (config , ExternalData ):
142+ config = ExternalData (** config )
143+
144+ if config .s3 :
100145 try :
101146 import s3fs
102147 except ImportError as e :
@@ -107,11 +152,11 @@ def get_external_data(self, rel_path: str) -> str:
107152 "with s3fs via `pip install stactools[s3]` and try again."
108153 )
109154 raise (e )
110- s3 = s3fs .S3FileSystem (** s3_config )
111- with s3 .open (entry [ " url" ] ) as f :
155+ s3 = s3fs .S3FileSystem (** config . s3 )
156+ with s3 .open (config . url ) as f :
112157 data = f .read ()
113- elif is_pc :
114- href = entry [ " url" ]
158+ elif config . planetary_computer :
159+ href = config . url
115160 r = requests .get (
116161 "https://planetarycomputer.microsoft.com/api/sas/v1/sign?"
117162 f"href={ href } "
@@ -123,10 +168,10 @@ def get_external_data(self, rel_path: str) -> str:
123168 data = f .read ()
124169
125170 else :
126- with fsspec .open (entry [ " url" ] ) as f :
171+ with fsspec .open (config . url ) as f :
127172 data = f .read ()
128173
129- if entry . get ( " compress" ) == "zip" :
174+ if config . compress == "zip" :
130175 with TemporaryDirectory () as tmp_dir :
131176 tmp_path = os .path .join (tmp_dir , "file.zip" )
132177 with open (tmp_path , "wb" ) as f :
0 commit comments