Skip to content

Commit eb8ee9c

Browse files
jsignell and gadomski authored
Improve documentation of TestData (#451)
Co-authored-by: Pete Gadomski <pete.gadomski@gmail.com>
1 parent adaa9f3 commit eb8ee9c

File tree

1 file changed

+91
-46
lines changed

1 file changed

+91
-46
lines changed

src/stactools/testing/test_data.py

Lines changed: 91 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -1,102 +1,147 @@
11
import os
22
import shutil
3+
from dataclasses import dataclass
34
from tempfile import TemporaryDirectory
4-
from typing import Any, Dict
5+
from typing import Any, Dict, Optional, Union
56
from zipfile import ZipFile
67

78
import fsspec
89
import requests
910

10-
# TODO make external data a dataclass
1111

12-
# example external data:
13-
# {
14-
# 'AST_L1T_00305032000040446_20150409135350_78838.hdf': {
15-
# 'url':
16-
# ('https://ai4epublictestdata.blob.core.windows.net/'
17-
# 'stactools/aster/AST_L1T_00305032000040446_20150409135350_78838.zip'),
18-
# 'compress':
19-
# 'zip'
20-
# }
21-
# }
12+
@dataclass
13+
class ExternalData:
14+
"""External data configurations for fetching and storing remote files.
2215
16+
Args:
17+
url (str): URL at which the external data is found.
18+
compress (str): Compression method that has been used on external data.
19+
If provided, data is extracted after it is fetched.
20+
Only zip is supported. Defaults to None.
21+
s3 (Dict[str, Any]): Dictionary containing keyword arguments to use
22+
when instantiating ``s3fs.S3FileSystem``. Defaults to None.
23+
planetary_computer (bool): Whether external data is on planetary computer
24+
and needs to be signed. Defaults to False.
25+
26+
"""
2327

28+
url: str
29+
compress: Optional[str] = None
30+
s3: Optional[Dict[str, Any]] = None
31+
planetary_computer: bool = False
32+
33+
34+
@dataclass
2435
class TestData:
2536
"""A structure for getting paths to test data files, and fetching external
2637
data for local testing.
2738
28-
Initialize this from, e.g., ``tests/__init__.py``:
39+
Initializing this from, e.g., ``/home/user/my-package/tests/__init__.py``:
2940
3041
.. code-block:: python
3142
3243
test_data = TestData(__file__)
3344
34-
The external data dictionary should look something like this:
45+
Means that ``get_path`` will be relative to ``/home/user/my-package/tests``.
46+
47+
.. code-block:: python
48+
49+
test_data.get_path("data-files/basic")
50+
# "/home/user/my-package/tests/data-files/basic"
51+
52+
When caching external data that base path is appended with
53+
``test_data.external_subpath`` which by default is 'data-files/external'.
54+
55+
For instance with the following external data configuration the external
56+
data file will be fetched from the URL, extracted from its zip file and
57+
locally stored at:
58+
``/home/user/my-package/tests/data-files/external/AST_L1T_00305032000040446_20150409135350_78838.hdf``
3559
3660
.. code-block:: python
3761
38-
{
62+
test_data.external_data = {
3963
'AST_L1T_00305032000040446_20150409135350_78838.hdf': {
4064
'url':
4165
('https://ai4epublictestdata.blob.core.windows.net/'
4266
'stactools/aster/AST_L1T_00305032000040446_20150409135350_78838.zip'),
4367
'compress': 'zip'
4468
}
4569
}
70+
test_data.get_external_data("AST_L1T_00305032000040446_20150409135350_78838.hdf")
4671
4772
Args:
48-
path (str): The path to the file at the root of the test data directory.
49-
external_data (dict[str, Any]):
50-
External data configurations. These dictionaries can be used to
51-
configure files that are fetched from remote locations and stored
52-
locally for testing.
73+
path (str): The path to any file in the directory where data is
74+
(or will be) stored. The directory information is taken from this
75+
path and used as the base for relative paths for the local data. It
76+
is stored on the class as ``self.base_path``
77+
external_data (Dict[str, ExternalData]):
78+
External data configurations for fetching and storing remote files.
79+
This is defined as a dictionary with the following structure: the
80+
key is the relative path (relative to
81+
``self.base_path / self.external_subpath``) for cached data
82+
after it is fetched from remote and the value is the configuration
83+
as defined in :class:`ExternalData`.
84+
external_subpath (str): The subpath under ``self.base_path`` that is
85+
used for storing external data files. Defaults to 'data-files/external'
5386
"""
5487

5588
__test__ = False
5689

57-
def __init__(self, path: str, external_data: Dict[str, Any] = {}) -> None:
58-
self.path = path
90+
def __init__(
91+
self,
92+
path: str,
93+
external_data: Dict[str, Union[Dict[str, Any], ExternalData]] = {},
94+
external_subpath: str = "data-files/external",
95+
) -> None:
96+
self.base_path = os.path.abspath(os.path.dirname(path))
97+
self.external_subpath = external_subpath
5998
self.external_data = external_data
6099

61100
def get_path(self, rel_path: str) -> str:
62-
"""Returns an absolute path to a local test file.
101+
"""Returns an absolute path to a local data file.
63102
64103
Args:
65104
rel_path (str):
66105
The relative path to the test data file. The path is
67-
assumed to be relative to the directory containing ``self.path``.
106+
assumed to be relative to ``self.base_path``.
68107
69108
Returns:
70-
str: An absolute path.
109+
str: The absolute path joining ``self.base_path`` and ``rel_path``
71110
"""
72-
return os.path.abspath(os.path.join(os.path.dirname(self.path), rel_path))
111+
return os.path.join(self.base_path, rel_path)
73112

74113
def get_external_data(self, rel_path: str) -> str:
75-
"""Returns an absolute path to a local test file after downloading it
76-
from an external source.
114+
"""Returns the path to the local cached version of the external data.
115+
116+
If data is not yet cached, this method fetches it, caches it, then returns
117+
the path to the local cached version.
77118
78119
Args:
79-
rel_path (str): The key to the external data, as configured in class
80-
instantiation.
120+
rel_path (str): This is both the filename that the local data will be
121+
stored at _and_ a key in the ``external_data`` dictionary where the
122+
corresponding value is the configuration information for the external
123+
data.
81124
82125
Returns:
83-
str: The absolute path to the external data file.
126+
str: The absolute path to the local cached version of the
127+
external data file.
84128
"""
85-
path = self.get_path(os.path.join("data-files/external", rel_path))
129+
path = self.get_path(os.path.join(self.external_subpath, rel_path))
86130
if not os.path.exists(path):
87-
entry = self.external_data.get(rel_path)
88-
if entry is None:
131+
config = self.external_data.get(rel_path)
132+
if config is None:
89133
raise Exception(
90-
"Path {} does not exist and there is no entry "
91-
"for external test data {}.".format(path, rel_path)
134+
f"Local path {path} does not exist and there is no key "
135+
f"in ``external_data`` that matches {rel_path}"
92136
)
93137

94-
print("Downloading external test data {}...".format(rel_path))
138+
print(f"Downloading external test data {rel_path}...")
95139
os.makedirs(os.path.dirname(path), exist_ok=True)
96140

97-
s3_config = entry.get("s3")
98-
is_pc = entry.get("planetary_computer") # True if from PC, needs signing
99-
if s3_config:
141+
if not isinstance(config, ExternalData):
142+
config = ExternalData(**config)
143+
144+
if config.s3:
100145
try:
101146
import s3fs
102147
except ImportError as e:
@@ -107,11 +152,11 @@ def get_external_data(self, rel_path: str) -> str:
107152
"with s3fs via `pip install stactools[s3]` and try again."
108153
)
109154
raise (e)
110-
s3 = s3fs.S3FileSystem(**s3_config)
111-
with s3.open(entry["url"]) as f:
155+
s3 = s3fs.S3FileSystem(**config.s3)
156+
with s3.open(config.url) as f:
112157
data = f.read()
113-
elif is_pc:
114-
href = entry["url"]
158+
elif config.planetary_computer:
159+
href = config.url
115160
r = requests.get(
116161
"https://planetarycomputer.microsoft.com/api/sas/v1/sign?"
117162
f"href={href}"
@@ -123,10 +168,10 @@ def get_external_data(self, rel_path: str) -> str:
123168
data = f.read()
124169

125170
else:
126-
with fsspec.open(entry["url"]) as f:
171+
with fsspec.open(config.url) as f:
127172
data = f.read()
128173

129-
if entry.get("compress") == "zip":
174+
if config.compress == "zip":
130175
with TemporaryDirectory() as tmp_dir:
131176
tmp_path = os.path.join(tmp_dir, "file.zip")
132177
with open(tmp_path, "wb") as f:

0 commit comments

Comments
 (0)