Skip to content

Commit 59fa877

Browse files
committed
Unzip files from local source
1 parent 80557eb commit 59fa877

File tree

2 files changed

+96
-28
lines changed

2 files changed

+96
-28
lines changed

src/ess/reduce/data/__init__.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
# SPDX-License-Identifier: BSD-3-Clause
22
# Copyright (c) 2025 Scipp contributors (https://github.com/scipp)
3-
"""Data files bundled with ESSreduce."""
3+
"""Data files bundled with ESSreduce.
4+
5+
This module requires the Pooch package, which is not a hard dependency of ESSreduce.
6+
It has to be installed separately with either pip or conda.
7+
"""
48

59
from pathlib import Path
610

src/ess/reduce/data/_registry.py

Lines changed: 91 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def make_registry(
4343
... "zipped.zip": Entry(alg="blake2b", chk="abcdef123456789", unzip=True),
4444
... }
4545
46-
In the example above, the specification for ``file1.dat`` and ``file2.csv`` are
46+
In the example above, the specifications for ``file1.dat`` and ``file2.csv`` are
4747
essentially equivalent.
4848
``folder/nested.dat`` is a file in a subfolder.
4949
Paths like this must always use forward slashes (/) even on Windows.
@@ -84,7 +84,12 @@ def make_registry(
8484
"""
8585
if (override := os.environ.get(_LOCAL_REGISTRY_ENV_VAR)) is not None:
8686
return LocalRegistry(
87-
_check_local_override_path(override), prefix, files, version=version
87+
_check_local_override_path(override),
88+
prefix,
89+
files,
90+
version=version,
91+
base_url=base_url,
92+
retry_if_failed=retry_if_failed,
8893
)
8994
return PoochRegistry(
9095
prefix,
@@ -164,22 +169,14 @@ def __init__(
164169
base_url: str,
165170
retry_if_failed: int = 3,
166171
) -> None:
167-
try:
168-
import pooch
169-
except ImportError:
170-
raise ImportError(
171-
"You need to install Pooch to use the PoochRegistry."
172-
) from None
173-
174-
self._registry = pooch.create(
175-
path=pooch.os_cache(prefix),
176-
env=_LOCAL_CACHE_ENV_VAR,
177-
base_url=f'{base_url}/{prefix}/{version}/',
178-
registry=_to_pooch_registry(files),
172+
self._registry = _create_pooch(
173+
prefix,
174+
files,
175+
version=version,
176+
base_url=base_url,
179177
retry_if_failed=retry_if_failed,
180178
)
181-
self._unzip_processor = pooch.Unzip()
182-
179+
self._unzip_processor = _import_pooch().Unzip()
183180
super().__init__(files)
184181

185182
@cache # noqa: B019
@@ -189,21 +186,36 @@ def get_path(self, name: str) -> Path:
189186
Downloads the file if necessary.
190187
"""
191188
if self._needs_unzip(name):
192-
paths = self._registry.fetch(name, processor=self._unzip_processor)
193-
if len(paths) != 1:
194-
raise ValueError(
195-
f"Expected exactly one file to unzip, got {len(paths)} in '{name}'."
196-
)
197-
return Path(paths[0])
189+
paths: list[str] = self._registry.fetch( # type: ignore[assignment]
190+
name, processor=self._unzip_processor
191+
)
192+
return Path(_expect_single_unzipped(paths, name))
198193
return Path(self._registry.fetch(name))
199194

200195

201196
class LocalRegistry(Registry):
202197
def __init__(
203-
self, path: Path, prefix: str, files: Mapping[str, str | Entry], *, version: str
198+
self,
199+
source_path: Path,
200+
prefix: str,
201+
files: Mapping[str, str | Entry],
202+
*,
203+
version: str,
204+
base_url: str,
205+
retry_if_failed: int = 3,
204206
) -> None:
207+
# Piggyback off of Pooch to determine the cache directory.
208+
pooch_registry = _create_pooch(
209+
prefix,
210+
files,
211+
version=version,
212+
base_url=base_url,
213+
retry_if_failed=retry_if_failed,
214+
)
215+
pooch = _import_pooch()
216+
self._unzip_processor = pooch.processors.Unzip(extract_dir=pooch_registry.path)
217+
self._source_path = source_path.resolve().joinpath(*prefix.split("/"), version)
205218
super().__init__(files)
206-
self._path = path.resolve().joinpath(*prefix.split("/"), version)
207219

208220
@cache # noqa: B019
209221
def get_path(self, name: str) -> Path:
@@ -222,14 +234,66 @@ def get_path(self, name: str) -> Path:
222234

223235
_check_hash(name, path, entry)
224236

225-
# TODO unzip
226-
237+
if self._needs_unzip(name):
238+
return Path(
239+
_expect_single_unzipped(
240+
self._unzip_processor(os.fspath(path), "download", None), path
241+
)
242+
)
227243
return path
228244

229245
def _local_path(self, name: str) -> Path:
    """Return the location of ``name`` below the local source directory."""
    # Registry names are POSIX-style ("/"-separated) on every platform, so the
    # name is split manually and re-joined as a native system path, which may
    # be a Windows-style path.
    components = name.split("/")
    return self._source_path.joinpath(*components)
249+
250+
251+
def _import_pooch() -> Any:
252+
try:
253+
import pooch
254+
except ImportError:
255+
raise ImportError(
256+
"You need to install Pooch to access test and tutorial files. "
257+
"See https://www.fatiando.org/pooch/latest/index.html"
258+
) from None
259+
260+
return pooch
261+
262+
263+
def _create_pooch(
    prefix: str,
    files: Mapping[str, str | Entry],
    *,
    version: str,
    base_url: str,
    retry_if_failed: int = 3,
) -> Any:
    """Construct the Pooch object for a set of registry files.

    Parameters
    ----------
    prefix:
        Used both to select the OS cache folder (``pooch.os_cache(prefix)``)
        and as a path component of the remote URL.
    files:
        Registry entries; converted with ``_to_pooch_registry`` before being
        handed to Pooch.
    version:
        Path component of the remote URL, placed after ``prefix``.
    base_url:
        Root of the remote URL. Files are requested from
        ``{base_url}/{prefix}/{version}/``.
    retry_if_failed:
        Forwarded unchanged to ``pooch.create``.

    Returns
    -------
    :
        The object returned by ``pooch.create``.
    """
    pooch_module = _import_pooch()

    cache_dir = pooch_module.os_cache(prefix)
    remote_root = f'{base_url}/{prefix}/{version}/'
    pooch_files = _to_pooch_registry(files)

    return pooch_module.create(
        path=cache_dir,
        # Name of the environment variable passed to Pooch as ``env``.
        env=_LOCAL_CACHE_ENV_VAR,
        base_url=remote_root,
        registry=pooch_files,
        retry_if_failed=retry_if_failed,
    )
279+
280+
281+
def _pooch_unzip_processor(extract_dir: Path) -> Any:
    """Create a Pooch ``Unzip`` processor that extracts into ``extract_dir``.

    Parameters
    ----------
    extract_dir:
        Directory handed to the processor as its ``extract_dir``. It is
        converted with ``os.fspath`` before being passed on.

    Returns
    -------
    :
        A ``pooch.processors.Unzip`` instance.

    Raises
    ------
    ImportError
        If Pooch is not installed.
    """
    # Go through the shared import guard instead of repeating the try/except
    # here, so a missing Pooch yields the same error message (including the
    # pointer to the Pooch documentation) everywhere in this module.
    pooch = _import_pooch()
    return pooch.processors.Unzip(extract_dir=os.fspath(extract_dir))
288+
289+
290+
def _expect_single_unzipped(paths: list[str], archive: str | os.PathLike) -> str:
291+
if len(paths) != 1:
292+
raise ValueError(
293+
f"Expected exactly one file to unzip, got {len(paths)} in "
294+
f"'{os.fspath(archive)}'."
295+
)
296+
return paths[0]
233297

234298

235299
def _check_hash(name: str, path: Path, entry: Entry) -> None:

0 commit comments

Comments
 (0)