Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 6 additions & 19 deletions dials_data/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,11 @@
import hashlib
import os
import tarfile
import warnings
import zipfile
from pathlib import Path
from typing import Any
from urllib.parse import urlparse

import py.path
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
Expand Down Expand Up @@ -300,36 +298,25 @@ def result_filter(self, result, **kwargs):
"""
return result

def __call__(self, test_data: str, pathlib=None, **kwargs):
def __call__(self, test_data: str, **kwargs):
"""
Return the location of a dataset, transparently downloading it if
necessary and possible.
The return value can be manipulated by overriding the result_filter
function.
:param test_data: name of the requested dataset.
:param pathlib: Whether to return the result as a Python pathlib object.
The default for this setting is 'False' for now (leading
to a py.path.local object being returned), but the default
will change to 'True' in a future dials.data release.
Set to 'True' for forward compatibility.
:return: A pathlib.Path object pointing to the dataset, or False
if the dataset is not available.
"""
if "pathlib" in kwargs:
raise ValueError(
"The pathlib parameter has been removed. The "
"DataFetcher always returns pathlib.Path() objects now."
)
if test_data not in self._cache:
self._cache[test_data] = self._attempt_fetch(test_data)
if pathlib is None:
warnings.warn(
"The DataFetcher currently returns py.path.local() objects. "
"This will in the future change to pathlib.Path() objects. "
"You can either add a pathlib=True argument to obtain a pathlib.Path() object, "
"or pathlib=False to silence this warning for now.",
DeprecationWarning,
stacklevel=2,
)
if not self._cache[test_data]:
return self.result_filter(result=False)
elif not pathlib:
return self.result_filter(result=py.path.local(self._cache[test_data]))
return self.result_filter(result=self._cache[test_data])

def _attempt_fetch(self, test_data: str) -> Path | None:
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ build-backend = "setuptools.build_meta"

[project]
name = "dials_data"
version = "2.4.0"
version = "3.0.0"
description = "DIALS Regression Data Manager"
authors = [
{ name = "DIALS development team", email = "dials-[email protected]" },
{ name = "DIALS development team", email = "dials-[email protected]" },
]
license = { text = "BSD 3-Clause License" }
classifiers = [
Expand Down
56 changes: 2 additions & 54 deletions tests/test_dials_data.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
from __future__ import annotations

import pathlib
from unittest import mock

import py
import pytest

import dials_data
import dials_data.datasets
import dials_data.download
Expand All @@ -22,60 +18,12 @@ def test_repository_location():

def test_fetching_undefined_datasets_does_not_crash():
df = dials_data.download.DataFetcher(read_only=True)
assert df("aardvark", pathlib=True) is False
assert df("aardvark") is False


def test_requests_for_future_datasets_can_be_intercepted():
df = dials_data.download.DataFetcher(read_only=True)
df.result_filter = mock.Mock()
df.result_filter.return_value = False
assert df("aardvark", pathlib=True) is False
assert df("aardvark") is False
df.result_filter.assert_called_once_with(result=False)


@mock.patch("dials_data.datasets.repository_location")
@mock.patch("dials_data.download.fetch_dataset")
def test_datafetcher_constructs_py_path(fetcher, root):
root.return_value = pathlib.Path("/tmp/root")
fetcher.return_value = True

df = dials_data.download.DataFetcher(read_only=True)
with pytest.warns(DeprecationWarning):
ds = df("dataset")
assert pathlib.Path(ds).resolve() == pathlib.Path("/tmp/root/dataset").resolve()
assert isinstance(ds, py.path.local)
fetcher.assert_called_once_with(
"dataset", pre_scan=True, read_only=False, verify=True
)

ds = df("dataset", pathlib=False)
assert pathlib.Path(ds).resolve() == pathlib.Path("/tmp/root/dataset").resolve()
assert isinstance(ds, py.path.local)
fetcher.assert_called_once()


@mock.patch("dials_data.datasets.repository_location")
@mock.patch("dials_data.download.fetch_dataset")
def test_datafetcher_constructs_path(fetcher, root):
test_path = pathlib.Path("/tmp/root")
root.return_value = test_path
fetcher.return_value = True

df = dials_data.download.DataFetcher(read_only=True)
ds = df("dataset", pathlib=True)
assert ds == test_path / "dataset"

assert isinstance(ds, pathlib.Path)
fetcher.assert_called_once_with(
"dataset", pre_scan=True, read_only=False, verify=True
)

with pytest.warns(DeprecationWarning):
ds = df("dataset")
assert pathlib.Path(ds).resolve() == test_path.joinpath("dataset").resolve()
assert not isinstance(
ds, pathlib.Path
) # default is currently to return py.path.local()
fetcher.assert_called_once_with(
"dataset", pre_scan=True, read_only=False, verify=True
)