Skip to content

Commit 3b83143

Browse files
committed
Relax dataset filter matching
Relax the optional search filter in `Dataset.get_datasets` to allow matches anywhere in the dataset name (instead of only at the beginning). The dataset's ID is now searched for matches as well.
1 parent 8118ad4 commit 3b83143

File tree

3 files changed

+38
-22
lines changed

3 files changed

+38
-22
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
## ?.?.?
2+
3+
* The optional search filter in `Dataset.get_datasets` has been relaxed to allow
4+
matches anywhere in the dataset name (instead of only at the beginning). In
5+
addition, it now also searches the dataset's ID.
6+
17
## 2.4.0
28

39
* Added support for Python 3.11.

okdata/sdk/data/dataset.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,19 +19,19 @@ def create_dataset(self, data=None, retries=0):
1919
log.info(f"Created dataset: {body['Id']}")
2020
return body
2121

22+
def _matches(self, dataset, pattern):
23+
"""Return true if `dataset`'s ID or name matches `pattern`."""
24+
return re.search(pattern, dataset["Id"], re.IGNORECASE) or (
25+
"title" in dataset and re.search(pattern, dataset["title"], re.IGNORECASE)
26+
)
27+
2228
def get_datasets(self, filter=None, retries=0):
2329
url = self.config.get("datasetUrl")
2430
log.info(f"SDK:Get datasets from: {url}")
25-
result = self.get(url, retries=retries)
26-
ret = result.json()
27-
if filter is not None:
28-
if isinstance(filter, str):
29-
tmp = []
30-
for el in ret:
31-
if "title" in el and re.match(filter, el["title"], re.IGNORECASE):
32-
tmp.append(el)
33-
ret = tmp
34-
return ret
31+
datasets = self.get(url, retries=retries).json()
32+
if isinstance(filter, str):
33+
return [d for d in datasets if self._matches(d, filter)]
34+
return datasets
3535

3636
def get_dataset(self, datasetid, retries=0):
3737
datasetUrl = self.config.get("datasetUrl")

tests/data/dataset_test.py

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1-
import re
21
import json
2+
import re
3+
34
import pytest
45
from requests.exceptions import HTTPError
56

6-
from okdata.sdk.data.dataset import Dataset
77
from okdata.sdk.auth.auth import Authenticate
88
from okdata.sdk.config import Config
9+
from okdata.sdk.data.dataset import Dataset
910
from okdata.sdk.file_cache import FileCache
1011

1112
config = Config()
@@ -30,33 +31,42 @@ def test_sdk_no_auth_headers(self):
3031

3132
assert ds.headers() == {}
3233

33-
def test_getDatasets(self, requests_mock):
34+
def test_get_datasets(self, requests_mock):
3435
ds = Dataset(config=config, auth=auth_default)
35-
response = json.dumps([{"Id": "test-get-dataset"}])
36+
response = json.dumps([{"Id": "test-get-datasets"}])
3637
matcher = re.compile("datasets")
3738
requests_mock.register_uri("GET", matcher, text=response, status_code=200)
38-
list = ds.get_datasets()
39-
assert list[0]["Id"] == "test-get-dataset"
39+
res = ds.get_datasets()
40+
assert [d["Id"] for d in res] == ["test-get-datasets"]
4041

41-
def test_getDatasets_filter_no_result(self, requests_mock):
42+
def test_get_datasets_filter_no_results(self, requests_mock):
4243
ds = Dataset(config=config, auth=auth_default)
4344
response = json.dumps(
4445
[{"Id": "foo-bar", "title": "deichman", "publisher": "someone"}]
4546
)
4647
matcher = re.compile("datasets")
4748
requests_mock.register_uri("GET", matcher, text=response, status_code=200)
48-
list = ds.get_datasets("eide")
49-
assert len(list) == 0
49+
assert ds.get_datasets("eide") == []
5050

51-
def test_getDatasets_filter(self, requests_mock):
51+
def test_get_datasets_filter_by_id(self, requests_mock):
5252
ds = Dataset(config=config, auth=auth_default)
5353
response = json.dumps(
5454
[{"Id": "foo-bar", "title": "eide"}, {"Id": "foo-bar2", "title": "someone"}]
5555
)
5656
matcher = re.compile("datasets")
5757
requests_mock.register_uri("GET", matcher, text=response, status_code=200)
58-
list = ds.get_datasets("eide")
59-
assert len(list) == 1
58+
res = ds.get_datasets("bar2")
59+
assert [d["Id"] for d in res] == ["foo-bar2"]
60+
61+
def test_get_datasets_filter_by_title(self, requests_mock):
62+
ds = Dataset(config=config, auth=auth_default)
63+
response = json.dumps(
64+
[{"Id": "foo-bar", "title": "eide"}, {"Id": "foo-bar2", "title": "someone"}]
65+
)
66+
matcher = re.compile("datasets")
67+
requests_mock.register_uri("GET", matcher, text=response, status_code=200)
68+
res = ds.get_datasets("eide")
69+
assert [d["Id"] for d in res] == ["foo-bar"]
6070

6171
def test_getDataset(self, requests_mock):
6272
ds = Dataset(config=config, auth=auth_default)

0 commit comments

Comments
 (0)