
Commit 0de66ae

Merge pull request #1176 from xcube-dev/toniof-726-list_extensions
List filename extensions recognized by data stores
2 parents a5899a6 + e24dec6 · commit 0de66ae

13 files changed (+547 −184 lines)
CHANGES.md

Lines changed: 7 additions & 1 deletion

```diff
@@ -1,4 +1,10 @@
-## Changes in 1.11.2 (in development)
+## Changes in 1.12.0 (in development)
+
+### Enhancements
+* Added function `get_filename_extensions()` to the data store framework:
+  `from xcube.core.store import get_filename_extensions`.
+  It allows retrieving mappings of recognized filename extensions to the
+  respective data openers and writers. (#726)
 
 ### Other changes
 
```
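
The new function can be exercised as in the following quick sketch; the example keys and accessor ids are illustrative, taken from the tests added further below in this commit:

```python
from xcube.core.store import get_filename_extensions

# Mapping from filename extensions to data opener ids, e.g.
# {".zarr": ["dataset:zarr:file", ...], ".nc": ["dataset:netcdf:file", ...], ...}
opener_extensions = get_filename_extensions("openers")

# The same mapping, but to data writer ids:
writer_extensions = get_filename_extensions("writers")

print(sorted(opener_extensions))
# e.g. ['.geojson', '.geotiff', '.levels', '.nc', '.shp', '.tif', '.tiff', '.zarr']
```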

docs/source/api.rst

Lines changed: 2 additions & 0 deletions

```diff
@@ -14,6 +14,8 @@ Functions
 
 .. autofunction:: xcube.core.store.find_data_store_extensions
 
+.. autofunction:: xcube.core.store.get_filename_extensions
+
 .. autofunction:: xcube.core.store.list_data_store_ids
 
 .. autofunction:: xcube.core.store.get_data_store_class
```

docs/source/dataaccess.md

Lines changed: 2 additions & 1 deletion

````diff
@@ -798,6 +798,7 @@ def init_plugin(ext_registry: extension.ExtensionRegistry):
             '{your_package}.opener:{YourOpenerClass}'),
         point=EXTENSION_POINT_DATA_OPENERS,
         name="{your_opener_id}",
-        description='{your opener description}'
+        description="{your opener description}",
+        filename_extensions=[".j2k", ".zarr"],  # adjust or leave empty
     )
 ```
````
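
For context, the registration documented there looks roughly as follows after this change. This is a reconstruction: the imports and the `add_extension`/`import_component` calls are inferred from the hunk's context and xcube's plugin docs, not shown in full by the diff, and the brace-wrapped names are placeholders for plugin authors to fill in:

```python
# Reconstructed sketch of the documented plugin registration (assumptions noted above).
from xcube.constants import EXTENSION_POINT_DATA_OPENERS
from xcube.util import extension


def init_plugin(ext_registry: extension.ExtensionRegistry):
    ext_registry.add_extension(
        loader=extension.import_component(
            '{your_package}.opener:{YourOpenerClass}'),
        point=EXTENSION_POINT_DATA_OPENERS,
        name="{your_opener_id}",
        description="{your opener description}",
        # New in this PR: filename extensions recognized by the opener
        filename_extensions=[".j2k", ".zarr"],  # adjust or leave empty
    )
```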

rtd-environment.yml

Lines changed: 4 additions & 1 deletion

```diff
@@ -8,6 +8,8 @@ dependencies:
   # Required
   - adlfs >=2023.1  # for azure blob filesystem
   - affine >=2.2
+  - altair
+  - chartlets >= 0.1.3
   - click >=8.0
   - cmocean >=2.0
   - dask >=2021.6
@@ -40,11 +42,12 @@ dependencies:
   - scipy >=1.6.0
   - setuptools >=41.0
   - shapely >=1.6
+  - tabulate >=0.9
   - tornado >=6.0
   - urllib3 >=1.26
   - werkzeug <2.2  # >=2.2 slows down S3 tests (deps: moto->flask->werkzeug)
   - xarray >=2022.6, <= 2024.6
-  - zarr >=2.11
+  - zarr >=2.11, <3
   # Required by Coiled
   # These are very likely transitive deps anyway
   - lz4
```

test/core/store/fs/test_registry.py

Lines changed: 72 additions & 8 deletions

```diff
@@ -29,7 +29,7 @@
     MultiLevelDatasetDescriptor,
     MutableDataStore,
 )
-from xcube.core.store.fs.registry import new_fs_data_store
+from xcube.core.store.fs.registry import get_filename_extensions, new_fs_data_store
 from xcube.core.store.fs.store import FsDataStore
 from xcube.core.zarrstore import GenericZarrStore
 from xcube.util.temp import new_temp_dir
@@ -112,7 +112,7 @@ def test_open_packed(self):
 # noinspection PyUnresolvedReferences,PyPep8Naming
 class FsDataStoresTestMixin(ABC):
     @abstractmethod
-    def create_data_store(self) -> FsDataStore:
+    def create_data_store(self, read_only=False) -> FsDataStore:
         pass
 
     @classmethod
@@ -137,6 +137,28 @@ def prepare_fs(cls, fs: fsspec.AbstractFileSystem, root: str):
         with fs.open(file_path, "w") as fp:
             fp.write("\n")
 
+    def test_no_write_to_read_only(self):
+        data_store = self.create_data_store(read_only=True)
+        data = new_cube_data()
+        with self.assertRaises(DataStoreError) as dse:
+            data_store.write_data(data)
+        self.assertEqual("Data store is read-only.", f"{dse.exception}")
+
+    def test_no_delete_on_read_only(self):
+        data_store = self.create_data_store(read_only=True)
+        with self.assertRaises(DataStoreError) as dse:
+            data_store.delete_data("the_data_id_does_not_even_matter.nc")
+        self.assertEqual("Data store is read-only.", f"{dse.exception}")
+
+    def test_cannot_open_unknown_format(self):
+        data_store = self.create_data_store()
+        with self.assertRaises(DataStoreError) as dse:
+            data_store.open_data("unknown.format")
+        self.assertEqual(
+            "Cannot determine data type for data resource 'unknown.format'",
+            f"{dse.exception}",
+        )
+
     def test_mldataset_levels(self):
         data_store = self.create_data_store()
         self._assert_multi_level_dataset_format_supported(data_store)
@@ -491,21 +513,21 @@ def _assert_dataset_supported(
 
 
 class FileFsDataStoresTest(FsDataStoresTestMixin, unittest.TestCase):
-    def create_data_store(self) -> FsDataStore:
+    def create_data_store(self, read_only=False) -> FsDataStore:
         root = os.path.join(new_temp_dir(prefix="xcube"), ROOT_DIR)
         self.prepare_fs(fsspec.filesystem("file"), root)
-        return new_fs_data_store("file", root=root, max_depth=3)
+        return new_fs_data_store("file", root=root, max_depth=3, read_only=read_only)
 
 
 class MemoryFsDataStoresTest(FsDataStoresTestMixin, unittest.TestCase):
-    def create_data_store(self) -> FsDataStore:
+    def create_data_store(self, read_only=False) -> FsDataStore:
         root = ROOT_DIR
         self.prepare_fs(fsspec.filesystem("memory"), root)
-        return new_fs_data_store("memory", root=root, max_depth=3)
+        return new_fs_data_store("memory", root=root, max_depth=3, read_only=read_only)
 
 
 class S3FsDataStoresTest(FsDataStoresTestMixin, S3Test):
-    def create_data_store(self) -> FsDataStore:
+    def create_data_store(self, read_only=False) -> FsDataStore:
         root = ROOT_DIR
         storage_options = dict(
             anon=False,
@@ -515,5 +537,47 @@ def create_data_store(self) -> FsDataStore:
         )
         self.prepare_fs(fsspec.filesystem("s3", **storage_options), root)
         return new_fs_data_store(
-            "s3", root=root, max_depth=3, storage_options=storage_options
+            "s3",
+            root=root,
+            max_depth=3,
+            storage_options=storage_options,
+            read_only=read_only,
         )
+
+
+class GetFilenameExtensionsTest(unittest.TestCase):
+    def test_get_filename_extensions_openers(self):
+        opener_extensions = get_filename_extensions("openers")
+        self.assertIn(".nc", list(opener_extensions.keys()))
+        self.assertIn(".zarr", list(opener_extensions.keys()))
+        self.assertIn(".levels", list(opener_extensions.keys()))
+        self.assertIn(".shp", list(opener_extensions.keys()))
+        self.assertIn(".geojson", list(opener_extensions.keys()))
+        self.assertIn(".tif", list(opener_extensions.keys()))
+        self.assertIn(".tiff", list(opener_extensions.keys()))
+        self.assertIn(".geotiff", list(opener_extensions.keys()))
+        self.assertTrue(len(opener_extensions[".nc"]) >= 6)
+        self.assertTrue(len(opener_extensions[".zarr"]) >= 6)
+        self.assertTrue(len(opener_extensions[".levels"]) >= 12)
+        self.assertTrue(len(opener_extensions[".shp"]) >= 6)
+        self.assertTrue(len(opener_extensions[".geojson"]) >= 6)
+        self.assertTrue(len(opener_extensions[".tif"]) >= 12)
+        self.assertTrue(len(opener_extensions[".tiff"]) >= 12)
+        self.assertTrue(len(opener_extensions[".geotiff"]) >= 12)
+
+    def test_get_filename_extensions_writers(self):
+        writer_extensions = get_filename_extensions("writers")
+        self.assertIn(".nc", list(writer_extensions.keys()))
+        self.assertIn(".zarr", list(writer_extensions.keys()))
+        self.assertIn(".levels", list(writer_extensions.keys()))
+        self.assertIn(".shp", list(writer_extensions.keys()))
+        self.assertIn(".geojson", list(writer_extensions.keys()))
+        self.assertTrue(len(writer_extensions[".nc"]) >= 6)
+        self.assertTrue(len(writer_extensions[".zarr"]) >= 6)
+        self.assertTrue(len(writer_extensions[".levels"]) >= 12)
+        self.assertTrue(len(writer_extensions[".shp"]) >= 6)
+        self.assertTrue(len(writer_extensions[".geojson"]) >= 6)
+
+    def test_get_filename_extensions_invalid(self):
+        with self.assertRaises(DataStoreError):
+            get_filename_extensions("rgth")
```
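
The read-only behavior pinned down by the first two new tests can be reproduced standalone; in this sketch the store root and data id are illustrative, while the imports follow the exports added in this commit:

```python
# Sketch of the asserted read-only behavior; root and data id are made up.
from xcube.core.store import DataStoreError, new_fs_data_store

store = new_fs_data_store("memory", root="xcube-demo", read_only=True)
try:
    store.delete_data("any.zarr")
except DataStoreError as error:
    print(error)  # -> Data store is read-only.
```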

test/core/store/test_store.py

Lines changed: 90 additions & 2 deletions

```diff
@@ -2,17 +2,19 @@
 # Permissions are hereby granted under the terms of the MIT License:
 # https://opensource.org/licenses/MIT.
 import unittest
+from typing import Literal
 from unittest.mock import MagicMock, patch
 
 import pytest
 from fsspec.registry import register_implementation
 
 from xcube.core.store import (
     DataStoreError,
-    PreloadedDataStore,
+    get_data_store_class,
     list_data_store_ids,
     new_data_store,
 )
+from xcube.core.store.fs.store import BaseFsDataStore
 from xcube.core.store.preload import NullPreloadHandle
 
 
@@ -51,10 +53,20 @@ def test_list_data_store_ids_detail(self):
 
 
 class TestBaseFsDataStore(unittest.TestCase):
+    def test_get_data_types(self):
+        self.assertEqual(
+            {"dataset", "geodataframe", "mldataset"},
+            set(BaseFsDataStore.get_data_types()),
+        )
+
     def test_get_data_opener_ids(self):
         store = new_data_store("file")
         self.assertEqual(
-            ("dataset:geotiff:file",), store.get_data_opener_ids(data_id="test.geotiff")
+            (
+                "mldataset:geotiff:file",
+                "dataset:geotiff:file",
+            ),
+            store.get_data_opener_ids(data_id="test.geotiff"),
         )
         self.assertEqual(
             ("mldataset:geotiff:file",),
@@ -97,6 +109,82 @@ def test_preload_data(self):
         self.assertIsInstance(store_test.preload_handle, NullPreloadHandle)
 
 
+class FsDataStoreTest(unittest.TestCase):
+    def test_get_filename_extensions_abfs_openers(self):
+        self.assert_accessors("abfs", "openers")
+
+    def test_get_filename_extensions_abfs_writers(self):
+        self.assert_accessors("abfs", "writers")
+
+    def test_get_filename_extensions_file_openers(self):
+        self.assert_accessors("file", "openers")
+
+    def test_get_filename_extensions_file_writers(self):
+        self.assert_accessors("file", "writers")
+
+    def test_get_filename_extensions_ftp_openers(self):
+        self.assert_accessors("ftp", "openers")
+
+    def test_get_filename_extensions_ftp_writers(self):
+        self.assert_accessors("ftp", "writers")
+
+    def test_get_filename_extensions_https_openers(self):
+        self.assert_accessors("https", "openers")
+
+    def test_get_filename_extensions_https_writers(self):
+        self.assert_accessors("https", "writers")
+
+    def test_get_filename_extensions_memory_openers(self):
+        self.assert_accessors("memory", "openers")
+
+    def test_get_filename_extensions_memory_writers(self):
+        self.assert_accessors("memory", "writers")
+
+    def test_get_filename_extensions_s3_openers(self):
+        self.assert_accessors("s3", "openers")
+
+    def test_get_filename_extensions_s3_writers(self):
+        self.assert_accessors("s3", "writers")
+
+    def test_get_filename_extensions_unknown_accessor_type(self):
+        with self.assertRaises(DataStoreError) as dse:
+            self.assert_accessors("s3", "modifiers")
+        self.assertEqual(
+            "Invalid accessor type. Must be 'openers' or 'writers', was 'modifiers'",
+            f"{dse.exception}",
+        )
+
+    def assert_accessors(
+        self, protocol: str, accessor_type: Literal["openers", "writers"]
+    ):
+        store_cls = get_data_store_class(protocol)
+        accessors = store_cls.get_filename_extensions(accessor_type)
+        expected_accessors = {
+            ".geojson": [f"geodataframe:geojson:{protocol}"],
+            ".levels": [f"mldataset:levels:{protocol}", f"dataset:levels:{protocol}"],
+            ".nc": [f"dataset:netcdf:{protocol}"],
+            ".shp": [f"geodataframe:shapefile:{protocol}"],
+            ".zarr": [f"dataset:zarr:{protocol}"],
+        }
+        if accessor_type == "openers":
+            geotiff_openers = {
+                ".geotiff": [
+                    f"mldataset:geotiff:{protocol}",
+                    f"dataset:geotiff:{protocol}",
+                ],
+                ".tif": [
+                    f"mldataset:geotiff:{protocol}",
+                    f"dataset:geotiff:{protocol}",
+                ],
+                ".tiff": [
+                    f"mldataset:geotiff:{protocol}",
+                    f"dataset:geotiff:{protocol}",
+                ],
+            }
+            expected_accessors.update(geotiff_openers)
+        self.assertEqual(accessors, expected_accessors)
+
+
 def test_fsspec_instantiation_error():
     error_string = "deliberate instantiation error for testing"
     register_implementation(
```
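
Per the `expected_accessors` fixture in `assert_accessors` above, the opener mapping asserted for the `file` protocol has the following shape (spelled out here for readability; this is the asserted expectation, not captured output):

```python
# Expected result of store_cls.get_filename_extensions("openers") for "file":
expected_openers = {
    ".geojson": ["geodataframe:geojson:file"],
    ".levels": ["mldataset:levels:file", "dataset:levels:file"],
    ".nc": ["dataset:netcdf:file"],
    ".shp": ["geodataframe:shapefile:file"],
    ".zarr": ["dataset:zarr:file"],
    ".geotiff": ["mldataset:geotiff:file", "dataset:geotiff:file"],
    ".tif": ["mldataset:geotiff:file", "dataset:geotiff:file"],
    ".tiff": ["mldataset:geotiff:file", "dataset:geotiff:file"],
}
```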

xcube/core/store/__init__.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -32,7 +32,7 @@
     new_data_descriptor,
 )
 from .error import DataStoreError
-from .fs.registry import new_fs_data_store
+from .fs.registry import get_filename_extensions, new_fs_data_store
 from .preload import PreloadHandle, PreloadState, PreloadStatus
 from .search import DataSearcher, DefaultSearchMixin
 from .store import (
```

xcube/core/store/fs/accessor.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -4,7 +4,7 @@
 
 import copy
 from abc import ABC, abstractmethod
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Optional
 
 import fsspec
 
```

xcube/core/store/fs/impl/dataset.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -3,7 +3,7 @@
 # https://opensource.org/licenses/MIT.
 
 from abc import ABC
-from typing import Optional, Tuple
+from typing import Optional
 
 import fsspec
 import rasterio
```
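
In both typing cleanups above, the dropped names (`Dict`, `Tuple`) had presumably become unused once the code moved to Python's built-in generics (`dict[...]`, `tuple[...]`); the usage sites themselves lie outside the visible hunks.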

xcube/core/store/fs/registry.py

Lines changed: 42 additions & 2 deletions

```diff
@@ -2,11 +2,12 @@
 # Permissions are hereby granted under the terms of the MIT License:
 # https://opensource.org/licenses/MIT.
 
-from collections.abc import Sequence
-from typing import Any, Optional
+from collections.abc import Mapping, Sequence
+from typing import Any, Literal, Optional
 
 import fsspec
 
+from ..accessor import find_data_opener_extensions, find_data_writer_extensions
 from ..assertions import assert_valid_params
 from ..error import DataStoreError
 from .accessor import FsAccessor, FsDataAccessor
@@ -247,3 +248,42 @@ def new_fs_data_store(
     }
     assert_valid_params(store_params, name="store_params", schema=store_params_schema)
     return fs_data_store_class(**store_params)
+
+
+def get_filename_extensions(
+    accessor_type: Literal["openers", "writers"] = "openers",
+) -> Mapping[str, list[str]]:
+    """Returns a mapping from filename extensions to lists of
+    identifiers of the data accessors that handle the respective format.
+
+    The function returns mappings either to data opener ids
+    or to data writer ids, depending on the given accessor type.
+
+    Args:
+        accessor_type: Either "openers" or "writers"; indicates
+            whether mappings of filename extensions to data openers
+            or to data writers shall be returned.
+            Defaults to "openers".
+
+    Returns:
+        A mapping from filename extensions to lists of data accessor ids.
+    """
+    if accessor_type == "openers":
+        find_extensions = find_data_opener_extensions
+    elif accessor_type == "writers":
+        find_extensions = find_data_writer_extensions
+    else:
+        raise DataStoreError(
+            f"Invalid accessor type. "
+            f"Must be 'openers' or 'writers', was '{accessor_type}'"
+        )
+    filename_extensions = {}
+    for ext in find_extensions():
+        file_extensions = ext.metadata.get("extensions", [])
+        for file_extension in file_extensions:
+            if file_extension in filename_extensions:
+                filename_extensions[file_extension] += [ext.name]
+            else:
+                filename_extensions[file_extension] = [ext.name]
+    return filename_extensions
```
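
The accumulation loop at the end of `get_filename_extensions()` could equally be written with `collections.defaultdict`; a behavior-preserving sketch, not the committed code:

```python
from collections import defaultdict


def collect_filename_extensions(extensions) -> dict[str, list[str]]:
    """Group accessor extension names by the filename extensions they declare."""
    mapping: defaultdict[str, list[str]] = defaultdict(list)
    for ext in extensions:
        for file_extension in ext.metadata.get("extensions", []):
            mapping[file_extension].append(ext.name)
    return dict(mapping)
```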
