Skip to content

Commit 1e65de9

Browse files
committed
first version of kassandra store
1 parent 719c8f0 commit 1e65de9

File tree

9 files changed

+513
-1
lines changed

9 files changed

+513
-1
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ cython_debug/
165165
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166166
# and can be added to the global gitignore or merged into this file. For a more nuclear
167167
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
168-
#.idea/
168+
.idea/
169169

170170
# PyPI configuration file
171171
.pypirc

doors_stores/__init__.py

Whitespace-only changes.

doors_stores/constants.py

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
# The MIT License (MIT)
2+
# Copyright (c) 2025 Tonio Fincke
3+
#
4+
# Permission is hereby granted, free of charge, to any person obtaining a copy
5+
# of this software and associated documentation files (the "Software"), to deal
6+
# in the Software without restriction, including without limitation the rights
7+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8+
# copies of the Software, and to permit persons to whom the Software is
9+
# furnished to do so, subject to the following conditions:
10+
#
11+
# The above copyright notice and this permission notice shall be included in all
12+
# copies or substantial portions of the Software.
13+
#
14+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20+
# SOFTWARE.
21+
22+
KASSANDRA_DATA_STORE_ID = "kassandra_ref"
23+
24+
KERCHUNK_FILE_TEMPLATE = {
25+
"version": 1,
26+
"refs": {
27+
".zgroup": {
28+
"zarr_format": 2
29+
},
30+
".zattrs": {},
31+
"time/.zarray": {
32+
"shape": 0,
33+
"chunks": 0,
34+
"fill_value": None,
35+
"order": "C",
36+
"filters": None,
37+
"dimension_separator": ".",
38+
"compressor": None,
39+
"zarr_format": 2,
40+
"dtype": ">f8"
41+
},
42+
"time/.zattrs": {
43+
"_ARRAY_DIMENSIONS": ["time"],
44+
"standard_name": "time",
45+
"units": "seconds since 1970-01-01 00:00:00 UTC",
46+
"calendar": "standard",
47+
"axis": "T"
48+
},
49+
"time/0": [
50+
"",
51+
3248,
52+
0
53+
],
54+
"lat/.zarray": {
55+
"shape": [288],
56+
"chunks": [288],
57+
"fill_value": None,
58+
"order": "C",
59+
"filters": None,
60+
"dimension_separator": ".",
61+
"compressor": None,
62+
"zarr_format": 2,
63+
"dtype": ">f4"
64+
},
65+
"lat/.zattrs": {
66+
"_ARRAY_DIMENSIONS": ["lat"],
67+
"standard_name": "latitude",
68+
"long_name": "latitude",
69+
"units": "degrees_north",
70+
"axis": "Y"
71+
},
72+
"lat/0": [
73+
"https://erddap-danubius.ve.ismar.cnr.it/erddap/griddap/kassandra_bs.nc?latitude%5B(40.9):1:(46.64)%5D",
74+
3236,
75+
1152
76+
],
77+
"lon/.zarray": {
78+
"shape": [717],
79+
"chunks": [717],
80+
"fill_value": None,
81+
"order": "C",
82+
"filters": None,
83+
"dimension_separator": ".",
84+
"compressor": None,
85+
"zarr_format": 2,
86+
"dtype": ">f4"
87+
},
88+
"lon/.zattrs": {
89+
"_ARRAY_DIMENSIONS": ["lon"],
90+
"standard_name": "longitude",
91+
"long_name": "longitude",
92+
"units": "degrees_east",
93+
"axis": "X"
94+
},
95+
"lon/0": [
96+
"https://erddap-danubius.ve.ismar.cnr.it/erddap/griddap/kassandra_bs.nc?longitude%5B(27.46):1:(41.78)%5D",
97+
3240,
98+
2868
99+
],
100+
"mean_wave_period/.zarray": {
101+
"shape": [0, 288, 717],
102+
"chunks": [1, 288, 717],
103+
"fill_value": -999.0,
104+
"order": "C",
105+
"filters": None,
106+
"dimension_separator": ".",
107+
"compressor": None,
108+
"zarr_format": 2,
109+
"dtype": ">f4"
110+
},
111+
"mean_wave_period/.zattrs": {
112+
"_ARRAY_DIMENSIONS": ["time", "lat", "lon"],
113+
"standard_name": "sea_surface_swell_wave_period",
114+
"long_name": "Mean Wave Period",
115+
"units": "s",
116+
"cell_methods": "level: mean",
117+
"color_norm": "log",
118+
"color_value_min": 0.0,
119+
"color_value_max": 20.0,
120+
},
121+
"sign_wave_height/.zarray": {
122+
"shape": [0, 288, 717],
123+
"chunks": [1, 288, 717],
124+
"fill_value": -999.0,
125+
"order": "C",
126+
"filters": None,
127+
"dimension_separator": ".",
128+
"compressor": None,
129+
"zarr_format": 2,
130+
"dtype": ">f4"
131+
},
132+
"sign_wave_height/.zattrs": {
133+
"_ARRAY_DIMENSIONS": ["time", "lat", "lon"],
134+
"standard_name": "sea_surface_wave_significant_height",
135+
"long_name": "Significant Wave Height",
136+
"units": "m",
137+
"cell_methods": "level: mean",
138+
"color_norm": "log",
139+
"color_value_min": 0.0,
140+
"color_value_max": 10.0,
141+
},
142+
"wave_direction/.zarray": {
143+
"shape": [0, 288, 717],
144+
"chunks": [1, 288, 717],
145+
"fill_value": -999.0,
146+
"order": "C",
147+
"filters": None,
148+
"dimension_separator": ".",
149+
"compressor": None,
150+
"zarr_format": 2,
151+
"dtype": ">f4"
152+
},
153+
"wave_direction/.zattrs": {
154+
"_ARRAY_DIMENSIONS": ["time", "lat", "lon"],
155+
"standard_name": "sea_surface_wave_to_direction",
156+
"long_name": "Mean Wave Direction",
157+
"units": "deg",
158+
"cell_methods": "level: mean",
159+
"color_value_min": 0.0,
160+
"color_value_max": 360.0,
161+
"color_bar_name": "twilight_shifted"
162+
},
163+
}
164+
}
165+
166+
VARCHUNK_TEMPLATE = [
167+
"",
168+
8744,
169+
825984
170+
]
171+
172+
TIME_URL_TEMPLATE = "https://erddap-danubius.ve.ismar.cnr.it/erddap/griddap/kassandra_bs.nc?time%5B(2024-06-30T00:00:00Z):1:({timestep})%5D"
173+
MVP_URL_TEMPLATE = "https://erddap-danubius.ve.ismar.cnr.it/erddap/griddap/kassandra_bs.nc?mean_wave_period%5B({timestep}):1:({timestep})%5D%5B(40.9):1:(46.64)%5D%5B(27.46):1:(41.78)%5D"
174+
SWH_URL_TEMPLATE = "https://erddap-danubius.ve.ismar.cnr.it/erddap/griddap/kassandra_bs.nc?sign_wave_height%5B({timestep}):1:({timestep})%5D%5B(40.9):1:(46.64)%5D%5B(27.46):1:(41.78)%5D"
175+
WD_URL_TEMPLATE = "https://erddap-danubius.ve.ismar.cnr.it/erddap/griddap/kassandra_bs.nc?wave_direction%5B({timestep}):1:({timestep})%5D%5B(40.9):1:(46.64)%5D%5B(27.46):1:(41.78)%5D"
176+
# SWH_URL_TEMPLATE = "https://erddap-danubius.ve.ismar.cnr.it/erddap/griddap/kassandra_bs.nc?mean_wave_period%5B(2025-03-08T01:00:00Z):1:(2025-03-08T01:00:00Z)%5D%5B(40.9):1:(46.64)%5D%5B(27.46):1:(41.78)%5D"
177+
# WD_URL_TEMPLATE = "https://erddap-danubius.ve.ismar.cnr.it/erddap/griddap/kassandra_bs.nc?mean_wave_period%5B(2025-03-08T01:00:00Z):1:(2025-03-08T01:00:00Z)%5D%5B(40.9):1:(46.64)%5D%5B(27.46):1:(41.78)%5D"
178+
179+
# TIME_FORMAT = "yyyy-mm-ddThh:MM:ss:00Z"
180+
TIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ"

doors_stores/kassandrastore.py

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
# The MIT License (MIT)
2+
# Copyright (c) 2025 Tonio Fincke
3+
#
4+
# Permission is hereby granted, free of charge, to any person obtaining a copy
5+
# of this software and associated documentation files (the "Software"), to deal
6+
# in the Software without restriction, including without limitation the rights
7+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8+
# copies of the Software, and to permit persons to whom the Software is
9+
# furnished to do so, subject to the following conditions:
10+
#
11+
# The above copyright notice and this permission notice shall be included in all
12+
# copies or substantial portions of the Software.
13+
#
14+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20+
# SOFTWARE.
21+
22+
import copy
23+
import fsspec
24+
import json
25+
import requests
26+
import xarray as xr
27+
import warnings
28+
29+
from collections.abc import MutableMapping
30+
from datetime import datetime
31+
from datetime import timedelta
32+
from pydap.parsers.das import parse_das
33+
from pydap.parsers.dds import DDSParser
34+
from typing import Dict
35+
36+
from xcube.core.store.datatype import DataType
37+
from xcube.core.store import get_data_store_class
38+
39+
from .constants import KERCHUNK_FILE_TEMPLATE
40+
from .constants import MVP_URL_TEMPLATE
41+
from .constants import SWH_URL_TEMPLATE
42+
from .constants import TIME_FORMAT
43+
from .constants import TIME_URL_TEMPLATE
44+
from .constants import VARCHUNK_TEMPLATE
45+
from .constants import WD_URL_TEMPLATE
46+
47+
KASSANDRA_DATASET_ATTRIBUTE_STRUCTURE = "https://erddap-danubius.ve.ismar.cnr.it/erddap/griddap/kassandra_bs.das"
48+
KASSANDRA_DATASET_DESCRIPTOR_STRUCTURE = "https://erddap-danubius.ve.ismar.cnr.it/erddap/griddap/kassandra_bs.dds"
49+
KASSANDRA_REFERENCE_FILENAME = "erddap_kassandra_mod_blk_phy_wav_for_2km_PT1H-1_202503.json"
50+
KASSANDRA_VARIABLE_TEMPLATES = {
51+
"mean_wave_period": MVP_URL_TEMPLATE,
52+
"sign_wave_height": SWH_URL_TEMPLATE,
53+
"wave_direction": WD_URL_TEMPLATE
54+
}
55+
56+
ReferenceDataStore = get_data_store_class('reference')
57+
58+
59+
class KassandraKerchunkDataStore(ReferenceDataStore):
60+
61+
def __init__(self):
62+
self._kass_ref = self._get_kassandra_reference_dictionary()
63+
with open(KASSANDRA_REFERENCE_FILENAME, "w") as krf:
64+
json.dump(self._kass_ref, krf, indent=4)
65+
super().__init__([KASSANDRA_REFERENCE_FILENAME])
66+
67+
def _read_das(self) -> Dict:
68+
response = requests.get(KASSANDRA_DATASET_ATTRIBUTE_STRUCTURE)
69+
das_content = response.text
70+
return parse_das(das_content)
71+
72+
def _get_num_timesteps(self) -> int:
73+
response = requests.get(KASSANDRA_DATASET_DESCRIPTOR_STRUCTURE)
74+
dds = DDSParser(response.text).parse()
75+
return dds.get("time").size
76+
77+
def _get_kassandra_reference_dictionary(self) -> Dict:
78+
das = self._read_das()
79+
first_timestep = das.get("NC_GLOBAL").get("time_coverage_start")
80+
last_timestep = das.get("NC_GLOBAL").get("time_coverage_end")
81+
num_time_steps = self._get_num_timesteps()
82+
83+
kass_ref = KERCHUNK_FILE_TEMPLATE.copy()
84+
kass_ref["refs"][".zattrs"] = das.get("NC_GLOBAL")
85+
kass_ref["refs"]["time/.zarray"]["shape"] = [num_time_steps]
86+
kass_ref["refs"]["time/.zarray"]["chunks"] = [num_time_steps]
87+
kass_ref["refs"]["time/0"][0] = TIME_URL_TEMPLATE.format(timestep=last_timestep)
88+
kass_ref["refs"]["time/0"][2] = num_time_steps * 8
89+
kass_ref["refs"]["mean_wave_period/.zarray"]["shape"] = [num_time_steps, 288, 717]
90+
kass_ref["refs"]["sign_wave_height/.zarray"]["shape"] = [num_time_steps, 288, 717]
91+
kass_ref["refs"]["wave_direction/.zarray"]["shape"] = [num_time_steps, 288, 717]
92+
current_time = datetime.strptime(first_timestep, TIME_FORMAT)
93+
one_hour = timedelta(hours=1)
94+
for i in range(num_time_steps):
95+
ctf = current_time.strftime(TIME_FORMAT)
96+
for kass_var_name, template in KASSANDRA_VARIABLE_TEMPLATES.items():
97+
url = template.format(timestep=ctf)
98+
var_chunk = copy.deepcopy(VARCHUNK_TEMPLATE)
99+
var_chunk[0] = url
100+
kass_ref["refs"][f"{kass_var_name}/{i}.0.0"] = var_chunk
101+
current_time += one_hour
102+
return kass_ref
103+
104+
def _get_start_and_end(self, key: str):
105+
ref = self._kass_ref.get("refs").get(key)
106+
return ref[1], ref[1] + ref[2]
107+
108+
def open_data(
109+
self, data_id: str, opener_id: str = None, **open_params
110+
) -> xr.Dataset:
111+
data_type = open_params.pop("data_type", None)
112+
if DataType.normalize(data_type).alias == "mldataset":
113+
warnings.warn(
114+
"ReferenceDataStore can only represent the data resource as xr.Dataset."
115+
)
116+
if open_params:
117+
warnings.warn(
118+
f"open_params are not supported yet,"
119+
f" but passing forward {', '.join(open_params.keys())}"
120+
)
121+
ref_path = self._refs[data_id]["ref_path"]
122+
open_params.pop("consolidated", False)
123+
124+
class ByteSubsetMapper(MutableMapping):
125+
126+
def __init__(self, original_mapper, kass_ref):
127+
self.original_mapper = original_mapper
128+
self._kass_ref = kass_ref
129+
130+
def keys(self):
131+
return self.original_mapper.keys()
132+
133+
def values(self):
134+
return self.original_mapper.values()
135+
136+
def get(self, key):
137+
return self.original_mapper.get(key)
138+
139+
def __setitem__(self, key: str, value: bytes) -> None:
140+
self.original_mapper.__set_item__(key, value)
141+
142+
def __delitem__(self, key: str) -> None:
143+
self.original_mapper.__del_item__(key)
144+
145+
def __contains__(self, item):
146+
return self.original_mapper.__contains__(item)
147+
148+
def __iter__(self):
149+
return self.original_mapper.__iter__()
150+
151+
def __len__(self):
152+
return self.original_mapper.__len__()
153+
154+
def __getitem__(self, key):
155+
data = self.original_mapper[key]
156+
ref = self._kass_ref.get("refs").get(key)
157+
if isinstance(ref, list) and len(ref) >= 2:
158+
if len(data) > ref[2]:
159+
data = data[ref[1]:ref[1] + ref[2]]
160+
return data
161+
162+
ref_mapping = fsspec.get_mapper("reference://", fo=ref_path, **self._ref_kwargs)
163+
164+
ref_mapping = ByteSubsetMapper(ref_mapping, self._kass_ref)
165+
166+
return xr.open_zarr(ref_mapping, consolidated=False, **open_params)

doors_stores/plugin.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# The MIT License (MIT)
2+
# Copyright (c) 2025 by the xcube development team and contributors
3+
#
4+
# Permission is hereby granted, free of charge, to any person obtaining a copy
5+
# of this software and associated documentation files (the "Software"), to deal
6+
# in the Software without restriction, including without limitation the rights
7+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8+
# copies of the Software, and to permit persons to whom the Software is
9+
# furnished to do so, subject to the following conditions:
10+
#
11+
# The above copyright notice and this permission notice shall be included in all
12+
# copies or substantial portions of the Software.
13+
#
14+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20+
# SOFTWARE.
21+
22+
from xcube.constants import EXTENSION_POINT_DATA_STORES
23+
from xcube.util import extension
24+
25+
from .constants import KASSANDRA_DATA_STORE_ID
26+
27+
28+
def init_plugin(ext_registry: extension.ExtensionRegistry):
29+
ext_registry.add_extension(
30+
loader=extension.import_component(
31+
'doors_stores.kassandrastore:KassandraKerchunkDataStore'),
32+
point=EXTENSION_POINT_DATA_STORES,
33+
name=KASSANDRA_DATA_STORE_ID,
34+
description='xarray.Dataset in Kerchunk references format'
35+
' from Kassandra ERDDAP server'
36+
)

0 commit comments

Comments
 (0)