Skip to content

Commit b575ead

Browse files
authored
Merge pull request #535 from dcs4cop/toniof-516-unify-datasetio2
Unify Dataset IO
2 parents 322dadb + 9797cf9 commit b575ead

File tree

9 files changed

+457
-99
lines changed

9 files changed

+457
-99
lines changed

examples/serve/demo/config-with-stores.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ DataStores:
3636
# client_kwargs:
3737
# endpoint_url: https://s3.eu-central-1.amazonaws.com
3838
Datasets:
39-
- Identifier: "*.zarr"
39+
- Path: "*.zarr"
4040
Style: "default"
4141
# ChunkCacheSize: 1G
4242

test/core/store/test_storepool.py

Lines changed: 29 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -369,6 +369,34 @@ def test_multi_stores_with_params(self):
369369
self.assertIsInstance(pool, DataStorePool)
370370
self.assertEqual(["local-1", "local-2", "ram-1", "ram-2"], pool.store_instance_ids)
371371
for instance_id in pool.store_instance_ids:
372-
self.assertTrue(pool.has_store_config(instance_id))
372+
self.assertTrue(pool.has_store_instance(instance_id))
373373
self.assertIsInstance(pool.get_store_config(instance_id), DataStoreConfig)
374374
self.assertIsInstance(pool.get_store(instance_id), DataStore)
375+
376+
def test_get_store_instance_id(self):
377+
store_params_1 = {
378+
"root": "./bibo"
379+
}
380+
ds_config_1 = DataStoreConfig(store_id='file',
381+
store_params=store_params_1)
382+
ds_configs = {'dir-1': ds_config_1}
383+
pool = DataStorePool(ds_configs)
384+
385+
store_params_2 = {
386+
"root": "./babo"
387+
}
388+
ds_config_2 = DataStoreConfig(store_id='file',
389+
store_params=store_params_2)
390+
ds_config_3 = DataStoreConfig(store_id='file',
391+
store_params=store_params_1,
392+
title='A third configuration')
393+
394+
self.assertEqual('dir-1', pool.get_store_instance_id(ds_config_1))
395+
self.assertEqual('dir-1', pool.get_store_instance_id(ds_config_1,
396+
strict_check=True))
397+
398+
self.assertIsNone(pool.get_store_instance_id(ds_config_2))
399+
400+
self.assertEqual('dir-1', pool.get_store_instance_id(ds_config_3))
401+
self.assertIsNone(pool.get_store_instance_id(ds_config_3,
402+
strict_check=True))

test/webapi/res/test/config-datastores.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ DataStores:
44
StoreParams:
55
root: examples/serve/demo
66
Datasets:
7-
- Identifier: "cube-1-250-250.zarr"
7+
- Path: "cube-1-250-250.zarr"
88
Style: "default"
9-
- Identifier: "cube-5-100-200.zarr"
9+
- Path: "cube-5-100-200.zarr"
1010
Style: "default"

test/webapi/test_config.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,7 @@ def test_from_dict(self):
6060
},
6161
"Datasets": [
6262
{
63-
"Identifier": "*.zarr",
63+
"Path": "*.zarr",
6464
"Style": "default"
6565
}
6666
]

test/webapi/test_context.py

Lines changed: 243 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -221,3 +221,246 @@ def test_interpolates_vars(self):
221221
normalize_prefix('/${name}'))
222222
self.assertEqual(f'/xcube/v{version}',
223223
normalize_prefix('/${name}/v${version}'))
224+
225+
226+
class MaybeAssignStoreInstanceIdsTest(unittest.TestCase):
227+
228+
def test_find_common_store(self):
229+
ctx = new_test_service_context()
230+
dataset_configs = [
231+
{
232+
'Identifier': 'z_0',
233+
'FileSystem': 'local',
234+
'Path': '/one/path/abc.zarr'
235+
},
236+
{
237+
'Identifier': 'z_1',
238+
'FileSystem': 'local',
239+
'Path': '/one/path/def.zarr'
240+
},
241+
{
242+
'Identifier': 'z_4',
243+
'FileSystem': 'obs',
244+
'Path': '/one/path/mno.zarr'
245+
},
246+
{
247+
'Identifier': 'z_2',
248+
'FileSystem': 'local',
249+
'Path': '/another/path/ghi.zarr'
250+
},
251+
{
252+
'Identifier': 'z_3',
253+
'FileSystem': 'local',
254+
'Path': '/one/more/path/jkl.zarr'
255+
},
256+
{
257+
'Identifier': 'z_5',
258+
'FileSystem': 'obs',
259+
'Path': '/one/path/pqr.zarr'
260+
},
261+
{
262+
'Identifier': 'z_6',
263+
'FileSystem': 'local',
264+
'Path': '/one/path/stu.zarr'
265+
},
266+
{
267+
'Identifier': 'z_7',
268+
'FileSystem': 'local',
269+
'Path': '/one/more/path/vwx.zarr'
270+
},
271+
]
272+
ctx.config['Datasets'] = dataset_configs
273+
adjusted_dataset_configs = ctx.get_dataset_configs()
274+
275+
expected_dataset_configs = [
276+
{
277+
'Identifier': 'z_0',
278+
'FileSystem': 'local',
279+
'Path': 'path/abc.zarr',
280+
'StoreInstanceId': 'local_2'
281+
},
282+
{
283+
'Identifier': 'z_1',
284+
'FileSystem': 'local',
285+
'Path': 'path/def.zarr',
286+
'StoreInstanceId': 'local_2'
287+
},
288+
{
289+
'Identifier': 'z_4',
290+
'FileSystem': 'obs',
291+
'Path': 'mno.zarr',
292+
'StoreInstanceId': 'obs_1'
293+
},
294+
{
295+
'Identifier': 'z_2',
296+
'FileSystem': 'local',
297+
'Path': 'ghi.zarr',
298+
'StoreInstanceId': 'local_1'
299+
},
300+
{
301+
'Identifier': 'z_3',
302+
'FileSystem': 'local',
303+
'Path': 'more/path/jkl.zarr',
304+
'StoreInstanceId': 'local_2'
305+
},
306+
{
307+
'Identifier': 'z_5',
308+
'FileSystem': 'obs',
309+
'Path': 'pqr.zarr',
310+
'StoreInstanceId': 'obs_1'
311+
},
312+
{
313+
'Identifier': 'z_6',
314+
'FileSystem': 'local',
315+
'Path': 'path/stu.zarr',
316+
'StoreInstanceId': 'local_2'
317+
},
318+
{
319+
'Identifier': 'z_7',
320+
'FileSystem': 'local',
321+
'Path': 'more/path/vwx.zarr',
322+
'StoreInstanceId': 'local_2'
323+
},
324+
]
325+
self.assertEqual(expected_dataset_configs, adjusted_dataset_configs)
326+
327+
def test_with_instance_id(self):
328+
ctx = new_test_service_context()
329+
dataset_config = {'Identifier': 'zero',
330+
'Title': 'Test 0',
331+
'FileSystem': 'local',
332+
'StoreInstanceId': 'some_id'}
333+
dataset_config_copy = dataset_config.copy()
334+
335+
ctx.config['Datasets'] = [dataset_config]
336+
dataset_config = ctx.get_dataset_configs()[0]
337+
338+
self.assertEqual(dataset_config_copy, dataset_config)
339+
340+
def test_local(self):
341+
ctx = new_test_service_context()
342+
dataset_config = {'Identifier': 'one',
343+
'Title': 'Test 1',
344+
'FileSystem': 'local',
345+
'Path': 'cube-1-250-250.zarr'}
346+
347+
ctx.config['Datasets'] = [dataset_config]
348+
dataset_config = ctx.get_dataset_configs()[0]
349+
350+
self.assertEqual(['Identifier', 'Title', 'FileSystem', 'Path',
351+
'StoreInstanceId'],
352+
list(dataset_config.keys()))
353+
self.assertEqual('one',
354+
dataset_config['Identifier'])
355+
self.assertEqual('Test 1', dataset_config['Title'])
356+
self.assertEqual('local', dataset_config['FileSystem'])
357+
self.assertEqual('cube-1-250-250.zarr', dataset_config["Path"])
358+
self.assertEqual('local_1', dataset_config['StoreInstanceId'])
359+
360+
def test_s3(self):
361+
ctx = new_test_service_context()
362+
dataset_config = {'Identifier': 'two',
363+
'Title': 'Test 2',
364+
'FileSystem': 'obs',
365+
'Endpoint': 'https://s3.eu-central-1.amazonaws.com',
366+
'Path': 'xcube-examples/OLCI-SNS-RAW-CUBE-2.zarr',
367+
'Region': 'eu-central-1'}
368+
369+
ctx.config['Datasets'] = [dataset_config]
370+
dataset_config = ctx.get_dataset_configs()[0]
371+
372+
self.assertEqual(['Identifier', 'Title', 'FileSystem', 'Endpoint',
373+
'Path', 'Region', 'StoreInstanceId'],
374+
list(dataset_config.keys()))
375+
self.assertEqual('two', dataset_config['Identifier'])
376+
self.assertEqual('Test 2', dataset_config['Title'])
377+
self.assertEqual('obs', dataset_config['FileSystem'])
378+
self.assertEqual('https://s3.eu-central-1.amazonaws.com',
379+
dataset_config['Endpoint'])
380+
self.assertEqual('OLCI-SNS-RAW-CUBE-2.zarr', dataset_config['Path'])
381+
self.assertEqual('eu-central-1', dataset_config['Region'])
382+
self.assertEqual('obs_1', dataset_config['StoreInstanceId'])
383+
384+
def test_memory(self):
385+
ctx = new_test_service_context()
386+
dataset_config = {'Identifier': 'three',
387+
'Title': 'Test 3',
388+
'FileSystem': 'memory'}
389+
dataset_config_copy = dataset_config.copy()
390+
391+
ctx.config['Datasets'] = [dataset_config]
392+
dataset_config = ctx.get_dataset_configs()[0]
393+
394+
self.assertEqual(dataset_config_copy, dataset_config)
395+
396+
def test_missing_file_system(self):
397+
ctx = new_test_service_context()
398+
dataset_config = {'Identifier': 'five',
399+
'Title': 'Test 5',
400+
'Path': 'cube-1-250-250.zarr'}
401+
402+
ctx.config['Datasets'] = [dataset_config]
403+
dataset_config = ctx.get_dataset_configs()[0]
404+
405+
self.assertEqual(['Identifier', 'Title', 'Path', 'StoreInstanceId'],
406+
list(dataset_config.keys()))
407+
self.assertEqual('five', dataset_config['Identifier'])
408+
self.assertEqual('Test 5', dataset_config['Title'])
409+
self.assertEqual('cube-1-250-250.zarr', dataset_config['Path'])
410+
self.assertEqual('local_1', dataset_config['StoreInstanceId'])
411+
412+
def test_invalid_file_system(self):
413+
ctx = new_test_service_context()
414+
dataset_config = {'Identifier': 'five',
415+
'Title': 'Test 5a',
416+
'FileSystem': 'invalid',
417+
'Path': 'cube-1-250-250.zarr'}
418+
419+
ctx.config['Datasets'] = [dataset_config]
420+
dataset_config = ctx.get_dataset_configs()[0]
421+
422+
self.assertEqual(['Identifier', 'Title', 'FileSystem', 'Path'],
423+
list(dataset_config.keys()))
424+
self.assertEqual('five', dataset_config['Identifier'])
425+
self.assertEqual('Test 5a', dataset_config['Title'])
426+
self.assertEqual('invalid', dataset_config['FileSystem'])
427+
self.assertEqual('cube-1-250-250.zarr', dataset_config['Path'])
428+
429+
def test_local_store_already_existing(self):
430+
ctx = new_test_service_context()
431+
dataset_config_1 = {'Identifier': 'six',
432+
'Title': 'Test 6',
433+
'FileSystem': 'local',
434+
'Path': 'cube-1-250-250.zarr'}
435+
dataset_config_2 = {'Identifier': 'six_a',
436+
'Title': 'Test 6 a',
437+
'FileSystem': 'local',
438+
'Path': 'cube-5-100-200.zarr'}
439+
440+
ctx.config['Datasets'] = [dataset_config_1, dataset_config_2]
441+
dataset_configs = ctx.get_dataset_configs()
442+
443+
self.assertEqual(dataset_configs[0]['StoreInstanceId'],
444+
dataset_configs[1]['StoreInstanceId'])
445+
446+
def test_s3_store_already_existing(self):
447+
ctx = new_test_service_context()
448+
dataset_config_1 = {'Identifier': 'seven',
449+
'Title': 'Test 7',
450+
'FileSystem': 'obs',
451+
'Endpoint': 'https://s3.eu-central-1.amazonaws.com',
452+
'Path': 'xcube-examples/OLCI-SNS-RAW-CUBE-2.zarr',
453+
'Region': 'eu-central-1'}
454+
455+
dataset_config_2 = {'Identifier': 'seven_a',
456+
'Title': 'Test 7 a',
457+
'FileSystem': 'obs',
458+
'Endpoint': 'https://s3.eu-central-1.amazonaws.com',
459+
'Path': 'xcube-examples/OLCI-SNS-RAW-CUBE-3.zarr',
460+
'Region': 'eu-central-1'}
461+
462+
ctx.config['Datasets'] = [dataset_config_1, dataset_config_2]
463+
dataset_configs = ctx.get_dataset_configs()
464+
465+
self.assertEqual(dataset_configs[0]['StoreInstanceId'],
466+
dataset_configs[1]['StoreInstanceId'])

xcube/core/store/storepool.py

Lines changed: 19 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -263,7 +263,25 @@ def store_instance_ids(self) -> List[str]:
263263
def store_configs(self) -> List[DataStoreConfig]:
264264
return [v.store_config for k, v in self._instances.items()]
265265

266-
def has_store_config(self, store_instance_id: str) -> bool:
266+
def get_store_instance_id(self,
267+
store_config: DataStoreConfig,
268+
strict_check: bool = False) -> Optional[str]:
269+
assert_instance(store_config, DataStoreConfig, 'store_config')
270+
for id, instance in self._instances.items():
271+
if strict_check:
272+
if instance.store_config == store_config:
273+
return id
274+
else:
275+
if instance.store_config.store_id == store_config.store_id and \
276+
instance.store_config.store_params == \
277+
store_config.store_params:
278+
return id
279+
return None
280+
281+
def has_store_config(self, store_config: DataStoreConfig) -> bool:
282+
return self.get_store_instance_id(store_config) is not None
283+
284+
def has_store_instance(self, store_instance_id: str) -> bool:
267285
assert_instance(store_instance_id, str, 'store_instance_id')
268286
return store_instance_id in self._instances
269287

xcube/webapi/config.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -91,6 +91,7 @@ def get_schema(cls) -> JsonObjectSchema:
9191
factory=DatasetConfig,
9292
required=[
9393
'Identifier',
94+
'Path'
9495
],
9596
properties=dict(
9697
Identifier=IdentifierSchema,
@@ -181,10 +182,11 @@ def get_schema(cls) -> JsonObjectSchema:
181182
return JsonObjectSchema(
182183
factory=DataStoreDatasetConfig,
183184
required=[
184-
'Identifier'
185+
'Path'
185186
],
186187
properties=dict(
187188
Identifier=IdentifierSchema,
189+
Path=PathSchema,
188190
StoreInstanceId=IdentifierSchema, # will be set by server
189191
StoreOpenParams=JsonObjectSchema(additional_properties=True),
190192
**_get_common_dataset_properties()

0 commit comments

Comments
 (0)