Skip to content

Commit 1a6c167

Browse files
feat: Add separate filepath_default for filepath references
Filepath storage is NOT part of the Object-Augmented Schema (OAS) - it only provides references to externally-managed files. Allow a separate default configuration for filepath references vs. integrated storage.

Configuration:
- stores.default - for integrated storage (<blob>, <object>, <npy>, <attach>)
- stores.filepath_default - for filepath references (<filepath>)

This allows:
- Integrated storage on S3 or a fast filesystem
- Filepath references to acquisition files on a NAS or a different location

Example:
{
  "stores": {
    "default": "main",
    "filepath_default": "raw_data",
    "main": {
      "protocol": "s3",
      "bucket": "processed-data",
      "location": "lab-project"
    },
    "raw_data": {
      "protocol": "file",
      "location": "/mnt/nas/acquisition"
    }
  }
}

Usage:
- data : <blob>          # Uses stores.default (main)
- arrays : <object>      # Uses stores.default (main)
- raw : <filepath>       # Uses stores.filepath_default (raw_data)
- raw : <filepath@acq>   # Explicitly names store (overrides default)

Changes:
- settings.py: Add use_filepath_default parameter to get_store_spec()
- builtin_codecs.py: FilepathCodec uses use_filepath_default=True
- test_settings.py: Add 3 tests for filepath_default behavior
- settings.py: Update template to include filepath_default example

Architectural rationale:
- Hash/schema storage: integrated into OAS, DataJoint manages lifecycle
- Filepath storage: references only, users manage lifecycle
- Different defaults reflect this fundamental distinction
1 parent 79f60b2 commit 1a6c167

File tree

3 files changed

+97
-7
lines changed

3 files changed

+97
-7
lines changed

src/datajoint/builtin_codecs.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -815,7 +815,8 @@ def encode(self, value: Any, *, key: dict | None = None, store_name: str | None
815815
path = str(value)
816816

817817
# Get store spec to check prefix configuration
818-
spec = config.get_store_spec(store_name)
818+
# Use filepath_default if no store specified (filepath is not part of OAS)
819+
spec = config.get_store_spec(store_name, use_filepath_default=True)
819820

820821
# Validate path doesn't use reserved sections (hash and schema)
821822
path_normalized = path.lstrip("/")

src/datajoint/settings.py

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -335,14 +335,20 @@ def convert_path(cls, v: Any) -> Path | None:
335335
return None
336336
return Path(v) if not isinstance(v, Path) else v
337337

338-
def get_store_spec(self, store: str | None = None) -> dict[str, Any]:
338+
def get_store_spec(
339+
self, store: str | None = None, *, use_filepath_default: bool = False
340+
) -> dict[str, Any]:
339341
"""
340342
Get configuration for a storage store.
341343
342344
Parameters
343345
----------
344346
store : str, optional
345-
Name of the store to retrieve. If None, uses stores.default.
347+
Name of the store to retrieve. If None, uses the appropriate default.
348+
use_filepath_default : bool, optional
349+
If True and store is None, uses stores.filepath_default instead of
350+
stores.default. Use for filepath references which are not part of OAS.
351+
Default: False (use stores.default for integrated storage).
346352
347353
Returns
348354
-------
@@ -356,11 +362,23 @@ def get_store_spec(self, store: str | None = None) -> dict[str, Any]:
356362
"""
357363
# Handle default store
358364
if store is None:
359-
if "default" not in self.stores:
360-
raise DataJointError("stores.default is not configured")
361-
store = self.stores["default"]
365+
if use_filepath_default:
366+
# Filepath references use separate default (not part of OAS)
367+
if "filepath_default" not in self.stores:
368+
raise DataJointError(
369+
"stores.filepath_default is not configured. "
370+
"Set stores.filepath_default or specify store explicitly with <filepath@store>"
371+
)
372+
store = self.stores["filepath_default"]
373+
else:
374+
# Integrated storage (hash, schema) uses stores.default
375+
if "default" not in self.stores:
376+
raise DataJointError("stores.default is not configured")
377+
store = self.stores["default"]
378+
362379
if not isinstance(store, str):
363-
raise DataJointError("stores.default must be a string")
380+
default_key = "filepath_default" if use_filepath_default else "default"
381+
raise DataJointError(f"stores.{default_key} must be a string")
364382

365383
# Check store exists
366384
if store not in self.stores:
@@ -778,13 +796,18 @@ def save_template(
778796
},
779797
"stores": {
780798
"default": "main",
799+
"filepath_default": "raw_data",
781800
"main": {
782801
"protocol": "file",
783802
"location": "/data/my-project/main",
784803
"partition_pattern": None,
785804
"token_length": 8,
786805
"subfolding": None,
787806
},
807+
"raw_data": {
808+
"protocol": "file",
809+
"location": "/data/my-project/raw",
810+
},
788811
},
789812
"loglevel": "INFO",
790813
"safemode": True,

tests/unit/test_settings.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,72 @@ def test_get_store_spec_no_default_configured(self):
375375
finally:
376376
dj.config.stores = original_stores
377377

378+
def test_get_store_spec_filepath_default(self):
379+
"""Test filepath_default for filepath references (not part of OAS)."""
380+
original_stores = dj.config.stores.copy()
381+
try:
382+
dj.config.stores["default"] = "integrated"
383+
dj.config.stores["filepath_default"] = "raw_data"
384+
dj.config.stores["integrated"] = {
385+
"protocol": "s3",
386+
"endpoint": "s3.amazonaws.com",
387+
"bucket": "my-bucket",
388+
"location": "processed",
389+
"access_key": "xxx",
390+
"secret_key": "yyy",
391+
}
392+
dj.config.stores["raw_data"] = {
393+
"protocol": "file",
394+
"location": "/data/acquisition",
395+
}
396+
397+
# Regular default for integrated storage
398+
spec = dj.config.get_store_spec(None, use_filepath_default=False)
399+
assert spec["protocol"] == "s3"
400+
assert spec["location"] == "processed"
401+
402+
# Filepath default for filepath references
403+
spec = dj.config.get_store_spec(None, use_filepath_default=True)
404+
assert spec["protocol"] == "file"
405+
assert spec["location"] == "/data/acquisition"
406+
finally:
407+
dj.config.stores = original_stores
408+
409+
def test_get_store_spec_no_filepath_default(self):
410+
"""Test error when filepath_default not configured but requested."""
411+
original_stores = dj.config.stores.copy()
412+
try:
413+
dj.config.stores["default"] = "integrated"
414+
dj.config.stores["integrated"] = {
415+
"protocol": "file",
416+
"location": "/data/store",
417+
}
418+
# No filepath_default configured
419+
420+
with pytest.raises(DataJointError, match="stores.filepath_default is not configured"):
421+
dj.config.get_store_spec(None, use_filepath_default=True)
422+
finally:
423+
dj.config.stores = original_stores
424+
425+
def test_get_store_spec_explicit_store_ignores_defaults(self):
426+
"""Test that explicit store name bypasses both defaults."""
427+
original_stores = dj.config.stores.copy()
428+
try:
429+
dj.config.stores["default"] = "store_a"
430+
dj.config.stores["filepath_default"] = "store_b"
431+
dj.config.stores["store_a"] = {"protocol": "file", "location": "/a"}
432+
dj.config.stores["store_b"] = {"protocol": "file", "location": "/b"}
433+
dj.config.stores["store_c"] = {"protocol": "file", "location": "/c"}
434+
435+
# Explicitly naming store_c should work regardless of use_filepath_default
436+
spec = dj.config.get_store_spec("store_c", use_filepath_default=False)
437+
assert spec["location"] == "/c"
438+
439+
spec = dj.config.get_store_spec("store_c", use_filepath_default=True)
440+
assert spec["location"] == "/c"
441+
finally:
442+
dj.config.stores = original_stores
443+
378444

379445
class TestStoreSecrets:
380446
"""Test loading store credentials from secrets directory."""

0 commit comments

Comments
 (0)