Merged
23 commits
All commits by dimitri-yatsenko, Jan 14, 2026:

- 9bf4848 feat: implement unified stores configuration system
- 90d6d55 feat: update references to use unified stores configuration
- 76d5f94 test: update test_settings.py for unified stores configuration
- 662471b test: update test fixtures for unified stores configuration
- 1366776 test: update test_update1.py fixture for unified stores
- cc77b37 feat: enforce reserved sections for filepath codec
- 7df2f97 test: Add unit tests for filepath reserved section validation
- fe29274 fix: Only set 'secure' default for S3 protocol
- 79f60b2 feat: Add configurable prefixes for storage sections
- 1a6c167 feat: Add separate filepath_default for filepath references
- 2d7d935 docs: clarify <filepath@> error message to enforce @ convention
- 401bffe chore: bump version to 2.0.0a22 and apply pre-commit formatting
- 5ccf3aa test: integration tests - 496 passed, 24 object storage fixture failures
- 4cd99f6 fix: create storage directories in test fixtures for StorageBackend v…
- 6487ae4 test: update summary - all 520 integration tests passing ✓
- 35a2c60 style: apply ruff-format to conftest.py
- 5512659 Remove 'objects' literal from schema-addressed storage paths
- c3e0163 Support partition_pattern in schema-addressed storage
- cd7c89f docs: fix partition_pattern order preservation
- 119eb9b feat: add parallel schema migration helpers to migrate module
- 93cf0ef feat: add migrate_external_pointers_v2 helper
- 4b0e9a8 style: fix linting issues in migrate.py
- 63ecba9 style: apply ruff-format to builtin_codecs.py
33 changes: 33 additions & 0 deletions integration_test_summary.txt
@@ -0,0 +1,33 @@
## Integration Test Results - FINAL

**Test Summary:**
- ✅ 520 tests PASSED
- ⏭️ 7 tests SKIPPED
- ❌ 0 tests FAILED

**All tests passing!**

### Initial Issues Found and Fixed

The initial run had 24 failures in the object storage tests, caused by test fixture bugs:
- `conftest.py`: object_storage_config wasn't creating the `test_project` subdirectory
- `test_update1.py`: mock_stores_update wasn't creating `djtest` subdirectories

**Root cause:** Test fixtures were configuring storage locations but not creating
the directories. StorageBackend validates that file protocol locations exist
during initialization.

**Fix:** Added `Path(location).mkdir(parents=True, exist_ok=True)` in both fixtures.

### Test Coverage Verified

All unified stores configuration functionality tested:
- ✅ Configuration system with stores.default and stores.filepath_default
- ✅ Prefix validation and separation (hash_prefix, schema_prefix, filepath_prefix)
- ✅ Filepath codec validation with dynamic prefix checking
- ✅ Store backend initialization and validation
- ✅ Object storage (file, stream, folder operations)
- ✅ Hash-addressed storage (blob, attach)
- ✅ Schema-addressed storage (object, npy)
- ✅ All relational operators and queries
- ✅ Schema management and dependencies
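The fixture fix referenced above is a one-liner. A minimal sketch of the pattern, assuming a pytest fixture shaped roughly like the ones named in the summary (the fixture body and config keys here are illustrative, not the actual conftest.py code):

```python
# Hypothetical sketch of the conftest.py fixture fix; the real fixture
# differs, but the key change is the mkdir call before the store is used.
from pathlib import Path

import pytest


@pytest.fixture
def object_storage_config(tmp_path):
    location = tmp_path / "test_project"
    # StorageBackend validates that file-protocol locations exist during
    # initialization, so the directory must be created up front.
    Path(location).mkdir(parents=True, exist_ok=True)
    return {"protocol": "file", "location": str(location)}
```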
78 changes: 72 additions & 6 deletions src/datajoint/builtin_codecs.py
@@ -323,13 +323,16 @@ def _build_path(
field: str,
primary_key: dict,
ext: str | None = None,
store_name: str | None = None,
) -> tuple[str, str]:
"""
Build schema-addressed storage path.

Constructs a path that mirrors the database schema structure:
``{schema}/{table}/{pk_values}/{field}{ext}``

Supports partitioning if configured in the store.

Parameters
----------
schema : str
@@ -342,6 +345,8 @@ def _build_path(
Primary key values.
ext : str, optional
File extension (e.g., ".npy", ".zarr").
store_name : str, optional
Store name for retrieving partition configuration.

Returns
-------
@@ -350,13 +355,21 @@
is a unique identifier.
"""
from .storage import build_object_path
from . import config

# Get store configuration for partition_pattern and token_length
spec = config.get_store_spec(store_name)
partition_pattern = spec.get("partition_pattern")
token_length = spec.get("token_length", 8)

return build_object_path(
schema=schema,
table=table,
field=field,
primary_key=primary_key,
ext=ext,
partition_pattern=partition_pattern,
token_length=token_length,
)
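For reference, `_build_path` now reads `partition_pattern` and `token_length` from the store spec via `config.get_store_spec()`. A hedged sketch of a store entry carrying these keys — only the two key names come from the diff; the dict-style `stores` layout and the pattern value are assumptions:

```python
# Hypothetical store entry; "partition_pattern" and "token_length" are the
# keys read by _build_path above, everything else is assumed for illustration.
import datajoint as dj

dj.config["stores"] = {
    "default": "raw",
    "raw": {
        "protocol": "file",
        "location": "/data/stores/raw",
        "partition_pattern": "{subject_id}/{session_id}",  # optional; omit for no partitioning
        "token_length": 8,  # length of the unique token in the path (default 8)
    },
}
```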

def _get_backend(self, store_name: str | None = None):
@@ -518,7 +531,7 @@ def encode(
raise TypeError(f"<object> expects bytes or path, got {type(value).__name__}")

# Build storage path using inherited helper
path, token = self._build_path(schema, table, field, primary_key, ext=ext)
path, token = self._build_path(schema, table, field, primary_key, ext=ext, store_name=store_name)

# Get storage backend using inherited helper
backend = self._get_backend(store_name)
@@ -733,10 +746,16 @@ class FilepathCodec(Codec):

External only - requires @store.

This codec gives users maximum freedom in organizing their files while
reusing DataJoint's store configuration. Files can be placed anywhere
in the store EXCEPT the reserved ``_hash/`` and ``_schema/`` sections
which are managed by DataJoint.

This is useful when:
- Files are managed externally (e.g., by acquisition software)
- Files are too large to copy
- You want to reference shared datasets
- You need custom directory structures

Example::

@@ -749,6 +768,7 @@ class Recordings(dj.Manual):
'''

# Reference an existing file (no copy)
# Path is relative to store location
table.insert1({'recording_id': 1, 'raw_data': 'subject01/session001/data.bin'})

# Fetch returns ObjectRef for lazy access
@@ -757,7 +777,10 @@ class Recordings(dj.Manual):
ref.download() # Download to local path

Storage Format:
JSON metadata: ``{path, store}``
JSON metadata: ``{path, store, size, timestamp}``

Reserved Sections:
Paths cannot start with ``_hash/`` or ``_schema/`` - these are managed by DataJoint.

Warning:
The file must exist in the store at the specified path.
@@ -769,7 +792,9 @@ class Recordings(dj.Manual):
def get_dtype(self, is_store: bool) -> str:
"""Filepath is external only."""
if not is_store:
raise DataJointError("<filepath> requires @store")
raise DataJointError(
"<filepath> requires @ symbol. Use <filepath@> for default store " "or <filepath@store> to specify store."
)
return "json"

def encode(self, value: Any, *, key: dict | None = None, store_name: str | None = None) -> dict:
Expand All @@ -779,7 +804,7 @@ def encode(self, value: Any, *, key: dict | None = None, store_name: str | None
Parameters
----------
value : str
Relative path within the store.
Relative path within the store. Cannot use reserved sections (_hash/, _schema/).
key : dict, optional
Primary key values (unused).
store_name : str, optional
Expand All @@ -789,14 +814,55 @@ def encode(self, value: Any, *, key: dict | None = None, store_name: str | None
-------
dict
Metadata dict: ``{path, store, size, timestamp}``.

Raises
------
ValueError
If path uses reserved sections (_hash/ or _schema/).
FileNotFoundError
If file does not exist in the store.
"""
from datetime import datetime, timezone

from . import config
from .hash_registry import get_store_backend

path = str(value)

# Get store spec to check prefix configuration
# Use filepath_default if no store specified (filepath is not part of OAS)
spec = config.get_store_spec(store_name, use_filepath_default=True)

# Validate path doesn't use reserved sections (hash and schema)
path_normalized = path.lstrip("/")
reserved_prefixes = []

hash_prefix = spec.get("hash_prefix")
if hash_prefix:
reserved_prefixes.append(("hash_prefix", hash_prefix))

schema_prefix = spec.get("schema_prefix")
if schema_prefix:
reserved_prefixes.append(("schema_prefix", schema_prefix))

# Check if path starts with any reserved prefix
for prefix_name, prefix_value in reserved_prefixes:
prefix_normalized = prefix_value.strip("/") + "/"
if path_normalized.startswith(prefix_normalized):
raise ValueError(
f"<filepath@> cannot use reserved section '{prefix_value}' ({prefix_name}). "
f"This section is managed by DataJoint. "
f"Got path: {path}"
)

# If filepath_prefix is configured, enforce it
filepath_prefix = spec.get("filepath_prefix")
if filepath_prefix:
filepath_prefix_normalized = filepath_prefix.strip("/") + "/"
if not path_normalized.startswith(filepath_prefix_normalized):
raise ValueError(f"<filepath@> must use prefix '{filepath_prefix}' (filepath_prefix). " f"Got path: {path}")

# Verify file exists
backend = get_store_backend(store_name)
if not backend.exists(path):
raise FileNotFoundError(f"File not found in store '{store_name or 'default'}': {path}")
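To make the validation above concrete, here is a standalone sketch that mirrors the same prefix checks; the spec values are illustrative, not defaults from the PR:

```python
# Illustrative only: exercises the same checks encode() performs above.
spec = {
    "hash_prefix": "_hash",
    "schema_prefix": "_schema",
    "filepath_prefix": "raw",
}

def check(path: str) -> None:
    p = path.lstrip("/")
    # Reject paths inside DataJoint-managed sections.
    for name in ("hash_prefix", "schema_prefix"):
        prefix = spec.get(name)
        if prefix and p.startswith(prefix.strip("/") + "/"):
            raise ValueError(f"reserved section '{prefix}' ({name}): {path}")
    # If a filepath prefix is configured, require it.
    fp = spec.get("filepath_prefix")
    if fp and not p.startswith(fp.strip("/") + "/"):
        raise ValueError(f"must use prefix '{fp}': {path}")

check("raw/subject01/data.bin")   # OK
# check("_hash/ab/cd/blob")       # would raise: reserved section
# check("shared/data.bin")        # would raise: missing filepath_prefix
```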
@@ -1179,7 +1245,7 @@ def encode(
schema, table, field, primary_key = self._extract_context(key)

# Build schema-addressed storage path
path, _ = self._build_path(schema, table, field, primary_key, ext=".npy")
path, _ = self._build_path(schema, table, field, primary_key, ext=".npy", store_name=store_name)

# Serialize to .npy format
buffer = io.BytesIO()
15 changes: 5 additions & 10 deletions src/datajoint/hash_registry.py
@@ -138,20 +138,15 @@ def get_store_backend(store_name: str | None = None) -> StorageBackend:
Parameters
----------
store_name : str, optional
Name of the store to use. If None, uses the default object storage
configuration or the configured default_store.
Name of the store to use. If None, uses stores.default.

Returns
-------
StorageBackend
StorageBackend instance.
"""
# If store_name is None, check for configured default_store
if store_name is None and config.object_storage.default_store:
store_name = config.object_storage.default_store

# get_object_store_spec handles None by returning default object_storage config
spec = config.get_object_store_spec(store_name)
# get_store_spec handles None by using stores.default
spec = config.get_store_spec(store_name)
return StorageBackend(spec)
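Usage is a one-liner. A sketch assuming a configured store named "raw" (the store name is hypothetical; `exists()` appears in the filepath codec above):

```python
# Hypothetical usage of get_store_backend; the store name "raw" is assumed.
from datajoint.hash_registry import get_store_backend

backend = get_store_backend("raw")  # passing None resolves via stores.default
if not backend.exists("raw/subject01/session001/data.bin"):
    raise FileNotFoundError("referenced file missing from store")
```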


@@ -162,14 +157,14 @@ def get_store_subfolding(store_name: str | None = None) -> tuple[int, ...] | None:
Parameters
----------
store_name : str, optional
Name of the store. If None, uses default store.
Name of the store. If None, uses stores.default.

Returns
-------
tuple[int, ...] | None
Subfolding pattern (e.g., (2, 2)) or None for flat storage.
"""
spec = config.get_object_store_spec(store_name)
spec = config.get_store_spec(store_name)
subfolding = spec.get("subfolding")
if subfolding is not None:
return tuple(subfolding)
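For intuition, a pattern such as ``(2, 2)`` conventionally fans files out into nested directories keyed by the leading characters of the hash. A minimal sketch under that assumption (the exact layout used by StorageBackend is not shown in this diff):

```python
# Illustrative: how a (2, 2) subfolding pattern is conventionally applied.
def subfold(name: str, pattern: tuple[int, ...] | None) -> str:
    if not pattern:
        return name  # flat storage
    parts, pos = [], 0
    for width in pattern:
        parts.append(name[pos : pos + width])
        pos += width
    return "/".join(parts + [name])

print(subfold("aabbccdd1122", (2, 2)))  # -> "aa/bb/aabbccdd1122"
```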