Skip to content

Commit 0019109

Browse files
committed
Implement object column type for managed file storage
This commit adds a new `object` column type that provides managed file/folder storage with fsspec backend integration.

Key features:
- Object type declaration in declare.py (stores as JSON in MySQL)
- ObjectRef class for fetch behavior with fsspec accessors (.fs, .store, .full_path)
- Insert processing for file paths, folder paths, and (ext, stream) tuples
- staged_insert1 context manager for direct writes (Zarr/xarray compatibility)
- Path generation with partition pattern support
- Store metadata file (datajoint_store.json) verification/creation
- Folder manifest files for integrity verification

The object type stores metadata inline (no hidden tables), supports multiple storage backends via fsspec (file, S3, GCS, Azure), and provides ObjectRef handles on fetch with direct storage access.
1 parent 949b8a6 commit 0019109

File tree

9 files changed

+1256
-6
lines changed

9 files changed

+1256
-6
lines changed

src/datajoint/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
"key_hash",
5353
"logger",
5454
"cli",
55+
"ObjectRef",
5556
]
5657

5758
from . import errors
@@ -66,6 +67,7 @@
6667
from .fetch import key
6768
from .hash import key_hash
6869
from .logging import logger
70+
from .objectref import ObjectRef
6971
from .schemas import Schema, VirtualModule, list_schemas
7072
from .settings import config
7173
from .table import FreeTable, Table

src/datajoint/declare.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
INTERNAL_ATTACH=r"attach$",
6565
EXTERNAL_ATTACH=r"attach@(?P<store>[a-z][\-\w]*)$",
6666
FILEPATH=r"filepath@(?P<store>[a-z][\-\w]*)$",
67+
OBJECT=r"object$", # managed object storage (files/folders)
6768
UUID=r"uuid$",
6869
ADAPTED=r"<.+>$",
6970
).items()
@@ -76,6 +77,7 @@
7677
"EXTERNAL_ATTACH",
7778
"EXTERNAL_BLOB",
7879
"FILEPATH",
80+
"OBJECT",
7981
"ADAPTED",
8082
} | set(TYPE_ALIASES)
8183
NATIVE_TYPES = set(TYPE_PATTERN) - SPECIAL_TYPES
@@ -464,6 +466,9 @@ def substitute_special_type(match, category, foreign_key_sql, context):
464466
match["type"] = UUID_DATA_TYPE
465467
elif category == "INTERNAL_ATTACH":
466468
match["type"] = "LONGBLOB"
469+
elif category == "OBJECT":
470+
# Object type stores metadata as JSON - no foreign key to external table
471+
match["type"] = "JSON"
467472
elif category in EXTERNAL_TYPES:
468473
if category == "FILEPATH" and not _support_filepath_types():
469474
raise DataJointError(

src/datajoint/fetch.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@
1212

1313
from . import blob, hash
1414
from .errors import DataJointError
15+
from .objectref import ObjectRef
1516
from .settings import config
17+
from .storage import StorageBackend
1618
from .utils import safe_write
1719

1820

@@ -48,6 +50,15 @@ def _get(connection, attr, data, squeeze, download_path):
4850
"""
4951
if data is None:
5052
return
53+
if attr.is_object:
54+
# Object type - return ObjectRef handle
55+
json_data = json.loads(data) if isinstance(data, str) else data
56+
try:
57+
spec = config.get_object_storage_spec()
58+
backend = StorageBackend(spec)
59+
except DataJointError:
60+
backend = None
61+
return ObjectRef.from_json(json_data, backend=backend)
5162
if attr.json:
5263
return json.loads(data)
5364

src/datajoint/heading.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
is_blob=False,
3333
is_attachment=False,
3434
is_filepath=False,
35+
is_object=False,
3536
is_external=False,
3637
is_hidden=False,
3738
adapter=None,
@@ -136,7 +137,7 @@ def blobs(self):
136137

137138
@property
138139
def non_blobs(self):
139-
return [k for k, v in self.attributes.items() if not (v.is_blob or v.is_attachment or v.is_filepath or v.json)]
140+
return [k for k, v in self.attributes.items() if not (v.is_blob or v.is_attachment or v.is_filepath or v.is_object or v.json)]
140141

141142
@property
142143
def new_attributes(self):
@@ -262,6 +263,7 @@ def _init_from_database(self):
262263
json=bool(TYPE_PATTERN["JSON"].match(attr["type"])),
263264
is_attachment=False,
264265
is_filepath=False,
266+
is_object=False,
265267
adapter=None,
266268
store=None,
267269
is_external=False,
@@ -325,6 +327,7 @@ def _init_from_database(self):
325327
unsupported=False,
326328
is_attachment=category in ("INTERNAL_ATTACH", "EXTERNAL_ATTACH"),
327329
is_filepath=category == "FILEPATH",
330+
is_object=category == "OBJECT",
328331
# INTERNAL_BLOB is not a custom type but is included for completeness
329332
is_blob=category in ("INTERNAL_BLOB", "EXTERNAL_BLOB"),
330333
uuid=category == "UUID",
@@ -337,10 +340,11 @@ def _init_from_database(self):
337340
attr["is_blob"],
338341
attr["is_attachment"],
339342
attr["is_filepath"],
343+
attr["is_object"],
340344
attr["json"],
341345
)
342346
):
343-
raise DataJointError("Json, Blob, attachment, or filepath attributes are not allowed in the primary key")
347+
raise DataJointError("Json, Blob, attachment, filepath, or object attributes are not allowed in the primary key")
344348

345349
if attr["string"] and attr["default"] is not None and attr["default"] not in sql_literals:
346350
attr["default"] = '"%s"' % attr["default"]

0 commit comments

Comments
 (0)