Skip to content

Commit 96e2edf

Browse files
docs: Continue docstring harmonization to NumPy style (batch 2)
Harmonized modules: - declare.py - Table definition parsing - blob.py - Binary serialization - storage.py - fsspec storage backend - codecs.py - Codec type system - dependencies.py - FK dependency graph - diagram.py - Schema diagram visualization Also includes formatting fixes from pre-commit hooks for expression.py and other modules with long function signatures. All modules now use: - NumPy-style docstrings (Parameters, Returns, Raises, etc.) - `from __future__ import annotations` for deferred evaluation - Python 3.10+ type hints (X | None, list[str]) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 66f143e commit 96e2edf

File tree

12 files changed

+1138
-417
lines changed

12 files changed

+1138
-417
lines changed

src/datajoint/blob.py

Lines changed: 121 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
11
"""
2-
(De)serialization methods for basic datatypes and numpy.ndarrays with provisions for mutual
3-
compatibility with Matlab-based serialization implemented by mYm.
2+
Binary serialization for DataJoint blob storage.
3+
4+
Provides (de)serialization for Python/NumPy objects with backward compatibility
5+
for MATLAB mYm-format blobs. Supports arrays, scalars, structs, cells, and
6+
Python built-in types (dict, list, tuple, set, datetime, UUID, Decimal).
47
"""
58

9+
from __future__ import annotations
10+
611
import collections
712
import datetime
813
import uuid
@@ -69,31 +74,74 @@ def len_u32(obj):
6974

7075

7176
class MatCell(np.ndarray):
72-
"""a numpy ndarray representing a Matlab cell array"""
77+
"""
78+
NumPy ndarray subclass representing a MATLAB cell array.
79+
80+
Used to distinguish cell arrays from regular arrays during serialization
81+
for MATLAB compatibility.
82+
"""
7383

7484
pass
7585

7686

7787
class MatStruct(np.recarray):
78-
"""numpy.recarray representing a Matlab struct array"""
88+
"""
89+
NumPy recarray subclass representing a MATLAB struct array.
90+
91+
Used to distinguish struct arrays from regular recarrays during
92+
serialization for MATLAB compatibility.
93+
"""
7994

8095
pass
8196

8297

8398
class Blob:
84-
def __init__(self, squeeze=False):
99+
"""
100+
Binary serializer/deserializer for DataJoint blob storage.
101+
102+
Handles packing Python objects into binary format and unpacking binary
103+
data back to Python objects. Supports two protocols:
104+
105+
- ``mYm``: Original MATLAB-compatible format (default)
106+
- ``dj0``: Extended format for Python-specific types
107+
108+
Parameters
109+
----------
110+
squeeze : bool, optional
111+
If True, remove singleton dimensions from arrays and convert
112+
0-dimensional arrays to scalars. Default False.
113+
114+
Attributes
115+
----------
116+
protocol : bytes or None
117+
Current serialization protocol (``b"mYm\\0"`` or ``b"dj0\\0"``).
118+
"""
119+
120+
def __init__(self, squeeze: bool = False) -> None:
85121
self._squeeze = squeeze
86122
self._blob = None
87123
self._pos = 0
88124
self.protocol = None
89125

90-
def set_dj0(self):
126+
def set_dj0(self) -> None:
127+
"""Switch to dj0 protocol for extended type support."""
91128
self.protocol = b"dj0\0" # when using new blob features
92129

93-
def squeeze(self, array, convert_to_scalar=True):
130+
def squeeze(self, array: np.ndarray, convert_to_scalar: bool = True) -> np.ndarray:
94131
"""
95-
Simplify the input array - squeeze out all singleton dimensions.
96-
If convert_to_scalar, then convert zero-dimensional arrays to scalars
132+
Remove singleton dimensions from an array (returned unchanged unless squeezing was enabled on this instance).
133+
134+
Parameters
135+
----------
136+
array : np.ndarray
137+
Input array.
138+
convert_to_scalar : bool, optional
139+
If True, convert 0-dimensional arrays to Python scalars. Default True.
140+
141+
Returns
142+
-------
143+
np.ndarray or scalar
144+
Squeezed array or scalar value.
97145
"""
98146
if not self._squeeze:
99147
return array
@@ -233,9 +281,19 @@ def read_array(self):
233281
data = data + 1j * self.read_value(dtype, count=n_elem)
234282
return self.squeeze(data.reshape(shape, order="F"))
235283

236-
def pack_array(self, array):
284+
def pack_array(self, array: np.ndarray) -> bytes:
237285
"""
238-
Serialize an np.ndarray into bytes. Scalars are encoded with ndim=0.
286+
Serialize a NumPy array into bytes.
287+
288+
Parameters
289+
----------
290+
array : np.ndarray
291+
Array to serialize. Scalars are encoded with ndim=0.
292+
293+
Returns
294+
-------
295+
bytes
296+
Serialized array data.
239297
"""
240298
if "datetime64" in array.dtype.name:
241299
self.set_dj0()
@@ -497,10 +555,60 @@ def pack(self, obj, compress):
497555
return blob
498556

499557

500-
def pack(obj, compress=True):
558+
def pack(obj, compress: bool = True) -> bytes:
559+
"""
560+
Serialize a Python object to binary blob format.
561+
562+
Parameters
563+
----------
564+
obj : any
565+
Object to serialize. Supports NumPy arrays, Python scalars,
566+
collections (dict, list, tuple, set), datetime objects, UUID,
567+
Decimal, and MATLAB-compatible MatCell/MatStruct.
568+
compress : bool, optional
569+
If True (default), compress blobs larger than 1000 bytes using zlib.
570+
571+
Returns
572+
-------
573+
bytes
574+
Serialized binary data.
575+
576+
Raises
577+
------
578+
DataJointError
579+
If the object type is not supported.
580+
581+
Examples
582+
--------
583+
>>> data = np.array([1, 2, 3])
584+
>>> blob = pack(data)
585+
>>> unpacked = unpack(blob)
586+
"""
501587
return Blob().pack(obj, compress=compress)
502588

503589

504-
def unpack(blob, squeeze=False):
590+
def unpack(blob: bytes, squeeze: bool = False):
591+
"""
592+
Deserialize a binary blob to a Python object.
593+
594+
Parameters
595+
----------
596+
blob : bytes
597+
Binary data from ``pack()`` or MATLAB mYm serialization.
598+
squeeze : bool, optional
599+
If True, remove singleton dimensions from arrays. Default False.
600+
601+
Returns
602+
-------
603+
any
604+
Deserialized Python object, or None if ``blob`` is None.
605+
606+
Examples
607+
--------
608+
>>> blob = pack({'a': 1, 'b': [1, 2, 3]})
609+
>>> data = unpack(blob)
610+
>>> data['b']
611+
[1, 2, 3]
612+
"""
505613
if blob is not None:
506614
return Blob(squeeze=squeeze).unpack(blob)

0 commit comments

Comments
 (0)