|
1 | 1 | """ |
2 | | -(De)serialization methods for basic datatypes and numpy.ndarrays with provisions for mutual |
3 | | -compatibility with Matlab-based serialization implemented by mYm. |
| 2 | +Binary serialization for DataJoint blob storage. |
| 3 | +
|
| 4 | +Provides (de)serialization for Python/NumPy objects with backward compatibility |
| 5 | +for MATLAB mYm-format blobs. Supports arrays, scalars, structs, cells, and |
| 6 | +Python built-in and standard-library types (dict, list, tuple, set, datetime, UUID, Decimal). |
4 | 7 | """ |
5 | 8 |
|
| 9 | +from __future__ import annotations |
| 10 | + |
6 | 11 | import collections |
7 | 12 | import datetime |
8 | 13 | import uuid |
@@ -69,31 +74,74 @@ def len_u32(obj): |
69 | 74 |
|
70 | 75 |
|
71 | 76 | class MatCell(np.ndarray): |
72 | | - """a numpy ndarray representing a Matlab cell array""" |
| 77 | + """ |
| 78 | + NumPy ndarray subclass representing a MATLAB cell array. |
| 79 | +
|
| 80 | + Used to distinguish cell arrays from regular arrays during serialization |
| 81 | + for MATLAB compatibility. |
| 82 | + """ |
73 | 83 |
|
74 | 84 | pass |
75 | 85 |
|
76 | 86 |
|
77 | 87 | class MatStruct(np.recarray): |
78 | | - """numpy.recarray representing a Matlab struct array""" |
| 88 | + """ |
| 89 | + NumPy recarray subclass representing a MATLAB struct array. |
| 90 | +
|
| 91 | + Used to distinguish struct arrays from regular recarrays during |
| 92 | + serialization for MATLAB compatibility. |
| 93 | + """ |
79 | 94 |
|
80 | 95 | pass |
81 | 96 |
|
82 | 97 |
|
83 | 98 | class Blob: |
84 | | - def __init__(self, squeeze=False): |
| 99 | + """ |
| 100 | + Binary serializer/deserializer for DataJoint blob storage. |
| 101 | +
|
| 102 | + Handles packing Python objects into binary format and unpacking binary |
| 103 | + data back to Python objects. Supports two protocols: |
| 104 | +
|
| 105 | + - ``mYm``: Original MATLAB-compatible format (default) |
| 106 | + - ``dj0``: Extended format for Python-specific types |
| 107 | +
|
| 108 | + Parameters |
| 109 | + ---------- |
| 110 | + squeeze : bool, optional |
| 111 | + If True, remove singleton dimensions from arrays and convert |
| 112 | + 0-dimensional arrays to scalars. Default False. |
| 113 | +
|
| 114 | + Attributes |
| 115 | + ---------- |
| 116 | + protocol : bytes or None |
| 117 | + Current serialization protocol (``b"mYm\\0"`` or ``b"dj0\\0"``). |
| 118 | + """ |
| 119 | + |
| 120 | + def __init__(self, squeeze: bool = False) -> None: |
85 | 121 | self._squeeze = squeeze |
86 | 122 | self._blob = None |
87 | 123 | self._pos = 0 |
88 | 124 | self.protocol = None |
89 | 125 |
|
90 | | - def set_dj0(self): |
| 126 | + def set_dj0(self) -> None: |
| 127 | + """Switch to dj0 protocol for extended type support.""" |
91 | 128 | self.protocol = b"dj0\0" # when using new blob features |
92 | 129 |
|
93 | | - def squeeze(self, array, convert_to_scalar=True): |
| 130 | + def squeeze(self, array: np.ndarray, convert_to_scalar: bool = True) -> np.ndarray: |
94 | 131 | """ |
95 | | - Simplify the input array - squeeze out all singleton dimensions. |
96 | | - If convert_to_scalar, then convert zero-dimensional arrays to scalars |
| 132 | + Remove singleton dimensions from an array; the input is returned unchanged unless the instance was created with ``squeeze=True``. |
| 133 | +
|
| 134 | + Parameters |
| 135 | + ---------- |
| 136 | + array : np.ndarray |
| 137 | + Input array. |
| 138 | + convert_to_scalar : bool, optional |
| 139 | + If True, convert 0-dimensional arrays to Python scalars. Default True. |
| 140 | +
|
| 141 | + Returns |
| 142 | + ------- |
| 143 | + np.ndarray or scalar |
| 144 | + Squeezed array or scalar value. |
97 | 145 | """ |
98 | 146 | if not self._squeeze: |
99 | 147 | return array |
@@ -233,9 +281,19 @@ def read_array(self): |
233 | 281 | data = data + 1j * self.read_value(dtype, count=n_elem) |
234 | 282 | return self.squeeze(data.reshape(shape, order="F")) |
235 | 283 |
|
236 | | - def pack_array(self, array): |
| 284 | + def pack_array(self, array: np.ndarray) -> bytes: |
237 | 285 | """ |
238 | | - Serialize an np.ndarray into bytes. Scalars are encoded with ndim=0. |
| 286 | + Serialize a NumPy array into bytes. |
| 287 | +
|
| 288 | + Parameters |
| 289 | + ---------- |
| 290 | + array : np.ndarray |
| 291 | + Array to serialize. Scalars are encoded with ndim=0. |
| 292 | +
|
| 293 | + Returns |
| 294 | + ------- |
| 295 | + bytes |
| 296 | + Serialized array data. |
239 | 297 | """ |
240 | 298 | if "datetime64" in array.dtype.name: |
241 | 299 | self.set_dj0() |
@@ -497,10 +555,60 @@ def pack(self, obj, compress): |
497 | 555 | return blob |
498 | 556 |
|
499 | 557 |
|
500 | | -def pack(obj, compress=True): |
| 558 | +def pack(obj, compress: bool = True) -> bytes: |
| 559 | + """ |
| 560 | + Serialize a Python object to binary blob format. |
| 561 | +
|
| 562 | + Parameters |
| 563 | + ---------- |
| 564 | + obj : any |
| 565 | + Object to serialize. Supports NumPy arrays, Python scalars, |
| 566 | + collections (dict, list, tuple, set), datetime objects, UUID, |
| 567 | + Decimal, and MATLAB-compatible MatCell/MatStruct. |
| 568 | + compress : bool, optional |
| 569 | + If True (default), compress blobs larger than 1000 bytes using zlib. |
| 570 | +
|
| 571 | + Returns |
| 572 | + ------- |
| 573 | + bytes |
| 574 | + Serialized binary data. |
| 575 | +
|
| 576 | + Raises |
| 577 | + ------ |
| 578 | + DataJointError |
| 579 | + If the object type is not supported. |
| 580 | +
|
| 581 | + Examples |
| 582 | + -------- |
| 583 | + >>> data = np.array([1, 2, 3]) |
| 584 | + >>> blob = pack(data) |
| 585 | + >>> unpacked = unpack(blob) |
| 586 | + """ |
501 | 587 | return Blob().pack(obj, compress=compress) |
502 | 588 |
|
503 | 589 |
|
504 | | -def unpack(blob, squeeze=False): |
| 590 | +def unpack(blob: bytes, squeeze: bool = False): |
| 591 | + """ |
| 592 | + Deserialize a binary blob to a Python object. |
| 593 | +
|
| 594 | + Parameters |
| 595 | + ---------- |
| 596 | + blob : bytes |
| 597 | + Binary data from ``pack()`` or MATLAB mYm serialization. |
| 598 | + squeeze : bool, optional |
| 599 | + If True, remove singleton dimensions from arrays and convert 0-dimensional arrays to scalars. Default False. |
| 600 | +
|
| 601 | + Returns |
| 602 | + ------- |
| 603 | + any |
| 604 | + Deserialized Python object, or None if ``blob`` is None. |
| 605 | +
|
| 606 | + Examples |
| 607 | + -------- |
| 608 | + >>> blob = pack({'a': 1, 'b': [1, 2, 3]}) |
| 609 | + >>> data = unpack(blob) |
| 610 | + >>> data['b'] |
| 611 | + [1, 2, 3] |
| 612 | + """ |
505 | 613 | if blob is not None: |
506 | 614 | return Blob(squeeze=squeeze).unpack(blob) |
0 commit comments