Skip to content

Commit 36300ff

Browse files
authored
Python decode function (#2563)
1 parent 808dade commit 36300ff

36 files changed

+955
-571
lines changed

docs/api/python/arrays.rst

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ functionality.
8585

8686
.. autoclass:: vortex.Float64TypeArray
8787
:members:
88-
88+
8989
.. autoclass:: vortex.Utf8TypeArray
9090
:members:
9191

@@ -201,5 +201,23 @@ Compressed Encodings
201201
Pluggable Encodings
202202
-------------------
203203

204-
.. autoclass:: vortex.PyEncoding
204+
Subclasses of :class:`~vortex.PyArray` can be used to implement custom Vortex encodings in Python. These encodings
205+
can be registered with the :data:`~vortex.registry` so they are available to use when reading Vortex files.
206+
207+
.. autoclass:: vortex.PyArray
208+
:members:
209+
210+
211+
Registry and Serde
212+
------------------
213+
214+
.. autodata:: vortex.registry
215+
216+
.. autoclass:: vortex.Registry
217+
:members:
218+
219+
.. autoclass:: vortex.ArrayContext
220+
:members:
221+
222+
.. autoclass:: vortex.ArrayParts
205223
:members:

pyvortex/pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,5 +40,6 @@ dev = [
4040
"mypy>=1.14.1",
4141
"numpy>=2.2.2",
4242
"pandas-stubs>=2.2.3.241126",
43+
"pcodec>=0.3.3",
4344
"pyarrow-stubs>=17.16",
4445
]

pyvortex/python/vortex/__init__.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
from . import _lib
2-
from ._lib import register
32
from ._lib.arrays import (
43
AlpArray,
54
AlpRdArray,
@@ -32,7 +31,6 @@
3231
NullTypeArray,
3332
PrimitiveArray,
3433
PrimitiveTypeArray,
35-
PyEncoding,
3634
RunEndArray,
3735
SparseArray,
3836
StructArray,
@@ -68,6 +66,7 @@
6866
uint,
6967
utf8,
7068
)
69+
from ._lib.registry import Registry
7170
from ._lib.scalar import (
7271
BinaryScalar,
7372
BoolScalar,
@@ -80,16 +79,17 @@
8079
Utf8Scalar,
8180
scalar,
8281
)
83-
from .arrays import Array, array
82+
from ._lib.serde import ArrayContext, ArrayParts
83+
from .arrays import Array, PyArray, array
8484

8585
assert _lib, "Ensure we eagerly import the Vortex native library"
8686

8787
__all__ = [
8888
"array",
8989
"compress",
90-
"register",
9190
# Arrays
9291
"Array",
92+
"PyArray",
9393
"NullTypeArray",
9494
"BoolTypeArray",
9595
"PrimitiveTypeArray",
@@ -143,7 +143,6 @@
143143
"StructArray",
144144
"ListArray",
145145
"ExtensionArray",
146-
"PyEncoding",
147146
"AlpArray",
148147
"AlpRdArray",
149148
"DateTimePartsArray",
@@ -166,4 +165,11 @@
166165
"StructScalar",
167166
"ListScalar",
168167
"ExtensionScalar",
168+
# Registry + Serde
169+
"Registry",
170+
"ArrayContext",
171+
"ArrayParts",
169172
]
173+
174+
#: The default registry for Vortex
175+
registry = Registry()
Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +0,0 @@
1-
import vortex as vx
2-
3-
def register(cls: type[vx.PyEncoding]): ...

pyvortex/python/vortex/_lib/arrays.pyi

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ class Array:
1717
def from_arrow(obj: pa.Array | pa.ChunkedArray | pa.Table) -> Array: ...
1818
def to_arrow_array(self) -> pa.Array: ...
1919
@property
20-
def encoding(self) -> str: ...
20+
def id(self) -> str: ...
2121
@property
2222
def nbytes(self) -> int: ...
2323
@property
@@ -34,6 +34,9 @@ class Array:
3434
def to_polars_dataframe(self) -> pl.DataFrame: ...
3535
def to_polars_series(self) -> pl.Series: ...
3636
def to_pylist(self) -> list: ...
37+
def serialize(self, ctx: vx.ArrayContext) -> bytes: ...
38+
39+
class NativeArray(vx.Array): ...
3740

3841
@final
3942
class NullArray(vx.Array): ...
@@ -60,8 +63,6 @@ class ListArray(vx.Array): ...
6063
@final
6164
class ExtensionArray(vx.Array): ...
6265

63-
class PyEncoding(vx.Array): ...
64-
6566
@final
6667
class ConstantArray(vx.Array):
6768
def scalar(self) -> vx.Scalar: ...

pyvortex/python/vortex/_lib/mypy.allowlist

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,6 @@ vortex\._lib\.dataset
88
vortex\._lib\.dtype
99
vortex\._lib\.expr
1010
vortex\._lib\.io
11+
vortex\._lib\.registry
1112
vortex\._lib\.scalar
13+
vortex\._lib\.serde
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
from typing import final
2+
3+
import vortex as vx
4+
5+
@final
class Registry:
    # Holds array classes registered with Vortex. The package documentation
    # states that registered encodings become available when reading Vortex
    # files.

    # Register the array class ``cls`` with this registry.
    # NOTE(review): no return annotation is declared; confirm what the native
    # implementation returns before adding one.
    def register(self, cls: type[vx.Array]): ...

    # Produce a ``vx.ArrayContext`` for ``encodings``. Each entry is either an
    # array class or a string (presumably an encoding ID -- TODO confirm).
    def array_ctx(self, encodings: list[type[vx.Array] | str]) -> vx.ArrayContext: ...
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
from typing import final
2+
3+
import pyarrow as pa
4+
5+
import vortex as vx
6+
7+
@final
class ArrayParts:
    # The metadata, buffers and child ``ArrayParts`` that together represent
    # one array.

    # Construct an ``ArrayParts`` from raw bytes.
    # NOTE(review): presumably the bytes produced by ``Array.serialize`` -- confirm.
    @staticmethod
    def parse(data: bytes) -> ArrayParts: ...

    # Metadata bytes of this array; may be ``None``.
    @property
    def metadata(self) -> bytes | None: ...

    # Buffer count (presumably equal to ``len(self.buffers)`` -- TODO confirm).
    @property
    def nbuffers(self) -> int: ...

    # The buffers of this array, exposed as PyArrow buffers.
    @property
    def buffers(self) -> list[pa.Buffer]: ...

    # Child count (presumably equal to ``len(self.children)`` -- TODO confirm).
    @property
    def nchildren(self) -> int: ...

    # The ``ArrayParts`` of each child array.
    @property
    def children(self) -> list[ArrayParts]: ...

    # Decode these parts, given a context plus the expected dtype and length.
    # NOTE(review): the declared return type is ``pa.Array``; confirm it matches
    # what the native implementation actually returns.
    def decode(self, ctx: ArrayContext, dtype: vx.DType, len: int) -> pa.Array: ...
22+
23+
@final
class ArrayContext:
    # Opaque context object: returned by ``Registry.array_ctx`` and accepted by
    # ``Array.serialize`` and ``ArrayParts.decode``.

    # ``__len__`` must return an ``int`` per the Python data model, so the
    # return type is annotated explicitly rather than left untyped.
    # NOTE(review): presumably the number of encodings in the context -- confirm.
    def __len__(self) -> int: ...

pyvortex/python/vortex/arrays.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1+
import abc
12
from typing import TYPE_CHECKING, Any
23

34
import pyarrow
45

6+
import vortex as vx
57
from vortex._lib import arrays as _arrays
68

79
try:
@@ -380,3 +382,23 @@ def array(obj: pyarrow.Array | list | Any) -> Array:
380382
except ImportError:
381383
pass
382384
return Array.from_arrow(obj)
385+
386+
387+
class PyArray(Array, metaclass=abc.ABCMeta):
    """Base class to subclass when implementing a Vortex array in Python.

    Both :meth:`__len__` and :meth:`decode` are declared abstract and are meant
    to be supplied by concrete subclasses.
    """

    # String identifier declared for the array class.
    # NOTE(review): presumably the encoding ID that subclasses must set -- confirm.
    id: str

    @abc.abstractmethod
    def __len__(self) -> int:
        """Report the logical length of this array."""

    @classmethod
    @abc.abstractmethod
    def decode(cls, parts: vx.ArrayParts, ctx: vx.ArrayContext, dtype: vx.DType, len: int) -> Array:
        """Build an array from the parts it is made of.

        ``parts`` is an :class:`ArrayParts` holding the metadata, the buffers and the child
        :class:`ArrayParts` that represent the array being decoded. An implementation should
        check that this information is valid and then construct a new array from it.
        """

pyvortex/python/vortex/serde.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from vortex._lib.serde import ArrayContext, ArrayParts
2+
3+
# Public API of this module: the serde types re-exported from the native
# ``vortex._lib.serde`` module.
__all__ = ["ArrayParts", "ArrayContext"]

0 commit comments

Comments
 (0)