Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions fixture/pcodec/codec.06/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"delta_encoding_order": 2,
"delta_spec": "auto",
"equal_pages_up_to": 262144,
"id": "pcodec",
"level": 8,
"mode_spec": "auto",
"paging_spec": "equal_pages_up_to"
}
Binary file added fixture/pcodec/codec.06/encoded.00.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.01.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.02.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.03.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.04.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.05.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.06.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.07.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.08.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.09.dat
Binary file not shown.
9 changes: 9 additions & 0 deletions fixture/pcodec/codec.07/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"delta_encoding_order": null,
"delta_spec": "try_lookback",
"equal_pages_up_to": 262144,
"id": "pcodec",
"level": 8,
"mode_spec": "auto",
"paging_spec": "equal_pages_up_to"
}
Binary file added fixture/pcodec/codec.07/encoded.00.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.01.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.02.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.03.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.04.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.05.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.06.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.07.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.08.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.09.dat
Binary file not shown.
9 changes: 9 additions & 0 deletions fixture/pcodec/codec.08/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"delta_encoding_order": null,
"delta_spec": "none",
"equal_pages_up_to": 262144,
"id": "pcodec",
"level": 8,
"mode_spec": "auto",
"paging_spec": "equal_pages_up_to"
}
Binary file added fixture/pcodec/codec.08/encoded.00.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.01.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.02.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.03.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.04.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.05.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.06.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.07.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.08.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.09.dat
Binary file not shown.
9 changes: 9 additions & 0 deletions fixture/pcodec/codec.09/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"delta_encoding_order": 1,
"delta_spec": "try_consecutive",
"equal_pages_up_to": 262144,
"id": "pcodec",
"level": 8,
"mode_spec": "auto",
"paging_spec": "equal_pages_up_to"
}
Binary file added fixture/pcodec/codec.09/encoded.00.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.09/encoded.01.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.09/encoded.02.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.09/encoded.03.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.09/encoded.04.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.09/encoded.05.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.09/encoded.06.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.09/encoded.07.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.09/encoded.08.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.09/encoded.09.dat
Binary file not shown.
68 changes: 51 additions & 17 deletions numcodecs/pcodec.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from numcodecs.compat import ensure_contiguous_ndarray

try:
from pcodec import ChunkConfig, ModeSpec, PagingSpec, standalone
from pcodec import ChunkConfig, DeltaSpec, ModeSpec, PagingSpec, standalone
except ImportError: # pragma: no cover
standalone = None

Expand All @@ -27,14 +27,17 @@ class PCodec(Codec):
level : int
A compression level from 0-12, where 12 takes the longest and compresses
the most.
delta_encoding_order : init or None
Either a delta encoding level from 0-7 or None. If set to None, pcodec
will try to infer the optimal delta encoding order.
mode_spec : {'auto', 'classic'}
mode_spec : {"auto", "classic"}
Configures whether Pcodec should try to infer the best "mode" or
structure of the data (e.g. approximate multiples of 0.1) to improve
compression ratio, or skip this step and just use the numbers as-is
(Classic mode).
delta_spec : {"auto", "none", "try_consecutive", "try_lookback"}
Configures the delta encoding strategy. By default, uses "auto" which
will try to infer the best encoding order.
delta_encoding_order : int or None
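Explicit delta encoding level from 0-7. May only be set when delta_spec
is "try_consecutive", or when delta_spec is left as "auto", in which case
it is treated as "try_consecutive" for backwards compatibility. Setting
it together with any other delta_spec raises a ValueError.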
equal_pages_up_to : int
Divide the chunk into equal pages of up to this many numbers.
"""
Expand All @@ -44,39 +47,70 @@ class PCodec(Codec):
def __init__(
self,
level: int = 8,
mode_spec: Literal["auto", "classic"] = "auto",
delta_spec: Literal["auto", "none", "try_consecutive", "try_lookback"] = "auto",
paging_spec: Literal["equal_pages_up_to"] = "equal_pages_up_to",
delta_encoding_order: Optional[int] = None,
equal_pages_up_to: int = 262144,
# TODO one day, add support for the Try* mode specs
mode_spec: Literal['auto', 'classic'] = 'auto',
equal_pages_up_to: int = DEFAULT_MAX_PAGE_N,
):
if standalone is None: # pragma: no cover
raise ImportError("pcodec must be installed to use the PCodec codec.")

# note that we use `level` instead of `compression_level` to
# match other codecs
self.level = level
self.mode_spec = mode_spec
self.delta_spec = delta_spec
self.paging_spec = paging_spec
self.delta_encoding_order = delta_encoding_order
self.equal_pages_up_to = equal_pages_up_to
self.mode_spec = mode_spec

def encode(self, buf):
buf = ensure_contiguous_ndarray(buf)

def _get_chunk_config(self):
match self.mode_spec:
case 'auto':
case "auto" | None:
mode_spec = ModeSpec.auto()
case 'classic':
case "classic":
mode_spec = ModeSpec.classic()
case _:
raise ValueError(f"unknown value for mode_spec: {self.mode_spec}")
paging_spec = PagingSpec.equal_pages_up_to(self.equal_pages_up_to)
raise ValueError(f"mode_spec {self.mode_spec} is not supported")

if self.delta_encoding_order is not None and self.delta_spec == "auto":
# backwards compat for before delta_spec was introduced
delta_spec = DeltaSpec.try_consecutive(self.delta_encoding_order)
elif self.delta_encoding_order is not None and self.delta_spec != "try_consecutive":
raise ValueError(
"delta_encoding_order can only be set for delta_spec='try_consecutive'"
)
else:
match self.delta_spec:
case "auto" | None:
delta_spec = DeltaSpec.auto()
case "none":
delta_spec = DeltaSpec.none()
case "try_consecutive":
delta_spec = DeltaSpec.try_consecutive(self.delta_encoding_order)
case "try_lookback":
delta_spec = DeltaSpec.try_lookback()
case _:
raise ValueError(f"delta_spec {self.delta_spec} is not supported")

match self.paging_spec:
case "equal_pages_up_to" | None:
paging_spec = PagingSpec.equal_pages_up_to(self.equal_pages_up_to)
case _:
raise ValueError(f"paging_spec {self.paging_spec} is not supported")

config = ChunkConfig(
compression_level=self.level,
delta_encoding_order=self.delta_encoding_order,
delta_spec=delta_spec,
mode_spec=mode_spec,
paging_spec=paging_spec,
)
return config

def encode(self, buf):
buf = ensure_contiguous_ndarray(buf)
config = self._get_chunk_config()
return standalone.simple_compress(buf, config)

def decode(self, buf, out=None):
Expand Down
19 changes: 16 additions & 3 deletions numcodecs/tests/test_pcodec.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,12 @@
PCodec(level=1),
PCodec(level=5),
PCodec(level=9),
PCodec(mode_spec='classic'),
PCodec(mode_spec="classic"),
PCodec(equal_pages_up_to=300),
PCodec(delta_encoding_order=2),
PCodec(delta_spec="try_lookback"),
PCodec(delta_spec="none"),
PCodec(delta_spec="try_consecutive", delta_encoding_order=1),
]


Expand Down Expand Up @@ -57,14 +61,23 @@ def test_config():


def test_invalid_config_error():
codec = PCodec(mode_spec='bogus')
for param in ["mode_spec", "delta_spec", "paging_spec"]:
codec = PCodec(**{param: "bogus"})
with pytest.raises(ValueError):
check_encode_decode_array_to_bytes(arrays[0], codec)


def test_invalid_delta_encoding_combo():
codec = PCodec(delta_encoding_order=2, delta_spec="none")
with pytest.raises(ValueError):
check_encode_decode_array_to_bytes(arrays[0], codec)


def test_repr():
check_repr(
"PCodec(delta_encoding_order=None, equal_pages_up_to=262144, level=3, mode_spec='auto')"
"PCodec(delta_encoding_order=None, delta_spec='auto',"
" equal_pages_up_to=262144, level=3, mode_spec='auto',"
" paging_spec='equal_pages_up_to')"
)


Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ zfpy = [
"numpy<2.0.0",
]
pcodec = [
"pcodec>=0.2,<0.3",
"pcodec>=0.3,<0.4",
]
crc32c = [
"crc32c>=2.7",
Expand Down
Loading