Skip to content

Commit f42340e

Browse files
committed
WIP: fix tests after merging compressors and filters into codecs
1 parent f80a3dd commit f42340e

14 files changed

+381
-663
lines changed

zarr/__init__.py

Lines changed: 5 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -1,28 +1,12 @@
11
# -*- coding: utf-8 -*-
22
# flake8: noqa
33
from __future__ import absolute_import, print_function, division
4-
import multiprocessing
5-
import atexit
64

75

8-
from zarr.core import Array
9-
from zarr.creation import create, array, empty, zeros, ones, full, open, \
10-
empty_like, zeros_like, ones_like, full_like, open_like, open_array
11-
from zarr.storage import DictStore, DirectoryStore, ZipStore, init_array, \
12-
init_group, init_store
13-
from zarr.hierarchy import group, open_group, Group
6+
from zarr.creation import empty, zeros, ones, full, array, empty_like, \
7+
zeros_like, ones_like, full_like, open, open_array, open_like, create
8+
from zarr.storage import DictStore, DirectoryStore, ZipStore
9+
from zarr.hierarchy import group, open_group
1410
from zarr.sync import ThreadSynchronizer, ProcessSynchronizer
11+
from zarr.codecs import *
1512
from zarr.version import version as __version__
16-
from zarr.filters import DeltaFilter, FixedScaleOffsetFilter, \
17-
QuantizeFilter, PackBitsFilter, CategoryFilter
18-
19-
20-
try:
21-
from zarr import blosc
22-
except ImportError: # pragma: no cover
23-
pass
24-
else:
25-
ncores = multiprocessing.cpu_count()
26-
blosc.init()
27-
blosc.set_nthreads(min(8, ncores))
28-
atexit.register(blosc.destroy)

zarr/codecs.py

Lines changed: 98 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,8 @@
44
import bz2
55
import array
66
import math
7+
import multiprocessing
8+
import atexit
79

810

911
import numpy as np
@@ -13,7 +15,7 @@
1315
from zarr.meta import encode_dtype, decode_dtype
1416

1517

16-
registry = dict()
18+
codec_registry = dict()
1719

1820

1921
def get_codec(config):
@@ -29,18 +31,18 @@ def get_codec(config):
2931
codec : Codec
3032
3133
"""
32-
name = config.pop('name', None)
33-
cls = registry.get(name, None)
34+
codec_id = config.pop('id', None)
35+
cls = codec_registry.get(codec_id, None)
3436
if cls is None:
35-
raise ValueError('codec not available: %r' % name)
37+
raise ValueError('codec not available: %r' % codec_id)
3638
return cls.from_config(config)
3739

3840

3941
class Codec(object):
4042
"""Codec abstract base class."""
4143

4244
# override in sub-class
43-
name = None
45+
id = None
4446

4547
def encode(self, buf):
4648
"""Encode data in `buf`.
@@ -144,7 +146,7 @@ class ZlibCompressor(Codec):
144146
145147
"""
146148

147-
name = 'zlib'
149+
codec_id = 'zlib'
148150

149151
def __init__(self, level=-1):
150152
self.level = level
@@ -169,13 +171,17 @@ def decode(self, buf, out=None):
169171

170172
def get_config(self):
171173
config = dict()
172-
config['name'] = self.name
174+
config['id'] = self.codec_id
173175
config['level'] = self.level
174176
return config
175177

178+
def __repr__(self):
179+
r = '%s(level=%s)' % (type(self).__name__, self.level)
180+
return r
176181

177-
registry[ZlibCompressor.name] = ZlibCompressor
178-
registry['gzip'] = ZlibCompressor # alias
182+
183+
codec_registry[ZlibCompressor.codec_id] = ZlibCompressor
184+
codec_registry['gzip'] = ZlibCompressor # alias
179185

180186

181187
class BZ2Compressor(Codec):
@@ -188,7 +194,7 @@ class BZ2Compressor(Codec):
188194
189195
"""
190196

191-
name = 'bz2'
197+
codec_id = 'bz2'
192198

193199
def __init__(self, level=9):
194200
self.level = level
@@ -218,12 +224,16 @@ def decode(self, buf, out=None):
218224

219225
def get_config(self):
220226
config = dict()
221-
config['name'] = self.name
227+
config['id'] = self.codec_id
222228
config['level'] = self.level
223229
return config
224230

231+
def __repr__(self):
232+
r = '%s(level=%s)' % (type(self).__name__, self.level)
233+
return r
234+
225235

226-
registry[BZ2Compressor.name] = BZ2Compressor
236+
codec_registry[BZ2Compressor.codec_id] = BZ2Compressor
227237

228238

229239
try:
@@ -252,7 +262,7 @@ class LZMACompressor(Codec):
252262
253263
"""
254264

255-
name = 'lzma'
265+
codec_id = 'lzma'
256266

257267
def __init__(self, format=lzma.FORMAT_XZ, check=-1, preset=None,
258268
filters=None):
@@ -289,14 +299,20 @@ def decode(self, buf, out=None):
289299

290300
def get_config(self):
291301
config = dict()
292-
config['name'] = self.name
302+
config['id'] = self.codec_id
293303
config['format'] = self.format
294304
config['check'] = self.check
295305
config['preset'] = self.preset
296306
config['filters'] = self.filters
297307
return config
298308

299-
registry[LZMACompressor.name] = LZMACompressor
309+
def __repr__(self):
310+
r = '%s(format=%r, check=%r, preset=%r, filters=%r)' % \
311+
(type(self).__name__, self.format, self.check, self.preset,
312+
self.filters)
313+
return r
314+
315+
codec_registry[LZMACompressor.codec_id] = LZMACompressor
300316

301317
try:
302318
from zarr import blosc
@@ -319,7 +335,7 @@ class BloscCompressor(Codec):
319335
320336
"""
321337

322-
name = 'blosc'
338+
codec_id = 'blosc'
323339

324340
def __init__(self, cname='lz4', clevel=5, shuffle=1):
325341
if isinstance(cname, text_type):
@@ -336,13 +352,25 @@ def decode(self, buf, out=None):
336352

337353
def get_config(self):
338354
config = dict()
339-
config['name'] = self.name
355+
config['id'] = self.codec_id
340356
config['cname'] = text_type(self.cname, 'ascii')
341357
config['clevel'] = self.clevel
342358
config['shuffle'] = self.shuffle
343359
return config
344360

345-
registry[BloscCompressor.name] = BloscCompressor
361+
def __repr__(self):
362+
r = '%s(cname=%r, clevel=%r, shuffle=%r)' % \
363+
(type(self).__name__, text_type(self.cname, 'ascii'),
364+
self.clevel, self.shuffle)
365+
return r
366+
367+
codec_registry[BloscCompressor.codec_id] = BloscCompressor
368+
369+
# initialize blosc
370+
ncores = multiprocessing.cpu_count()
371+
blosc.init()
372+
blosc.set_nthreads(min(8, ncores))
373+
atexit.register(blosc.destroy)
346374

347375

348376
def _ndarray_from_buffer(buf, dtype):
@@ -387,7 +415,7 @@ class DeltaFilter(Codec):
387415
388416
""" # flake8: noqa
389417

390-
name = 'delta'
418+
codec_id = 'delta'
391419

392420
def __init__(self, dtype, astype=None):
393421
self.dtype = np.dtype(dtype)
@@ -437,7 +465,7 @@ def decode(self, buf, out=None):
437465

438466
def get_config(self):
439467
config = dict()
440-
config['name'] = self.name
468+
config['id'] = self.codec_id
441469
config['dtype'] = encode_dtype(self.dtype)
442470
config['astype'] = encode_dtype(self.astype)
443471
return config
@@ -448,8 +476,15 @@ def from_config(cls, config):
448476
astype = decode_dtype(config['astype'])
449477
return cls(dtype=dtype, astype=astype)
450478

479+
def __repr__(self):
480+
r = '%s(dtype=%s' % (type(self).__name__, self.dtype)
481+
if self.astype != self.dtype:
482+
r += ', astype=%s' % self.astype
483+
r += ')'
484+
return r
485+
451486

452-
registry[DeltaFilter.name] = DeltaFilter
487+
codec_registry[DeltaFilter.codec_id] = DeltaFilter
453488

454489

455490
class FixedScaleOffsetFilter(Codec):
@@ -514,7 +549,7 @@ class FixedScaleOffsetFilter(Codec):
514549
515550
""" # flake8: noqa
516551

517-
name = 'fixedscaleoffset'
552+
codec_id = 'fixedscaleoffset'
518553

519554
def __init__(self, offset, scale, dtype, astype=None):
520555
self.offset = offset
@@ -557,7 +592,7 @@ def decode(self, buf, out=None):
557592

558593
def get_config(self):
559594
config = dict()
560-
config['name'] = self.name
595+
config['id'] = self.codec_id
561596
config['astype'] = encode_dtype(self.astype)
562597
config['dtype'] = encode_dtype(self.dtype)
563598
config['scale'] = self.scale
@@ -573,8 +608,15 @@ def from_config(cls, config):
573608
return cls(astype=astype, dtype=dtype, scale=scale,
574609
offset=offset)
575610

611+
def __repr__(self):
612+
r = '%s(scale=%s, offset=%s, dtype=%s' % \
613+
(type(self).__name__, self.scale, self.offset, self.dtype)
614+
if self.astype != self.dtype:
615+
r += ', astype=%s' % self.astype
616+
r += ')'
617+
return r
576618

577-
registry[FixedScaleOffsetFilter.name] = FixedScaleOffsetFilter
619+
codec_registry[FixedScaleOffsetFilter.codec_id] = FixedScaleOffsetFilter
578620

579621

580622
class QuantizeFilter(Codec):
@@ -615,7 +657,7 @@ class QuantizeFilter(Codec):
615657
616658
"""
617659

618-
name = 'quantize'
660+
codec_id = 'quantize'
619661

620662
def __init__(self, digits, dtype, astype=None):
621663
self.digits = digits
@@ -656,7 +698,7 @@ def decode(self, buf, out=None):
656698

657699
def get_config(self):
658700
config = dict()
659-
config['name'] = self.filter_name
701+
config['id'] = self.codec_id
660702
config['digits'] = self.digits
661703
config['dtype'] = encode_dtype(self.dtype)
662704
config['astype'] = encode_dtype(self.astype)
@@ -669,8 +711,16 @@ def from_config(cls, config):
669711
digits = config['digits']
670712
return cls(digits=digits, dtype=dtype, astype=astype)
671713

714+
def __repr__(self):
715+
r = '%s(digits=%s, dtype=%s' % \
716+
(type(self).__name__, self.digits, self.dtype)
717+
if self.astype != self.dtype:
718+
r += ', astype=%s' % self.astype
719+
r += ')'
720+
return r
672721

673-
registry[QuantizeFilter.name] = QuantizeFilter
722+
723+
codec_registry[QuantizeFilter.codec_id] = QuantizeFilter
674724

675725

676726
class PackBitsFilter(Codec):
@@ -696,7 +746,7 @@ class PackBitsFilter(Codec):
696746
697747
"""
698748

699-
name = 'packbits'
749+
codec_id = 'packbits'
700750

701751
def __init__(self):
702752
pass
@@ -751,15 +801,19 @@ def decode(self, buf, out=None):
751801

752802
def get_config(self):
753803
config = dict()
754-
config['name'] = self.name
804+
config['id'] = self.codec_id
755805
return config
756806

757807
@classmethod
758808
def from_config(cls, config):
759809
return cls()
760810

811+
def __repr__(self):
812+
r = '%s()' % type(self).__name__
813+
return r
814+
761815

762-
registry[PackBitsFilter.name] = PackBitsFilter
816+
codec_registry[PackBitsFilter.codec_id] = PackBitsFilter
763817

764818

765819
def _ensure_bytes(l):
@@ -791,7 +845,7 @@ class CategorizeFilter(Codec):
791845
>>> x
792846
array([b'male', b'female', b'female', b'male', b'unexpected'],
793847
dtype='|S10')
794-
>>> f = zarr.CategoryFilter(labels=[b'female', b'male'], dtype=x.dtype)
848+
>>> f = zarr.CategorizeFilter(labels=[b'female', b'male'], dtype=x.dtype)
795849
>>> y = f.encode(x)
796850
>>> y
797851
array([2, 1, 1, 2, 0], dtype=uint8)
@@ -802,7 +856,7 @@ class CategorizeFilter(Codec):
802856
803857
"""
804858

805-
name = 'categorize'
859+
codec_id = 'categorize'
806860

807861
def __init__(self, labels, dtype, astype='u1'):
808862
self.labels = [_ensure_bytes(l) for l in labels]
@@ -845,7 +899,7 @@ def decode(self, buf, out=None):
845899

846900
def get_config(self):
847901
config = dict()
848-
config['name'] = self.name
902+
config['id'] = self.codec_id
849903
config['labels'] = [text_type(l, 'ascii') for l in self.labels]
850904
config['dtype'] = encode_dtype(self.dtype)
851905
config['astype'] = encode_dtype(self.astype)
@@ -858,5 +912,15 @@ def from_config(cls, config):
858912
labels = config['labels']
859913
return cls(labels=labels, dtype=dtype, astype=astype)
860914

915+
def __repr__(self):
916+
r = '%s(dtype=%s, astype=%s, labels=%r)' % \
917+
(type(self).__name__, self.dtype, self.astype, self.labels)
918+
return r
919+
920+
921+
codec_registry[CategorizeFilter.codec_id] = CategorizeFilter
922+
861923

862-
registry[CategorizeFilter.name] = CategorizeFilter
924+
__all__ = ['get_codec', 'codec_registry']
925+
for _cls in codec_registry.values():
926+
__all__.append(_cls.__name__)

0 commit comments

Comments
 (0)