4
4
import bz2
5
5
import array
6
6
import math
7
+ import multiprocessing
8
+ import atexit
7
9
8
10
9
11
import numpy as np
13
15
from zarr .meta import encode_dtype , decode_dtype
14
16
15
17
16
- registry = dict ()
18
+ codec_registry = dict ()
17
19
18
20
19
21
def get_codec(config):
    """Obtain a codec for the given configuration.

    Parameters
    ----------
    config : dict-like
        Configuration object. The value stored under the ``'id'`` key selects
        the codec class from `codec_registry`; the remaining items are handed
        to that class's ``from_config``.

    Returns
    -------
    codec : Codec

    Raises
    ------
    ValueError
        If no codec class is registered under the requested id (including
        when the ``'id'`` key is absent).

    """
    # Work on a shallow copy so that popping the id does not mutate the
    # caller's configuration (which may be reused or stored as metadata).
    config = dict(config)
    codec_id = config.pop('id', None)
    cls = codec_registry.get(codec_id, None)
    if cls is None:
        raise ValueError('codec not available: %r' % codec_id)
    return cls.from_config(config)
37
39
38
40
39
41
class Codec (object ):
40
42
"""Codec abstract base class."""
41
43
42
44
# override in sub-class; this is the value written under the 'id' key by
# get_config() and used as the key when registering in codec_registry
codec_id = None
44
46
45
47
def encode (self , buf ):
46
48
"""Encode data in `buf`.
@@ -144,7 +146,7 @@ class ZlibCompressor(Codec):
144
146
145
147
"""
146
148
147
- name = 'zlib'
149
+ codec_id = 'zlib'
148
150
149
151
def __init__ (self , level = - 1 ):
150
152
self .level = level
@@ -169,13 +171,17 @@ def decode(self, buf, out=None):
169
171
170
172
def get_config(self):
    """Return a dict holding this codec's id and its compression level."""
    return {'id': self.codec_id, 'level': self.level}
175
177
178
def __repr__(self):
    """Return ``<ClassName>(level=<level>)``."""
    cls_name = type(self).__name__
    return '%s(level=%s)' % (cls_name, self.level)
176
181
177
- registry [ZlibCompressor .name ] = ZlibCompressor
178
- registry ['gzip' ] = ZlibCompressor # alias
182
+
183
+ codec_registry [ZlibCompressor .codec_id ] = ZlibCompressor
184
+ codec_registry ['gzip' ] = ZlibCompressor # alias
179
185
180
186
181
187
class BZ2Compressor (Codec ):
@@ -188,7 +194,7 @@ class BZ2Compressor(Codec):
188
194
189
195
"""
190
196
191
- name = 'bz2'
197
+ codec_id = 'bz2'
192
198
193
199
def __init__ (self , level = 9 ):
194
200
self .level = level
@@ -218,12 +224,16 @@ def decode(self, buf, out=None):
218
224
219
225
def get_config(self):
    """Return a dict holding this codec's id and its compression level."""
    return {'id': self.codec_id, 'level': self.level}
224
230
231
def __repr__(self):
    """Return ``<ClassName>(level=<level>)``."""
    cls_name = type(self).__name__
    return '%s(level=%s)' % (cls_name, self.level)
234
+
225
235
226
- registry [BZ2Compressor .name ] = BZ2Compressor
236
+ codec_registry [BZ2Compressor .codec_id ] = BZ2Compressor
227
237
228
238
229
239
try :
@@ -252,7 +262,7 @@ class LZMACompressor(Codec):
252
262
253
263
"""
254
264
255
- name = 'lzma'
265
+ codec_id = 'lzma'
256
266
257
267
def __init__ (self , format = lzma .FORMAT_XZ , check = - 1 , preset = None ,
258
268
filters = None ):
@@ -289,14 +299,20 @@ def decode(self, buf, out=None):
289
299
290
300
def get_config(self):
    """Return a dict with the codec id plus the ``format``, ``check``,
    ``preset`` and ``filters`` attributes of this instance."""
    return {
        'id': self.codec_id,
        'format': self.format,
        'check': self.check,
        'preset': self.preset,
        'filters': self.filters,
    }
298
308
299
- registry [LZMACompressor .name ] = LZMACompressor
309
def __repr__(self):
    """Return the class name followed by the repr of ``format``, ``check``,
    ``preset`` and ``filters``."""
    values = (type(self).__name__, self.format, self.check, self.preset,
              self.filters)
    return '%s(format=%r, check=%r, preset=%r, filters=%r)' % values
314
+
315
+ codec_registry [LZMACompressor .codec_id ] = LZMACompressor
300
316
301
317
try :
302
318
from zarr import blosc
@@ -319,7 +335,7 @@ class BloscCompressor(Codec):
319
335
320
336
"""
321
337
322
- name = 'blosc'
338
+ codec_id = 'blosc'
323
339
324
340
def __init__ (self , cname = 'lz4' , clevel = 5 , shuffle = 1 ):
325
341
if isinstance (cname , text_type ):
@@ -336,13 +352,25 @@ def decode(self, buf, out=None):
336
352
337
353
def get_config(self):
    """Return a dict with the codec id, ``cname`` (converted with
    ``text_type(..., 'ascii')``), ``clevel`` and ``shuffle``."""
    cname = text_type(self.cname, 'ascii')
    return {
        'id': self.codec_id,
        'cname': cname,
        'clevel': self.clevel,
        'shuffle': self.shuffle,
    }
344
360
345
- registry [BloscCompressor .name ] = BloscCompressor
361
def __repr__(self):
    """Return the class name followed by the repr of ``cname`` (converted
    with ``text_type(..., 'ascii')``), ``clevel`` and ``shuffle``."""
    cls_name = type(self).__name__
    cname = text_type(self.cname, 'ascii')
    return '%s(cname=%r, clevel=%r, shuffle=%r)' % \
        (cls_name, cname, self.clevel, self.shuffle)
366
+
367
+ codec_registry [BloscCompressor .codec_id ] = BloscCompressor
368
+
369
+ # initialize blosc
370
+ ncores = multiprocessing .cpu_count ()
371
+ blosc .init ()
372
+ blosc .set_nthreads (min (8 , ncores ))
373
+ atexit .register (blosc .destroy )
346
374
347
375
348
376
def _ndarray_from_buffer (buf , dtype ):
@@ -387,7 +415,7 @@ class DeltaFilter(Codec):
387
415
388
416
""" # flake8: noqa
389
417
390
- name = 'delta'
418
+ codec_id = 'delta'
391
419
392
420
def __init__ (self , dtype , astype = None ):
393
421
self .dtype = np .dtype (dtype )
@@ -437,7 +465,7 @@ def decode(self, buf, out=None):
437
465
438
466
def get_config(self):
    """Return a dict with the codec id and the ``dtype`` / ``astype``
    attributes, each passed through ``encode_dtype``."""
    return {
        'id': self.codec_id,
        'dtype': encode_dtype(self.dtype),
        'astype': encode_dtype(self.astype),
    }
@@ -448,8 +476,15 @@ def from_config(cls, config):
448
476
astype = decode_dtype (config ['astype' ])
449
477
return cls (dtype = dtype , astype = astype )
450
478
479
def __repr__(self):
    """Return ``<ClassName>(dtype=...)``; ``astype`` is shown only when it
    differs from ``dtype``."""
    pieces = ['%s(dtype=%s' % (type(self).__name__, self.dtype)]
    if self.astype != self.dtype:
        pieces.append(', astype=%s' % self.astype)
    pieces.append(')')
    return ''.join(pieces)
485
+
451
486
452
- registry [DeltaFilter .name ] = DeltaFilter
487
+ codec_registry [DeltaFilter .codec_id ] = DeltaFilter
453
488
454
489
455
490
class FixedScaleOffsetFilter (Codec ):
@@ -514,7 +549,7 @@ class FixedScaleOffsetFilter(Codec):
514
549
515
550
""" # flake8: noqa
516
551
517
- name = 'fixedscaleoffset'
552
+ codec_id = 'fixedscaleoffset'
518
553
519
554
def __init__ (self , offset , scale , dtype , astype = None ):
520
555
self .offset = offset
@@ -557,7 +592,7 @@ def decode(self, buf, out=None):
557
592
558
593
def get_config (self ):
559
594
config = dict ()
560
- config ['name ' ] = self .name
595
+ config ['id ' ] = self .codec_id
561
596
config ['astype' ] = encode_dtype (self .astype )
562
597
config ['dtype' ] = encode_dtype (self .dtype )
563
598
config ['scale' ] = self .scale
@@ -573,8 +608,15 @@ def from_config(cls, config):
573
608
return cls (astype = astype , dtype = dtype , scale = scale ,
574
609
offset = offset )
575
610
611
def __repr__(self):
    """Return ``<ClassName>(scale=..., offset=..., dtype=...)``; ``astype``
    is shown only when it differs from ``dtype``."""
    head = '%s(scale=%s, offset=%s, dtype=%s' % \
        (type(self).__name__, self.scale, self.offset, self.dtype)
    if self.astype != self.dtype:
        return head + ', astype=%s' % self.astype + ')'
    return head + ')'
576
618
577
- registry [FixedScaleOffsetFilter .name ] = FixedScaleOffsetFilter
619
+ codec_registry [FixedScaleOffsetFilter .codec_id ] = FixedScaleOffsetFilter
578
620
579
621
580
622
class QuantizeFilter (Codec ):
@@ -615,7 +657,7 @@ class QuantizeFilter(Codec):
615
657
616
658
"""
617
659
618
- name = 'quantize'
660
+ codec_id = 'quantize'
619
661
620
662
def __init__ (self , digits , dtype , astype = None ):
621
663
self .digits = digits
@@ -656,7 +698,7 @@ def decode(self, buf, out=None):
656
698
657
699
def get_config (self ):
658
700
config = dict ()
659
- config ['name ' ] = self .filter_name
701
+ config ['id ' ] = self .codec_id
660
702
config ['digits' ] = self .digits
661
703
config ['dtype' ] = encode_dtype (self .dtype )
662
704
config ['astype' ] = encode_dtype (self .astype )
@@ -669,8 +711,16 @@ def from_config(cls, config):
669
711
digits = config ['digits' ]
670
712
return cls (digits = digits , dtype = dtype , astype = astype )
671
713
714
def __repr__(self):
    """Return ``<ClassName>(digits=..., dtype=...)``; ``astype`` is shown
    only when it differs from ``dtype``."""
    head = '%s(digits=%s, dtype=%s' % \
        (type(self).__name__, self.digits, self.dtype)
    tail = ', astype=%s' % self.astype if self.astype != self.dtype else ''
    return head + tail + ')'
672
721
673
- registry [QuantizeFilter .name ] = QuantizeFilter
722
+
723
+ codec_registry [QuantizeFilter .codec_id ] = QuantizeFilter
674
724
675
725
676
726
class PackBitsFilter (Codec ):
@@ -696,7 +746,7 @@ class PackBitsFilter(Codec):
696
746
697
747
"""
698
748
699
- name = 'packbits'
749
+ codec_id = 'packbits'
700
750
701
751
def __init__ (self ):
702
752
pass
@@ -751,15 +801,19 @@ def decode(self, buf, out=None):
751
801
752
802
def get_config(self):
    """Return a dict containing only this codec's id."""
    return {'id': self.codec_id}
756
806
757
807
@classmethod
def from_config(cls, config):
    """Build an instance of `cls`; `config` is accepted but not used because
    the constructor is called with no arguments."""
    instance = cls()
    return instance
760
810
811
def __repr__(self):
    """Return the class name followed by an empty pair of parentheses."""
    return '%s()' % type(self).__name__
814
+
761
815
762
- registry [PackBitsFilter .name ] = PackBitsFilter
816
+ codec_registry [PackBitsFilter .codec_id ] = PackBitsFilter
763
817
764
818
765
819
def _ensure_bytes (l ):
@@ -791,7 +845,7 @@ class CategorizeFilter(Codec):
791
845
>>> x
792
846
array([b'male', b'female', b'female', b'male', b'unexpected'],
793
847
dtype='|S10')
794
- >>> f = zarr.CategoryFilter (labels=[b'female', b'male'], dtype=x.dtype)
848
+ >>> f = zarr.CategorizeFilter (labels=[b'female', b'male'], dtype=x.dtype)
795
849
>>> y = f.encode(x)
796
850
>>> y
797
851
array([2, 1, 1, 2, 0], dtype=uint8)
@@ -802,7 +856,7 @@ class CategorizeFilter(Codec):
802
856
803
857
"""
804
858
805
- name = 'categorize'
859
+ codec_id = 'categorize'
806
860
807
861
def __init__ (self , labels , dtype , astype = 'u1' ):
808
862
self .labels = [_ensure_bytes (l ) for l in labels ]
@@ -845,7 +899,7 @@ def decode(self, buf, out=None):
845
899
846
900
def get_config (self ):
847
901
config = dict ()
848
- config ['name ' ] = self .name
902
+ config ['id ' ] = self .codec_id
849
903
config ['labels' ] = [text_type (l , 'ascii' ) for l in self .labels ]
850
904
config ['dtype' ] = encode_dtype (self .dtype )
851
905
config ['astype' ] = encode_dtype (self .astype )
@@ -858,5 +912,15 @@ def from_config(cls, config):
858
912
labels = config ['labels' ]
859
913
return cls (labels = labels , dtype = dtype , astype = astype )
860
914
915
def __repr__(self):
    """Return ``<ClassName>(dtype=..., astype=..., labels=...)`` where the
    labels are rendered with ``%r``."""
    values = (type(self).__name__, self.dtype, self.astype, self.labels)
    return '%s(dtype=%s, astype=%s, labels=%r)' % values
919
+
920
+
921
+ codec_registry [CategorizeFilter .codec_id ] = CategorizeFilter
922
+
861
923
862
- registry [CategorizeFilter .name ] = CategorizeFilter
924
# Public API: the lookup function, the registry itself, and every registered
# codec class. A class may be registered under more than one id (an alias),
# so skip names that are already listed to keep __all__ free of duplicates.
__all__ = ['get_codec', 'codec_registry']
for _cls in codec_registry.values():
    if _cls.__name__ not in __all__:
        __all__.append(_cls.__name__)
0 commit comments