Skip to content

Commit 9b7a309

Browse files
committed
Move kwargs treatment inside of core SChunk
1 parent 59d2959 commit 9b7a309

File tree

2 files changed

+34
-30
lines changed

2 files changed

+34
-30
lines changed

blosc2/SChunk.py

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -58,14 +58,14 @@ def getall(self):
5858

5959

6060
class SChunk(blosc2_ext.SChunk):
61-
def __init__(self, chunksize=2 ** 24, data=None, **kwargs):
61+
def __init__(self, chunksize=None, data=None, **kwargs):
6262
"""Create a new super-chunk.
6363
6464
Parameters
6565
----------
6666
chunksize: int
6767
The size, in bytes, of the chunks from the super-chunk. If not provided,
68-
it is set to 16 MB.
68+
it defaults to the byte size of `data` (capped at 256 MB), or to 16 MB without data.
6969
7070
data: bytes-like object, optional
7171
The data to be split into different chunks of size :paramref:`chunksize`.
@@ -99,13 +99,34 @@ def __init__(self, chunksize=2 ** 24, data=None, **kwargs):
9999
>>> storage = {"contiguous": True, "cparams": {}, "dparams": {}}
100100
>>> schunk = blosc2.SChunk(**storage)
101101
"""
102-
if kwargs is not None:
103-
# This a private param to get an SChunk from a blosc2_schunk*
104-
sc = kwargs.pop("schunk", None)
105-
self.urlpath = kwargs.get("urlpath", None)
106-
else:
107-
self.urlpath = None
108-
sc = None
102+
self.urlpath = kwargs.get("urlpath", None)
103+
if 'contiguous' not in kwargs:
104+
# Make contiguous true for disk, else sparse (for in-memory performance)
105+
kwargs['contiguous'] = False if self.urlpath is None else True
106+
107+
# This is a private param to get an SChunk from a blosc2_schunk*
108+
sc = kwargs.pop("schunk", None)
109+
110+
# If not passed, set a sensible typesize
111+
if data is not None and hasattr(data, "itemsize"):
112+
if 'cparams' in kwargs and 'typesize' not in kwargs['cparams']:
113+
cparams = kwargs.pop('cparams').copy()
114+
cparams['typesize'] = data.itemsize
115+
kwargs['cparams'] = cparams
116+
elif 'typesize' not in kwargs:
117+
kwargs['typesize'] = data.itemsize
118+
119+
# chunksize handling
120+
if chunksize is None:
121+
chunksize = 2 ** 24
122+
if data is not None:
123+
chunksize = data.size * data.itemsize
124+
# Make that a multiple of typesize
125+
chunksize = chunksize // data.itemsize * data.itemsize
126+
# Use a cap of 256 MB (most modern machines should have this much RAM available)
127+
if chunksize > 2 ** 28:
128+
chunksize = 2 ** 28
129+
109130
super(SChunk, self).__init__(schunk=sc, chunksize=chunksize, data=data, **kwargs)
110131
self.vlmeta = vlmeta(super(SChunk, self).c_schunk, self.urlpath, self.mode)
111132

blosc2/core.py

Lines changed: 4 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -596,26 +596,9 @@ def pack_tensor(tensor, chunksize=None, **kwargs):
596596
"""
597597
import numpy as np
598598
arr = np.asarray(tensor)
599-
# If not passed, set a sensible typesize
600-
if 'cparams' in kwargs and 'typesize' not in kwargs['cparams']:
601-
cparams = kwargs.pop('cparams').copy()
602-
cparams['typesize'] = arr.itemsize
603-
kwargs['cparams'] = cparams
604-
elif 'typesize' not in kwargs:
605-
kwargs['typesize'] = arr.itemsize
606-
607-
urlpath = kwargs.get('urlpath', None)
608-
if 'contiguous' not in kwargs:
609-
kwargs['contiguous'] = False if urlpath is None else True
610-
611-
if chunksize is None:
612-
chunksize = arr.size * arr.itemsize
613-
# Use a cap of 256 MB (most of the modern machines should have this RAM available)
614-
if chunksize > 2 ** 28:
615-
chunksize = 2 ** 28
616-
# Make that a multiple of typesize
617-
chunksize = chunksize // arr.itemsize * arr.itemsize
599+
618600
schunk = blosc2.SChunk(chunksize=chunksize, data=arr, **kwargs)
601+
619602
# Guess the kind of tensor / array
620603
repr_tensor = repr(tensor)
621604
if "tensor" in repr_tensor:
@@ -632,10 +615,10 @@ def pack_tensor(tensor, chunksize=None, **kwargs):
632615

633616
schunk.vlmeta['__pack_tensor__'] = (kind, arr.shape, dtype)
634617

635-
if urlpath is None:
618+
if schunk.urlpath is None:
636619
return schunk.to_cframe()
637620
else:
638-
return os.stat(urlpath).st_size
621+
return os.stat(schunk.urlpath).st_size
639622

640623

641624
def _unpack_tensor(schunk):

0 commit comments

Comments
 (0)