Skip to content

Commit 463ff4e

Browse files
committed
Reorder compression modes
1 parent 160428f commit 463ff4e

File tree

2 files changed

+62
-61
lines changed

2 files changed

+62
-61
lines changed

README.rst

Lines changed: 47 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -132,73 +132,74 @@ improved vs the previous version (labeled as `pack_array/unpack_array`)::
132132
Number of threads to use by default: 8
133133
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
134134
Creating NumPy arrays with 10**8 int64/float64 elements:
135-
Time for copying array with np.copy: 0.196 s (3.80 GB/s))
135+
Time for copying array with np.copy: 0.394 s (3.79 GB/s))
136+
136137

137138
*** the arange linear distribution ***
138139
Using *** Codec.BLOSCLZ *** compressor:
139-
Time for pack_array/unpack_array: 0.312/0.414 s (2.39/1.80 GB/s)) cr: 441.6x
140-
Time for pack_array2/unpack_array2: 0.039/0.084 s (19.02/8.88 GB/s)) cr: 444.0x
141-
Time for compress/decompress: 0.037/0.057 s (20.26/12.98 GB/s)) cr: 444.1x
140+
Time for compress/decompress: 0.051/0.101 s (29.08/14.80 GB/s)) cr: 444.3x
141+
Time for pack_array/unpack_array: 0.600/0.764 s (2.49/1.95 GB/s)) cr: 442.3x
142+
Time for pack_array2/unpack_array2: 0.059/0.158 s (25.28/9.44 GB/s)) cr: 444.2x
142143
Using *** Codec.LZ4 *** compressor:
143-
Time for pack_array/unpack_array: 0.308/0.384 s (2.42/1.94 GB/s)) cr: 277.7x
144-
Time for pack_array2/unpack_array2: 0.037/0.096 s (20.27/7.80 GB/s)) cr: 279.2x
145-
Time for compress/decompress: 0.034/0.053 s (22.19/13.98 GB/s)) cr: 279.2x
144+
Time for compress/decompress: 0.059/0.116 s (25.07/12.82 GB/s)) cr: 279.2x
145+
Time for pack_array/unpack_array: 0.615/0.758 s (2.42/1.97 GB/s)) cr: 277.9x
146+
Time for pack_array2/unpack_array2: 0.058/0.160 s (25.52/9.31 GB/s)) cr: 279.2x
146147
Using *** Codec.LZ4HC *** compressor:
147-
Time for pack_array/unpack_array: 0.423/0.386 s (1.76/1.93 GB/s)) cr: 155.4x
148-
Time for pack_array2/unpack_array2: 0.119/0.094 s (6.27/7.94 GB/s)) cr: 155.9x
149-
Time for compress/decompress: 0.120/0.044 s (6.21/16.77 GB/s)) cr: 155.9x
148+
Time for compress/decompress: 0.193/0.085 s (7.71/17.45 GB/s)) cr: 155.9x
149+
Time for pack_array/unpack_array: 0.786/0.754 s (1.89/1.98 GB/s)) cr: 155.4x
150+
Time for pack_array2/unpack_array2: 0.218/0.165 s (6.84/9.02 GB/s)) cr: 155.9x
150151
Using *** Codec.ZLIB *** compressor:
151-
Time for pack_array/unpack_array: 0.404/0.423 s (1.84/1.76 GB/s)) cr: 273.3x
152-
Time for pack_array2/unpack_array2: 0.139/0.126 s (5.38/5.90 GB/s)) cr: 273.8x
153-
Time for compress/decompress: 0.130/0.078 s (5.75/9.58 GB/s)) cr: 273.8x
152+
Time for compress/decompress: 0.250/0.141 s (5.96/10.55 GB/s)) cr: 273.8x
153+
Time for pack_array/unpack_array: 0.799/0.845 s (1.87/1.76 GB/s)) cr: 273.2x
154+
Time for pack_array2/unpack_array2: 0.261/0.243 s (5.71/6.13 GB/s)) cr: 273.8x
154155
Using *** Codec.ZSTD *** compressor:
155-
Time for pack_array/unpack_array: 0.398/0.410 s (1.87/1.82 GB/s)) cr: 630.8x
156-
Time for pack_array2/unpack_array2: 0.121/0.088 s (6.16/8.50 GB/s)) cr: 644.7x
157-
Time for compress/decompress: 0.112/0.045 s (6.65/16.58 GB/s)) cr: 644.9x
156+
Time for compress/decompress: 0.189/0.079 s (7.89/18.92 GB/s)) cr: 644.9x
157+
Time for pack_array/unpack_array: 0.725/0.770 s (2.06/1.94 GB/s)) cr: 630.9x
158+
Time for pack_array2/unpack_array2: 0.206/0.143 s (7.25/10.39 GB/s)) cr: 644.8x
158159

159160
*** the linspace linear distribution ***
160161
Using *** Codec.BLOSCLZ *** compressor:
161-
Time for pack_array/unpack_array: 0.333/0.398 s (2.24/1.87 GB/s)) cr: 35.7x
162-
Time for pack_array2/unpack_array2: 0.095/0.096 s (7.81/7.74 GB/s)) cr: 35.6x
163-
Time for compress/decompress: 0.076/0.062 s (9.82/12.02 GB/s)) cr: 35.6x
162+
Time for compress/decompress: 0.091/0.113 s (16.34/13.21 GB/s)) cr: 50.1x
163+
Time for pack_array/unpack_array: 0.623/0.751 s (2.39/1.98 GB/s)) cr: 50.0x
164+
Time for pack_array2/unpack_array2: 0.124/0.163 s (11.98/9.12 GB/s)) cr: 50.1x
164165
Using *** Codec.LZ4 *** compressor:
165-
Time for pack_array/unpack_array: 0.327/0.398 s (2.28/1.87 GB/s)) cr: 40.5x
166-
Time for pack_array2/unpack_array2: 0.063/0.095 s (11.91/7.82 GB/s)) cr: 40.5x
167-
Time for compress/decompress: 0.059/0.060 s (12.73/12.45 GB/s)) cr: 40.5x
166+
Time for compress/decompress: 0.077/0.114 s (19.33/13.12 GB/s)) cr: 55.7x
167+
Time for pack_array/unpack_array: 0.624/0.740 s (2.39/2.01 GB/s)) cr: 55.8x
168+
Time for pack_array2/unpack_array2: 0.098/0.190 s (15.19/7.83 GB/s)) cr: 55.7x
168169
Using *** Codec.LZ4HC *** compressor:
169-
Time for pack_array/unpack_array: 0.555/0.406 s (1.34/1.83 GB/s)) cr: 44.7x
170-
Time for pack_array2/unpack_array2: 0.291/0.093 s (2.56/8.04 GB/s)) cr: 44.7x
171-
Time for compress/decompress: 0.259/0.036 s (2.88/20.49 GB/s)) cr: 44.7x
170+
Time for compress/decompress: 0.352/0.075 s (4.23/19.98 GB/s)) cr: 53.6x
171+
Time for pack_array/unpack_array: 0.918/0.781 s (1.62/1.91 GB/s)) cr: 53.6x
172+
Time for pack_array2/unpack_array2: 0.389/0.139 s (3.83/10.72 GB/s)) cr: 53.6x
172173
Using *** Codec.ZLIB *** compressor:
173-
Time for pack_array/unpack_array: 0.516/0.427 s (1.44/1.74 GB/s)) cr: 44.6x
174-
Time for pack_array2/unpack_array2: 0.265/0.132 s (2.82/5.67 GB/s)) cr: 44.6x
175-
Time for compress/decompress: 0.235/0.060 s (3.17/12.33 GB/s)) cr: 44.6x
174+
Time for compress/decompress: 0.395/0.148 s (3.77/10.08 GB/s)) cr: 50.4x
175+
Time for pack_array/unpack_array: 0.940/0.824 s (1.59/1.81 GB/s)) cr: 50.4x
176+
Time for pack_array2/unpack_array2: 0.433/0.252 s (3.44/5.92 GB/s)) cr: 50.4x
176177
Using *** Codec.ZSTD *** compressor:
177-
Time for pack_array/unpack_array: 0.470/0.396 s (1.58/1.88 GB/s)) cr: 78.8x
178-
Time for pack_array2/unpack_array2: 0.189/0.099 s (3.93/7.53 GB/s)) cr: 78.8x
179-
Time for compress/decompress: 0.183/0.072 s (4.07/10.36 GB/s)) cr: 78.8x
178+
Time for compress/decompress: 0.402/0.098 s (3.71/15.22 GB/s)) cr: 74.7x
179+
Time for pack_array/unpack_array: 0.949/0.782 s (1.57/1.91 GB/s)) cr: 74.7x
180+
Time for pack_array2/unpack_array2: 0.426/0.175 s (3.50/8.49 GB/s)) cr: 74.7x
180181

181182
*** the random distribution ***
182183
Using *** Codec.BLOSCLZ *** compressor:
183-
Time for pack_array/unpack_array: 0.419/0.401 s (1.78/1.86 GB/s)) cr: 4.0x
184-
Time for pack_array2/unpack_array2: 0.301/0.076 s (2.48/9.85 GB/s)) cr: 4.0x
185-
Time for compress/decompress: 0.148/0.059 s (5.03/12.70 GB/s)) cr: 4.0x
184+
Time for compress/decompress: 0.240/0.119 s (6.22/12.48 GB/s)) cr: 4.0x
185+
Time for pack_array/unpack_array: 0.794/0.767 s (1.88/1.94 GB/s)) cr: 4.0x
186+
Time for pack_array2/unpack_array2: 0.578/0.162 s (2.58/9.20 GB/s)) cr: 4.0x
186187
Using *** Codec.LZ4 *** compressor:
187-
Time for pack_array/unpack_array: 0.402/0.401 s (1.85/1.86 GB/s)) cr: 4.0x
188-
Time for pack_array2/unpack_array2: 0.310/0.090 s (2.40/8.31 GB/s)) cr: 4.0x
189-
Time for compress/decompress: 0.130/0.060 s (5.73/12.35 GB/s)) cr: 4.0x
188+
Time for compress/decompress: 0.250/0.114 s (5.97/13.11 GB/s)) cr: 4.0x
189+
Time for pack_array/unpack_array: 0.794/0.767 s (1.88/1.94 GB/s)) cr: 4.0x
190+
Time for pack_array2/unpack_array2: 0.590/0.161 s (2.53/9.24 GB/s)) cr: 4.0x
190191
Using *** Codec.LZ4HC *** compressor:
191-
Time for pack_array/unpack_array: 0.866/0.411 s (0.86/1.81 GB/s)) cr: 4.0x
192-
Time for pack_array2/unpack_array2: 0.744/0.076 s (1.00/9.76 GB/s)) cr: 4.0x
193-
Time for compress/decompress: 0.568/0.062 s (1.31/12.05 GB/s)) cr: 4.0x
192+
Time for compress/decompress: 1.102/0.088 s (1.35/17.01 GB/s)) cr: 4.0x
193+
Time for pack_array/unpack_array: 1.690/0.758 s (0.88/1.97 GB/s)) cr: 4.0x
194+
Time for pack_array2/unpack_array2: 1.445/0.178 s (1.03/8.38 GB/s)) cr: 4.0x
194195
Using *** Codec.ZLIB *** compressor:
195-
Time for pack_array/unpack_array: 0.961/0.446 s (0.78/1.67 GB/s)) cr: 4.7x
196-
Time for pack_array2/unpack_array2: 0.826/0.166 s (0.90/4.50 GB/s)) cr: 4.7x
197-
Time for compress/decompress: 0.681/0.107 s (1.09/6.96 GB/s)) cr: 4.7x
196+
Time for compress/decompress: 1.258/0.210 s (1.18/7.11 GB/s)) cr: 4.7x
197+
Time for pack_array/unpack_array: 1.822/0.898 s (0.82/1.66 GB/s)) cr: 4.7x
198+
Time for pack_array2/unpack_array2: 1.549/0.355 s (0.96/4.20 GB/s)) cr: 4.7x
198199
Using *** Codec.ZSTD *** compressor:
199-
Time for pack_array/unpack_array: 1.105/0.414 s (0.67/1.80 GB/s)) cr: 4.4x
200-
Time for pack_array2/unpack_array2: 1.066/0.093 s (0.70/7.99 GB/s)) cr: 4.4x
201-
Time for compress/decompress: 0.828/0.052 s (0.90/14.45 GB/s)) cr: 4.4x
200+
Time for compress/decompress: 1.653/0.098 s (0.90/15.21 GB/s)) cr: 4.4x
201+
Time for pack_array/unpack_array: 2.206/0.796 s (0.68/1.87 GB/s)) cr: 4.4x
202+
Time for pack_array2/unpack_array2: 2.077/0.179 s (0.72/8.30 GB/s)) cr: 4.4x
202203

203204
As can be seen, it is perfectly possible for python-blosc2 to go faster than a plain memcpy(). But more interestingly, you can easily choose the codecs and filters that best suit your datasets, and persist and transmit them faster while using less memory.
204205

bench/pack_compress.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818

1919
NREP = 1
20-
N = int(1e8)
20+
N = int(2e8)
2121
Nexp = np.log10(N)
2222

2323
comprehensive_copy_timing = False
@@ -42,8 +42,8 @@
4242
)
4343

4444
if comprehensive_copy_timing:
45-
out_ = np.empty_like(in_)
4645
tic = time.time()
46+
out_ = np.empty_like(in_)
4747
for i in range(NREP):
4848
np.copyto(out_, in_)
4949
toc = time.time()
@@ -57,8 +57,8 @@
5757
# empty_like and explicitly assigns zeros, which is basically like calling
5858
# full_like
5959
# Here we benchmark what happens when we allocate memory using calloc
60-
out_ = np.zeros(in_.shape, dtype=in_.dtype)
6160
tic = time.time()
61+
out_ = np.zeros(in_.shape, dtype=in_.dtype)
6262
for i in range(NREP):
6363
np.copyto(out_, in_)
6464
toc = time.time()
@@ -69,8 +69,8 @@
6969
)
7070

7171
# Cause a page fault before the benchmark
72-
out_ = np.full_like(in_, fill_value=0)
7372
tic = time.time()
73+
out_ = np.full_like(in_, fill_value=0)
7474
for i in range(NREP):
7575
np.copyto(out_, in_)
7676
toc = time.time()
@@ -80,8 +80,8 @@
8080
% (tcpy, ((N * 8 / tcpy) / 2 ** 30))
8181
)
8282

83-
out_ = np.full_like(in_, fill_value=0)
8483
tic = time.time()
84+
out_ = np.full_like(in_, fill_value=0)
8585
for i in range(NREP):
8686
out_[...] = in_
8787
toc = time.time()
@@ -102,57 +102,57 @@
102102

103103
ctic = time.time()
104104
for i in range(NREP):
105-
c = blosc2.pack_array(in_, clevel=clevel, codec=codec)
105+
c = blosc2.compress(in_, clevel=clevel, codec=codec)
106106
ctoc = time.time()
107107
dtic = time.time()
108+
out = np.empty_like(in_)
108109
for i in range(NREP):
109-
out = blosc2.unpack_array(c)
110+
blosc2.decompress(c, dst=out)
110111
dtoc = time.time()
111112

112113
assert np.array_equal(in_, out)
113114
tc = (ctoc - ctic) / NREP
114115
td = (dtoc - dtic) / NREP
115116
print(
116-
" Time for pack_array/unpack_array: %.3f/%.3f s (%.2f/%.2f GB/s)) "
117+
" Time for compress/decompress: %.3f/%.3f s (%.2f/%.2f GB/s)) "
117118
% (tc, td, ((N * 8 / tc) / 2 ** 30), ((N * 8 / td) / 2 ** 30)),
118119
end="",
119120
)
120121
print("\tcr: %5.1fx" % (in_.size * in_.dtype.itemsize * 1.0 / len(c)))
121122

122123
ctic = time.time()
123124
for i in range(NREP):
124-
c = blosc2.pack_array2(in_, cparams=cparams)
125+
c = blosc2.pack_array(in_, clevel=clevel, codec=codec)
125126
ctoc = time.time()
126127
dtic = time.time()
127128
for i in range(NREP):
128-
out = blosc2.unpack_array2(c)
129+
out = blosc2.unpack_array(c)
129130
dtoc = time.time()
130131

131132
assert np.array_equal(in_, out)
132133
tc = (ctoc - ctic) / NREP
133134
td = (dtoc - dtic) / NREP
134135
print(
135-
" Time for pack_array2/unpack_array2: %.3f/%.3f s (%.2f/%.2f GB/s)) "
136+
" Time for pack_array/unpack_array: %.3f/%.3f s (%.2f/%.2f GB/s)) "
136137
% (tc, td, ((N * 8 / tc) / 2 ** 30), ((N * 8 / td) / 2 ** 30)),
137138
end="",
138139
)
139140
print("\tcr: %5.1fx" % (in_.size * in_.dtype.itemsize * 1.0 / len(c)))
140141

141142
ctic = time.time()
142143
for i in range(NREP):
143-
c = blosc2.compress(in_, clevel=clevel, codec=codec)
144+
c = blosc2.pack_array2(in_, cparams=cparams)
144145
ctoc = time.time()
145146
dtic = time.time()
146-
out = np.empty_like(in_)
147147
for i in range(NREP):
148-
blosc2.decompress(c, dst=out)
148+
out = blosc2.unpack_array2(c)
149149
dtoc = time.time()
150150

151151
assert np.array_equal(in_, out)
152152
tc = (ctoc - ctic) / NREP
153153
td = (dtoc - dtic) / NREP
154154
print(
155-
" Time for compress/decompress: %.3f/%.3f s (%.2f/%.2f GB/s)) "
155+
" Time for pack_array2/unpack_array2: %.3f/%.3f s (%.2f/%.2f GB/s)) "
156156
% (tc, td, ((N * 8 / tc) / 2 ** 30), ((N * 8 / td) / 2 ** 30)),
157157
end="",
158158
)

0 commit comments

Comments
 (0)