Skip to content

Commit c8db5b1

Browse files
authored
Merge pull request #70 from alimanfoo/issue_55
blosc returns bytes; resolves #55
2 parents 1ddaa66 + cc11c65 commit c8db5b1

File tree

6 files changed

+999
-609
lines changed

6 files changed

+999
-609
lines changed

docs/release.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
Release notes
22
=============
33

4+
* The Blosc extension has been modified to return bytes instead of array
5+
objects from compress and decompress function calls. This should
6+
improve compatibility and also provide a small performance increase for
7+
compressing data with a high compression ratio
8+
(`#55 <https://github.com/alimanfoo/zarr/issues/55>`_).
49
* Added ``overwrite`` keyword argument to array and group creation methods
510
on the :class:`zarr.hierarchy.Group` class
611
(`#71 <https://github.com/alimanfoo/zarr/issues/71>`_).
Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {
7+
"collapsed": false
8+
},
9+
"outputs": [
10+
{
11+
"data": {
12+
"text/plain": [
13+
"'2.0.1'"
14+
]
15+
},
16+
"execution_count": 1,
17+
"metadata": {},
18+
"output_type": "execute_result"
19+
}
20+
],
21+
"source": [
22+
"import numpy as np\n",
23+
"import zarr\n",
24+
"zarr.__version__"
25+
]
26+
},
27+
{
28+
"cell_type": "code",
29+
"execution_count": 2,
30+
"metadata": {
31+
"collapsed": false
32+
},
33+
"outputs": [
34+
{
35+
"name": "stdout",
36+
"output_type": "stream",
37+
"text": [
38+
"10 loops, best of 3: 110 ms per loop\n",
39+
"1 loop, best of 3: 235 ms per loop\n",
40+
"Array((100000000,), int64, chunks=(200000,), order=C)\n",
41+
" nbytes: 762.9M; nbytes_stored: 11.2M; ratio: 67.8; initialized: 500/500\n",
42+
" compressor: Blosc(cname='lz4', clevel=5, shuffle=1)\n",
43+
" store: dict\n"
44+
]
45+
}
46+
],
47+
"source": [
48+
"z = zarr.empty(shape=100000000, chunks=200000, dtype='i8')\n",
49+
"data = np.arange(100000000, dtype='i8')\n",
50+
"%timeit z[:] = data\n",
51+
"%timeit z[:]\n",
52+
"print(z)\n",
53+
"assert np.all(z[:] == data)"
54+
]
55+
},
56+
{
57+
"cell_type": "code",
58+
"execution_count": 3,
59+
"metadata": {
60+
"collapsed": false
61+
},
62+
"outputs": [
63+
{
64+
"name": "stdout",
65+
"output_type": "stream",
66+
"text": [
67+
"1 loop, best of 3: 331 ms per loop\n",
68+
"1 loop, best of 3: 246 ms per loop\n",
69+
"Array((100000000,), float64, chunks=(200000,), order=C)\n",
70+
" nbytes: 762.9M; nbytes_stored: 724.8M; ratio: 1.1; initialized: 500/500\n",
71+
" compressor: Blosc(cname='lz4', clevel=5, shuffle=1)\n",
72+
" store: dict\n"
73+
]
74+
}
75+
],
76+
"source": [
77+
"z = zarr.empty(shape=100000000, chunks=200000, dtype='f8')\n",
78+
"data = np.random.normal(size=100000000)\n",
79+
"%timeit z[:] = data\n",
80+
"%timeit z[:]\n",
81+
"print(z)\n",
82+
"assert np.all(z[:] == data)"
83+
]
84+
},
85+
{
86+
"cell_type": "code",
87+
"execution_count": 1,
88+
"metadata": {
89+
"collapsed": false
90+
},
91+
"outputs": [
92+
{
93+
"data": {
94+
"text/plain": [
95+
"'2.0.2.dev0+dirty'"
96+
]
97+
},
98+
"execution_count": 1,
99+
"metadata": {},
100+
"output_type": "execute_result"
101+
}
102+
],
103+
"source": [
104+
"import numpy as np\n",
105+
"import sys\n",
106+
"sys.path.insert(0, '..')\n",
107+
"import zarr\n",
108+
"zarr.__version__"
109+
]
110+
},
111+
{
112+
"cell_type": "code",
113+
"execution_count": 2,
114+
"metadata": {
115+
"collapsed": false
116+
},
117+
"outputs": [
118+
{
119+
"name": "stdout",
120+
"output_type": "stream",
121+
"text": [
122+
"10 loops, best of 3: 92.7 ms per loop\n",
123+
"1 loop, best of 3: 230 ms per loop\n",
124+
"Array((100000000,), int64, chunks=(200000,), order=C)\n",
125+
" nbytes: 762.9M; nbytes_stored: 11.2M; ratio: 67.8; initialized: 500/500\n",
126+
" compressor: Blosc(cname='lz4', clevel=5, shuffle=1)\n",
127+
" store: dict\n"
128+
]
129+
}
130+
],
131+
"source": [
132+
"z = zarr.empty(shape=100000000, chunks=200000, dtype='i8')\n",
133+
"data = np.arange(100000000, dtype='i8')\n",
134+
"%timeit z[:] = data\n",
135+
"%timeit z[:]\n",
136+
"print(z)\n",
137+
"assert np.all(z[:] == data)"
138+
]
139+
},
140+
{
141+
"cell_type": "code",
142+
"execution_count": 3,
143+
"metadata": {
144+
"collapsed": false
145+
},
146+
"outputs": [
147+
{
148+
"name": "stdout",
149+
"output_type": "stream",
150+
"text": [
151+
"1 loop, best of 3: 338 ms per loop\n",
152+
"1 loop, best of 3: 253 ms per loop\n",
153+
"Array((100000000,), float64, chunks=(200000,), order=C)\n",
154+
" nbytes: 762.9M; nbytes_stored: 724.8M; ratio: 1.1; initialized: 500/500\n",
155+
" compressor: Blosc(cname='lz4', clevel=5, shuffle=1)\n",
156+
" store: dict\n"
157+
]
158+
}
159+
],
160+
"source": [
161+
"z = zarr.empty(shape=100000000, chunks=200000, dtype='f8')\n",
162+
"data = np.random.normal(size=100000000)\n",
163+
"%timeit z[:] = data\n",
164+
"%timeit z[:]\n",
165+
"print(z)\n",
166+
"assert np.all(z[:] == data)"
167+
]
168+
},
169+
{
170+
"cell_type": "code",
171+
"execution_count": null,
172+
"metadata": {
173+
"collapsed": true
174+
},
175+
"outputs": [],
176+
"source": []
177+
}
178+
],
179+
"metadata": {
180+
"kernelspec": {
181+
"display_name": "Python 3",
182+
"language": "python",
183+
"name": "python3"
184+
},
185+
"language_info": {
186+
"codemirror_mode": {
187+
"name": "ipython",
188+
"version": 3
189+
},
190+
"file_extension": ".py",
191+
"mimetype": "text/x-python",
192+
"name": "python",
193+
"nbconvert_exporter": "python",
194+
"pygments_lexer": "ipython3",
195+
"version": "3.5.1"
196+
}
197+
},
198+
"nbformat": 4,
199+
"nbformat_minor": 1
200+
}

0 commit comments

Comments
 (0)