Skip to content

Commit 01c6f1f

Browse files
committed
benchmarks and profiling
1 parent 08048d3 commit 01c6f1f

File tree

4 files changed

+788
-35
lines changed

4 files changed

+788
-35
lines changed
Lines changed: 308 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,308 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {
7+
"collapsed": false
8+
},
9+
"outputs": [
10+
{
11+
"name": "stdout",
12+
"output_type": "stream",
13+
"text": [
14+
"numpy 1.10.2\n",
15+
"zarr 0.1.1.dev6+dirty blosc 1.7.0 $Date:: 2015-07-05 #$\n",
16+
"bcolz 0.12.2.dev22+dirty blosc 1.7.0 $Date:: 2015-07-05 #$\n"
17+
]
18+
},
19+
{
20+
"data": {
21+
"text/plain": [
22+
"4"
23+
]
24+
},
25+
"execution_count": 1,
26+
"metadata": {},
27+
"output_type": "execute_result"
28+
}
29+
],
30+
"source": [
31+
"import sys\n",
32+
"sys.path.insert(0, '..')\n",
33+
"import cProfile\n",
34+
"import numpy as np; print('numpy', np.__version__)\n",
35+
"import zarr; print('zarr', zarr.__version__, 'blosc', ' '.join(zarr.blosc_version()))\n",
36+
"import bcolz; print('bcolz', bcolz.__version__, 'blosc', ' '.join(bcolz.blosc_version()))\n",
37+
"bcolz.blosc_set_nthreads(1)"
38+
]
39+
},
40+
{
41+
"cell_type": "markdown",
42+
"metadata": {},
43+
"source": [
44+
"## Array creation"
45+
]
46+
},
47+
{
48+
"cell_type": "code",
49+
"execution_count": 2,
50+
"metadata": {
51+
"collapsed": false
52+
},
53+
"outputs": [
54+
{
55+
"data": {
56+
"text/plain": [
57+
"array([ 0, 1, 2, ..., 99999997, 99999998, 99999999], dtype=int32)"
58+
]
59+
},
60+
"execution_count": 2,
61+
"metadata": {},
62+
"output_type": "execute_result"
63+
}
64+
],
65+
"source": [
66+
"a = np.arange(1e8, dtype='i4')\n",
67+
"a"
68+
]
69+
},
70+
{
71+
"cell_type": "code",
72+
"execution_count": 3,
73+
"metadata": {
74+
"collapsed": false
75+
},
76+
"outputs": [
77+
{
78+
"data": {
79+
"text/plain": [
80+
"carray((100000000,), int32)\n",
81+
" nbytes: 381.47 MB; cbytes: 7.68 MB; ratio: 49.67\n",
82+
" cparams := cparams(clevel=5, shuffle=True, cname='lz4')\n",
83+
"[ 0 1 2 ..., 99999997 99999998 99999999]"
84+
]
85+
},
86+
"execution_count": 3,
87+
"metadata": {},
88+
"output_type": "execute_result"
89+
}
90+
],
91+
"source": [
92+
"c = bcolz.carray(a, cparams=bcolz.cparams(cname='lz4'))\n",
93+
"c"
94+
]
95+
},
96+
{
97+
"cell_type": "code",
98+
"execution_count": 4,
99+
"metadata": {
100+
"collapsed": false
101+
},
102+
"outputs": [
103+
{
104+
"data": {
105+
"text/plain": [
106+
"zarr.ext.Array((100000000,), int32, chunks=(262144,), nbytes=381.5M, cbytes=6.6M, cratio=57.4, cname=lz4, clevel=5, shuffle=1)"
107+
]
108+
},
109+
"execution_count": 4,
110+
"metadata": {},
111+
"output_type": "execute_result"
112+
}
113+
],
114+
"source": [
115+
"z = zarr.array(a, chunks=c.chunklen, cname='lz4', synchronized=False)\n",
116+
"z"
117+
]
118+
},
119+
{
120+
"cell_type": "code",
121+
"execution_count": 5,
122+
"metadata": {
123+
"collapsed": false
124+
},
125+
"outputs": [
126+
{
127+
"name": "stdout",
128+
"output_type": "stream",
129+
"text": [
130+
"10 loops, best of 3: 132 ms per loop\n"
131+
]
132+
}
133+
],
134+
"source": [
135+
"%timeit bcolz.carray(a, cparams=bcolz.cparams(cname='lz4'))"
136+
]
137+
},
138+
{
139+
"cell_type": "code",
140+
"execution_count": 6,
141+
"metadata": {
142+
"collapsed": false
143+
},
144+
"outputs": [
145+
{
146+
"name": "stdout",
147+
"output_type": "stream",
148+
"text": [
149+
"10 loops, best of 3: 137 ms per loop\n"
150+
]
151+
}
152+
],
153+
"source": [
154+
"%timeit zarr.array(a, chunks=c.chunklen, cname='lz4')"
155+
]
156+
},
157+
{
158+
"cell_type": "code",
159+
"execution_count": null,
160+
"metadata": {
161+
"collapsed": false
162+
},
163+
"outputs": [
164+
{
165+
"name": "stdout",
166+
"output_type": "stream",
167+
"text": [
168+
" 8037 function calls (6888 primitive calls) in 0.146 seconds\n",
169+
"\n",
170+
" Ordered by: internal time\n",
171+
"\n",
172+
" ncalls tottime percall cumtime percall filename:lineno(function)\n",
173+
" 382 0.133 0.000 0.133 0.000 ext.pyx:181(compress)\n",
174+
" 762/382 0.003 0.000 0.137 0.000 ext.pyx:279(__setitem__)\n",
175+
" 381/0 0.002 0.000 0.000 ext.pyx:526(genexpr)\n",
176+
" 382 0.001 0.000 0.137 0.000 ext.pyx:154(__setitem__)\n",
177+
" 381/0 0.001 0.000 0.000 ext.pyx:540(genexpr)\n",
178+
" 763 0.001 0.000 0.002 0.000 numeric.py:1970(isscalar)\n",
179+
" 381 0.001 0.000 0.001 0.000 {built-in method array}\n",
180+
" 382 0.001 0.000 0.001 0.000 ext.pyx:109(is_total_slice)\n",
181+
" 763 0.001 0.000 0.001 0.000 {built-in method isinstance}\n",
182+
" 2/1 0.000 0.000 0.002 0.002 ext.pyx:432(__cinit__)\n",
183+
" 382 0.000 0.000 0.001 0.000 ext.pyx:468(create_chunk)\n",
184+
" 381 0.000 0.000 0.001 0.000 numeric.py:527(ascontiguousarray)\n",
185+
" 382 0.000 0.000 0.001 0.000 ext.pyx:130(__cinit__)\n",
186+
" 382 0.000 0.000 0.001 0.000 ext.pyx:271(__init__)\n",
187+
" 382 0.000 0.000 0.000 0.000 ext.pyx:258(clear)\n",
188+
" 382 0.000 0.000 0.000 0.000 threading.py:75(RLock)\n",
189+
" 383 0.000 0.000 0.000 0.000 ext.pyx:67(get_cparams)\n",
190+
" 382 0.000 0.000 0.000 0.000 ext.pyx:254(free)\n",
191+
" 383 0.000 0.000 0.000 0.000 ext.pyx:355(normalise_shape)\n",
192+
" 2/1 0.000 0.000 0.001 0.001 {built-in method exec}\n",
193+
" 2/1 0.000 0.000 0.000 0.000 ext.pyx:287(normalise_array_selection)\n",
194+
" 2/1 0.000 0.000 0.000 0.000 ext.pyx:364(normalise_chunks)\n",
195+
" 1 0.000 0.000 0.000 0.000 ext.pyx:347(get_chunk_range)\n",
196+
" 2/1 0.000 0.000 0.001 0.001 core.py:154(array)\n",
197+
" 2/1 0.000 0.000 0.001 0.001 <string>:1(<module>)\n",
198+
" 2/1 0.000 0.000 0.001 0.001 ext.pyx:507(__setitem__)\n",
199+
" 1 0.000 0.000 0.000 0.000 ext.pyx:221(__getitem__)\n",
200+
" 1 0.000 0.000 0.000 0.000 ext.pyx:316(normalise_axis_selection)\n",
201+
" 2 0.000 0.000 0.000 0.000 {built-in method hasattr}\n",
202+
" 1 0.000 0.000 0.000 0.000 carray_ext.pyx:932(__get__)\n",
203+
" 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}\n",
204+
"\n",
205+
"\n"
206+
]
207+
}
208+
],
209+
"source": [
210+
"cProfile.run('zarr.array(a, chunks=c.chunklen, cname=\"lz4\")', sort='time')"
211+
]
212+
},
213+
{
214+
"cell_type": "markdown",
215+
"metadata": {},
216+
"source": [
217+
"## Array read"
218+
]
219+
},
220+
{
221+
"cell_type": "code",
222+
"execution_count": null,
223+
"metadata": {
224+
"collapsed": false
225+
},
226+
"outputs": [],
227+
"source": [
228+
"%timeit c[:]"
229+
]
230+
},
231+
{
232+
"cell_type": "code",
233+
"execution_count": null,
234+
"metadata": {
235+
"collapsed": false
236+
},
237+
"outputs": [],
238+
"source": [
239+
"%timeit z[:]"
240+
]
241+
},
242+
{
243+
"cell_type": "code",
244+
"execution_count": null,
245+
"metadata": {
246+
"collapsed": false
247+
},
248+
"outputs": [],
249+
"source": [
250+
"cProfile.run('z[:]', sort='time')"
251+
]
252+
},
253+
{
254+
"cell_type": "code",
255+
"execution_count": null,
256+
"metadata": {
257+
"collapsed": true
258+
},
259+
"outputs": [],
260+
"source": [
261+
"import line_profiler"
262+
]
263+
},
264+
{
265+
"cell_type": "code",
266+
"execution_count": null,
267+
"metadata": {
268+
"collapsed": false
269+
},
270+
"outputs": [],
271+
"source": [
272+
"profile = line_profiler.LineProfiler(zarr.ext.array_getitem)\n",
273+
"profile.run('z[:]')\n",
274+
"profile.print_stats()"
275+
]
276+
},
277+
{
278+
"cell_type": "code",
279+
"execution_count": null,
280+
"metadata": {
281+
"collapsed": true
282+
},
283+
"outputs": [],
284+
"source": []
285+
}
286+
],
287+
"metadata": {
288+
"kernelspec": {
289+
"display_name": "Python 3",
290+
"language": "python",
291+
"name": "python3"
292+
},
293+
"language_info": {
294+
"codemirror_mode": {
295+
"name": "ipython",
296+
"version": 3
297+
},
298+
"file_extension": ".py",
299+
"mimetype": "text/x-python",
300+
"name": "python",
301+
"nbconvert_exporter": "python",
302+
"pygments_lexer": "ipython3",
303+
"version": "3.4.3+"
304+
}
305+
},
306+
"nbformat": 4,
307+
"nbformat_minor": 0
308+
}

0 commit comments

Comments
 (0)