Skip to content

Commit e3d7972

Browse files
committed
benchmarking
1 parent b7f3bd5 commit e3d7972

File tree

1 file changed

+300
-9
lines changed

1 file changed

+300
-9
lines changed

notebooks/object_arrays.ipynb

Lines changed: 300 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,15 @@
77
"# Object arrays"
88
]
99
},
10+
{
11+
"cell_type": "code",
12+
"execution_count": 12,
13+
"metadata": {},
14+
"outputs": [],
15+
"source": [
16+
"import numpy as np"
17+
]
18+
},
1019
{
1120
"cell_type": "code",
1221
"execution_count": 1,
@@ -36,7 +45,7 @@
3645
{
3746
"data": {
3847
"text/plain": [
39-
"'0.2.1'"
48+
"'0.4.0'"
4049
]
4150
},
4251
"execution_count": 2,
@@ -49,6 +58,13 @@
4958
"numcodecs.__version__"
5059
]
5160
},
61+
{
62+
"cell_type": "markdown",
63+
"metadata": {},
64+
"source": [
65+
"## API changes"
66+
]
67+
},
5268
{
5369
"cell_type": "markdown",
5470
"metadata": {},
@@ -121,7 +137,7 @@
121137
},
122138
{
123139
"cell_type": "code",
124-
"execution_count": 6,
140+
"execution_count": 5,
125141
"metadata": {},
126142
"outputs": [
127143
{
@@ -131,7 +147,7 @@
131147
" None, None, None, None], dtype=object)"
132148
]
133149
},
134-
"execution_count": 6,
150+
"execution_count": 5,
135151
"metadata": {},
136152
"output_type": "execute_result"
137153
}
@@ -155,7 +171,7 @@
155171
},
156172
{
157173
"cell_type": "code",
158-
"execution_count": 7,
174+
"execution_count": 6,
159175
"metadata": {},
160176
"outputs": [
161177
{
@@ -165,7 +181,7 @@
165181
"traceback": [
166182
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
167183
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
168-
"\u001b[0;32m<ipython-input-7-ddcd85b97ce0>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mz\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mzarr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mempty\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mchunks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mobject\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
184+
"\u001b[0;32m<ipython-input-6-ddcd85b97ce0>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mz\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mzarr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mempty\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mchunks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mobject\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
169185
"\u001b[0;32m~/src/github/alimanfoo/zarr/zarr/creation.py\u001b[0m in \u001b[0;36mempty\u001b[0;34m(shape, **kwargs)\u001b[0m\n\u001b[1;32m 204\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 205\u001b[0m \"\"\"\n\u001b[0;32m--> 206\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mcreate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfill_value\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 207\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 208\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
170186
"\u001b[0;32m~/src/github/alimanfoo/zarr/zarr/creation.py\u001b[0m in \u001b[0;36mcreate\u001b[0;34m(shape, chunks, dtype, compressor, fill_value, order, store, synchronizer, overwrite, path, chunk_store, filters, cache_metadata, read_only, object_codec, **kwargs)\u001b[0m\n\u001b[1;32m 112\u001b[0m init_array(store, shape=shape, chunks=chunks, dtype=dtype, compressor=compressor,\n\u001b[1;32m 113\u001b[0m \u001b[0mfill_value\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfill_value\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moverwrite\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0moverwrite\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpath\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 114\u001b[0;31m chunk_store=chunk_store, filters=filters, object_codec=object_codec)\n\u001b[0m\u001b[1;32m 115\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 116\u001b[0m \u001b[0;31m# instantiate array\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
171187
"\u001b[0;32m~/src/github/alimanfoo/zarr/zarr/storage.py\u001b[0m in \u001b[0;36minit_array\u001b[0;34m(store, shape, chunks, dtype, compressor, fill_value, order, overwrite, path, chunk_store, filters, object_codec)\u001b[0m\n\u001b[1;32m 288\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moverwrite\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0moverwrite\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpath\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 289\u001b[0m \u001b[0mchunk_store\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mchunk_store\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfilters\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfilters\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 290\u001b[0;31m object_codec=object_codec)\n\u001b[0m\u001b[1;32m 291\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 292\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
@@ -187,7 +203,7 @@
187203
},
188204
{
189205
"cell_type": "code",
190-
"execution_count": 9,
206+
"execution_count": 7,
191207
"metadata": {},
192208
"outputs": [
193209
{
@@ -210,7 +226,7 @@
210226
"Chunks initialized : 0/2"
211227
]
212228
},
213-
"execution_count": 9,
229+
"execution_count": 7,
214230
"metadata": {},
215231
"output_type": "execute_result"
216232
}
@@ -223,7 +239,7 @@
223239
},
224240
{
225241
"cell_type": "code",
226-
"execution_count": 10,
242+
"execution_count": 8,
227243
"metadata": {},
228244
"outputs": [
229245
{
@@ -233,7 +249,7 @@
233249
"traceback": [
234250
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
235251
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
236-
"\u001b[0;32m<ipython-input-10-3ac17b59bc55>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mz\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'foo'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
252+
"\u001b[0;32m<ipython-input-8-3ac17b59bc55>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mz\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'foo'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
237253
"\u001b[0;32m~/src/github/alimanfoo/zarr/zarr/core.py\u001b[0m in \u001b[0;36m__setitem__\u001b[0;34m(self, selection, value)\u001b[0m\n\u001b[1;32m 1094\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1095\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mselection\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpop_fields\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mselection\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1096\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_basic_selection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mselection\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfields\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1097\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1098\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mset_basic_selection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mselection\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
238254
"\u001b[0;32m~/src/github/alimanfoo/zarr/zarr/core.py\u001b[0m in \u001b[0;36mset_basic_selection\u001b[0;34m(self, selection, value, fields)\u001b[0m\n\u001b[1;32m 1189\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_basic_selection_zd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mselection\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfields\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1190\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1191\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_basic_selection_nd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mselection\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfields\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1192\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1193\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mset_orthogonal_selection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mselection\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
239255
"\u001b[0;32m~/src/github/alimanfoo/zarr/zarr/core.py\u001b[0m in \u001b[0;36m_set_basic_selection_nd\u001b[0;34m(self, selection, value, fields)\u001b[0m\n\u001b[1;32m 1480\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mBasicIndexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mselection\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1481\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1482\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_selection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfields\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1483\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1484\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_set_selection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
@@ -248,6 +264,281 @@
248264
"source": [
249265
"z[0] = 'foo'"
250266
]
267+
},
268+
{
269+
"cell_type": "markdown",
270+
"metadata": {},
271+
"source": [
272+
"## Benchmarking"
273+
]
274+
},
275+
{
276+
"cell_type": "code",
277+
"execution_count": 9,
278+
"metadata": {},
279+
"outputs": [],
280+
"source": [
281+
"msgpack_codec = numcodecs.MsgPack()\n",
282+
"json_codec = numcodecs.JSON()\n",
283+
"pickle_codec = numcodecs.Pickle()"
284+
]
285+
},
286+
{
287+
"cell_type": "code",
288+
"execution_count": 10,
289+
"metadata": {},
290+
"outputs": [
291+
{
292+
"data": {
293+
"text/plain": [
294+
"['¡Hola mundo!',\n",
295+
" 'Hej Världen!',\n",
296+
" 'Servus Woid!',\n",
297+
" 'Hei maailma!',\n",
298+
" 'Xin chào thế giới',\n",
299+
" 'Njatjeta Botë!',\n",
300+
" 'Γεια σου κόσμε!',\n",
301+
" 'こんにちは世界',\n",
302+
" '世界,你好!',\n",
303+
" 'Helló, világ!',\n",
304+
" 'Zdravo svete!',\n",
305+
" 'เฮลโลเวิลด์']"
306+
]
307+
},
308+
"execution_count": 10,
309+
"metadata": {},
310+
"output_type": "execute_result"
311+
}
312+
],
313+
"source": [
314+
"from numcodecs.tests.common import greetings\n",
315+
"greetings"
316+
]
317+
},
318+
{
319+
"cell_type": "code",
320+
"execution_count": 13,
321+
"metadata": {},
322+
"outputs": [
323+
{
324+
"data": {
325+
"text/plain": [
326+
"array(['Xin chào thế giới', 'こんにちは世界', 'Hej Världen!', ..., 'Hej Världen!',\n",
327+
" 'Servus Woid!', '¡Hola mundo!'], dtype=object)"
328+
]
329+
},
330+
"execution_count": 13,
331+
"metadata": {},
332+
"output_type": "execute_result"
333+
}
334+
],
335+
"source": [
336+
"a = np.random.choice(greetings, size=1000000).astype(object)\n",
337+
"a"
338+
]
339+
},
340+
{
341+
"cell_type": "code",
342+
"execution_count": 16,
343+
"metadata": {},
344+
"outputs": [],
345+
"source": [
346+
"zstd1 = numcodecs.Zstd(1)\n",
347+
"zstd5 = numcodecs.Zstd(5)\n",
348+
"zstd9 = numcodecs.Zstd(9)"
349+
]
350+
},
351+
{
352+
"cell_type": "code",
353+
"execution_count": 27,
354+
"metadata": {},
355+
"outputs": [],
356+
"source": [
"def benchmark_codec(codec):\n",
"    \"\"\"Benchmark ``codec`` on the notebook-level object array ``a``.\n",
"\n",
"    Prints, in order: the codec itself, the ``%timeit`` result for\n",
"    ``codec.encode(a)``, the ``%timeit`` result for ``codec.decode(enc)``,\n",
"    the length of the encoded buffer, and the length of that buffer after a\n",
"    further pass through each of the notebook-level compressors ``zstd1``,\n",
"    ``zstd5`` and ``zstd9``.\n",
"\n",
"    Relies on IPython (``%timeit``) and on the globals ``a``, ``zstd1``,\n",
"    ``zstd5`` and ``zstd9`` being defined by earlier cells. Returns nothing.\n",
"    \"\"\"\n",
"    print(codec)\n",
"    print('encode')\n",
"    %timeit codec.encode(a)\n",
"    # Encode once more outside %timeit so the buffer is available below.\n",
"    enc = codec.encode(a)\n",
"    print('decode')\n",
"    %timeit codec.decode(enc)\n",
"    print('size : {:,}'.format(len(enc)))\n",
"    # Each label names the compressor used on that line, so the three sizes\n",
"    # can be told apart in the printed report.\n",
"    print('size (zstd 1): {:,}'.format(len(zstd1.encode(enc))))\n",
"    print('size (zstd 5): {:,}'.format(len(zstd5.encode(enc))))\n",
"    print('size (zstd 9): {:,}'.format(len(zstd9.encode(enc))))"
369+
]
370+
},
371+
{
372+
"cell_type": "code",
373+
"execution_count": 28,
374+
"metadata": {},
375+
"outputs": [
376+
{
377+
"name": "stdout",
378+
"output_type": "stream",
379+
"text": [
380+
"MsgPack(encoding='utf-8')\n",
381+
"encode\n",
382+
"130 ms ± 3.89 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n",
383+
"decode\n",
384+
"227 ms ± 2.98 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n",
385+
"size : 18,995,914\n",
386+
"size (zstd 1): 1,575,701\n",
387+
"size (zstd 1): 1,409,710\n",
388+
"size (zstd 1): 1,310,582\n"
389+
]
390+
}
391+
],
392+
"source": [
393+
"benchmark_codec(msgpack_codec)"
394+
]
395+
},
396+
{
397+
"cell_type": "code",
398+
"execution_count": 29,
399+
"metadata": {},
400+
"outputs": [
401+
{
402+
"name": "stdout",
403+
"output_type": "stream",
404+
"text": [
405+
"JSON(encoding='utf-8', allow_nan=True, check_circular=True, ensure_ascii=True,\n",
406+
" indent=None, separators=(',', ':'), skipkeys=False, sort_keys=True,\n",
407+
" strict=True)\n",
408+
"encode\n",
409+
"178 ms ± 19.8 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n",
410+
"decode\n",
411+
"446 ms ± 36 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n",
412+
"size : 33,312,249\n",
413+
"size (zstd 1): 1,840,581\n",
414+
"size (zstd 1): 1,675,810\n",
415+
"size (zstd 1): 1,523,477\n"
416+
]
417+
}
418+
],
419+
"source": [
420+
"benchmark_codec(json_codec)"
421+
]
422+
},
423+
{
424+
"cell_type": "code",
425+
"execution_count": 30,
426+
"metadata": {},
427+
"outputs": [
428+
{
429+
"name": "stdout",
430+
"output_type": "stream",
431+
"text": [
432+
"Pickle(protocol=4)\n",
433+
"encode\n",
434+
"267 ms ± 61 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n",
435+
"decode\n",
436+
"214 ms ± 5.51 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n",
437+
"size : 20,834,114\n",
438+
"size (zstd 1): 1,607,675\n",
439+
"size (zstd 1): 1,436,597\n",
440+
"size (zstd 1): 1,334,082\n"
441+
]
442+
}
443+
],
444+
"source": [
445+
"benchmark_codec(pickle_codec)"
446+
]
447+
},
448+
{
449+
"cell_type": "code",
450+
"execution_count": 46,
451+
"metadata": {},
452+
"outputs": [],
453+
"source": [
454+
"import fastparquet\n",
455+
"\n",
456+
"\n",
"class FastParquetCodec(numcodecs.abc.Codec):\n",
"    \"\"\"Throwaway codec wrapping fastparquet's utf8 helpers; benchmarking only.\n",
"\n",
"    ``decode`` takes the number of items from the notebook-level array ``a``\n",
"    (see the hack note there), so it can only decode buffers holding exactly\n",
"    ``a.size`` items. Not suitable for use outside this notebook.\n",
"    \"\"\"\n",
"\n",
"    codec_id = 'xxx-fastparquet'\n",
"\n",
"    def encode(self, buf):\n",
"        \"\"\"Return ``buf`` utf8-encoded and packed by ``fastparquet.speedups``.\"\"\"\n",
"        utf8_items = fastparquet.speedups.array_encode_utf8(np.asanyarray(buf))\n",
"        return fastparquet.speedups.pack_byte_array(utf8_items.tolist())\n",
"\n",
"    def decode(self, buf, out=None):\n",
"        \"\"\"Unpack and utf8-decode ``buf``; fill and return ``out`` when given.\"\"\"\n",
"        # hack n for now: the item count is read from the global array `a`\n",
"        packed_items = fastparquet.speedups.unpack_byte_array(buf, a.size)\n",
"        decoded = fastparquet.speedups.array_decode_utf8(\n",
"            np.array(packed_items, dtype=object)\n",
"        )\n",
"        if out is None:\n",
"            return decoded\n",
"        out[:] = decoded\n",
"        return out"
476+
]
477+
},
478+
{
479+
"cell_type": "code",
480+
"execution_count": 47,
481+
"metadata": {},
482+
"outputs": [],
483+
"source": [
484+
"fp_codec = FastParquetCodec()"
485+
]
486+
},
487+
{
488+
"cell_type": "code",
489+
"execution_count": 50,
490+
"metadata": {},
491+
"outputs": [],
492+
"source": [
493+
"enc = fp_codec.encode(a)"
494+
]
495+
},
496+
{
497+
"cell_type": "code",
498+
"execution_count": 51,
499+
"metadata": {},
500+
"outputs": [
501+
{
502+
"data": {
503+
"text/plain": [
504+
"array(['Xin chào thế giới', 'こんにちは世界', 'Hej Världen!', ..., 'Hej Världen!',\n",
505+
" 'Servus Woid!', '¡Hola mundo!'], dtype=object)"
506+
]
507+
},
508+
"execution_count": 51,
509+
"metadata": {},
510+
"output_type": "execute_result"
511+
}
512+
],
513+
"source": [
514+
"dec = fp_codec.decode(enc)\n",
515+
"dec"
516+
]
517+
},
518+
{
519+
"cell_type": "code",
520+
"execution_count": 52,
521+
"metadata": {},
522+
"outputs": [
523+
{
524+
"name": "stdout",
525+
"output_type": "stream",
526+
"text": [
527+
"FastParquetCodec()\n",
528+
"encode\n",
529+
"88.6 ms ± 2.55 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n",
530+
"decode\n",
531+
"234 ms ± 6.71 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n",
532+
"size : 21,829,110\n",
533+
"size (zstd 1): 1,761,775\n",
534+
"size (zstd 1): 1,546,839\n",
535+
"size (zstd 1): 1,359,103\n"
536+
]
537+
}
538+
],
539+
"source": [
540+
"benchmark_codec(fp_codec)"
541+
]
251542
}
252543
],
253544
"metadata": {

0 commit comments

Comments
 (0)