Skip to content

Commit aac74ff

Browse files
committed
add categorize
1 parent 64ed0af commit aac74ff

File tree

1 file changed

+34
-8
lines changed

1 file changed

+34
-8
lines changed

notebooks/object_arrays.ipynb

Lines changed: 34 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -307,17 +307,17 @@
307307
},
308308
{
309309
"cell_type": "code",
310-
"execution_count": 12,
310+
"execution_count": 17,
311311
"metadata": {},
312312
"outputs": [
313313
{
314314
"data": {
315315
"text/plain": [
316-
"array(['Xin chào thế giới', 'Servus Woid!', 'Γεια σου κόσμε!', ...,\n",
317-
" 'Γεια σου κόσμε!', 'Hei maailma!', 'เฮลโลเวิลด์'], dtype=object)"
316+
"array(['Hej Världen!', 'こんにちは世界', 'Servus Woid!', ..., 'Helló, világ!',\n",
317+
" 'Zdravo svete!', 'เฮลโลเวิลด์'], dtype=object)"
318318
]
319319
},
320-
"execution_count": 12,
320+
"execution_count": 17,
321321
"metadata": {},
322322
"output_type": "execute_result"
323323
}
@@ -329,18 +329,19 @@
329329
},
330330
{
331331
"cell_type": "code",
332-
"execution_count": 13,
332+
"execution_count": 12,
333333
"metadata": {},
334334
"outputs": [],
335335
"source": [
336336
"msgpack_codec = numcodecs.MsgPack()\n",
337337
"json_codec = numcodecs.JSON()\n",
338-
"pickle_codec = numcodecs.Pickle()"
338+
"pickle_codec = numcodecs.Pickle()\n",
339+
"cat_codec = numcodecs.Categorize(greetings, dtype=object, astype='u1')"
339340
]
340341
},
341342
{
342343
"cell_type": "code",
343-
"execution_count": 14,
344+
"execution_count": 13,
344345
"metadata": {},
345346
"outputs": [],
346347
"source": [
@@ -351,7 +352,7 @@
351352
},
352353
{
353354
"cell_type": "code",
354-
"execution_count": 15,
355+
"execution_count": 14,
355356
"metadata": {},
356357
"outputs": [],
357358
"source": [
@@ -446,6 +447,31 @@
446447
"benchmark_codec(pickle_codec)"
447448
]
448449
},
450+
{
451+
"cell_type": "code",
452+
"execution_count": 19,
453+
"metadata": {},
454+
"outputs": [
455+
{
456+
"name": "stdout",
457+
"output_type": "stream",
458+
"text": [
459+
"Categorize(dtype='|O', astype='|u1', labels=['¡Hola mundo!', 'Hej Världen!', 'Servus Woid!', ...])\n",
460+
"encode\n",
461+
"216 ms ± 6.39 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n",
462+
"decode\n",
463+
"29 ms ± 221 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n",
464+
"size : 1,000,000\n",
465+
"size (zstd 1): 458,146\n",
466+
"size (zstd 5): 493,686\n",
467+
"size (zstd 9): 490,583\n"
468+
]
469+
}
470+
],
471+
"source": [
472+
"benchmark_codec(cat_codec)"
473+
]
474+
},
449475
{
450476
"cell_type": "code",
451477
"execution_count": 19,

0 commit comments

Comments
 (0)