|
307 | 307 | },
|
308 | 308 | {
|
309 | 309 | "cell_type": "code",
|
310 |
| - "execution_count": 12, |
| 310 | + "execution_count": 17, |
311 | 311 | "metadata": {},
|
312 | 312 | "outputs": [
|
313 | 313 | {
|
314 | 314 | "data": {
|
315 | 315 | "text/plain": [
|
316 |
| - "array(['Xin chào thế giới', 'Servus Woid!', 'Γεια σου κόσμε!', ...,\n", |
317 |
| - " 'Γεια σου κόσμε!', 'Hei maailma!', 'เฮลโลเวิลด์'], dtype=object)" |
| 316 | + "array(['Hej Världen!', 'こんにちは世界', 'Servus Woid!', ..., 'Helló, világ!',\n", |
| 317 | + " 'Zdravo svete!', 'เฮลโลเวิลด์'], dtype=object)" |
318 | 318 | ]
|
319 | 319 | },
|
320 |
| - "execution_count": 12, |
| 320 | + "execution_count": 17, |
321 | 321 | "metadata": {},
|
322 | 322 | "output_type": "execute_result"
|
323 | 323 | }
|
|
329 | 329 | },
|
330 | 330 | {
|
331 | 331 | "cell_type": "code",
|
332 |
| - "execution_count": 13, |
| 332 | + "execution_count": 12, |
333 | 333 | "metadata": {},
|
334 | 334 | "outputs": [],
|
335 | 335 | "source": [
|
336 | 336 | "msgpack_codec = numcodecs.MsgPack()\n",
|
337 | 337 | "json_codec = numcodecs.JSON()\n",
|
338 |
| - "pickle_codec = numcodecs.Pickle()" |
| 338 | + "pickle_codec = numcodecs.Pickle()\n", |
| 339 | + "cat_codec = numcodecs.Categorize(greetings, dtype=object, astype='u1')" |
339 | 340 | ]
|
340 | 341 | },
|
341 | 342 | {
|
342 | 343 | "cell_type": "code",
|
343 |
| - "execution_count": 14, |
| 344 | + "execution_count": 13, |
344 | 345 | "metadata": {},
|
345 | 346 | "outputs": [],
|
346 | 347 | "source": [
|
|
351 | 352 | },
|
352 | 353 | {
|
353 | 354 | "cell_type": "code",
|
354 |
| - "execution_count": 15, |
| 355 | + "execution_count": 14, |
355 | 356 | "metadata": {},
|
356 | 357 | "outputs": [],
|
357 | 358 | "source": [
|
|
446 | 447 | "benchmark_codec(pickle_codec)"
|
447 | 448 | ]
|
448 | 449 | },
|
| 450 | + { |
| 451 | + "cell_type": "code", |
| 452 | + "execution_count": 19, |
| 453 | + "metadata": {}, |
| 454 | + "outputs": [ |
| 455 | + { |
| 456 | + "name": "stdout", |
| 457 | + "output_type": "stream", |
| 458 | + "text": [ |
| 459 | + "Categorize(dtype='|O', astype='|u1', labels=['¡Hola mundo!', 'Hej Världen!', 'Servus Woid!', ...])\n", |
| 460 | + "encode\n", |
| 461 | + "216 ms ± 6.39 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", |
| 462 | + "decode\n", |
| 463 | + "29 ms ± 221 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n", |
| 464 | + "size : 1,000,000\n", |
| 465 | + "size (zstd 1): 458,146\n", |
| 466 | + "size (zstd 5): 493,686\n", |
| 467 | + "size (zstd 9): 490,583\n" |
| 468 | + ] |
| 469 | + } |
| 470 | + ], |
| 471 | + "source": [ |
| 472 | + "benchmark_codec(cat_codec)" |
| 473 | + ] |
| 474 | + }, |
449 | 475 | {
|
450 | 476 | "cell_type": "code",
|
451 | 477 | "execution_count": 19,
|
|
0 commit comments