|
7 | 7 | "# Object arrays"
|
8 | 8 | ]
|
9 | 9 | },
|
| 10 | + { |
| 11 | + "cell_type": "code", |
| 12 | + "execution_count": 12, |
| 13 | + "metadata": {}, |
| 14 | + "outputs": [], |
| 15 | + "source": [ |
| 16 | + "import numpy as np" |
| 17 | + ] |
| 18 | + }, |
10 | 19 | {
|
11 | 20 | "cell_type": "code",
|
12 | 21 | "execution_count": 1,
|
|
36 | 45 | {
|
37 | 46 | "data": {
|
38 | 47 | "text/plain": [
|
39 |
| - "'0.2.1'" |
| 48 | + "'0.4.0'" |
40 | 49 | ]
|
41 | 50 | },
|
42 | 51 | "execution_count": 2,
|
|
49 | 58 | "numcodecs.__version__"
|
50 | 59 | ]
|
51 | 60 | },
|
| 61 | + { |
| 62 | + "cell_type": "markdown", |
| 63 | + "metadata": {}, |
| 64 | + "source": [ |
| 65 | + "## API changes" |
| 66 | + ] |
| 67 | + }, |
52 | 68 | {
|
53 | 69 | "cell_type": "markdown",
|
54 | 70 | "metadata": {},
|
|
121 | 137 | },
|
122 | 138 | {
|
123 | 139 | "cell_type": "code",
|
124 |
| - "execution_count": 6, |
| 140 | + "execution_count": 5, |
125 | 141 | "metadata": {},
|
126 | 142 | "outputs": [
|
127 | 143 | {
|
|
131 | 147 | " None, None, None, None], dtype=object)"
|
132 | 148 | ]
|
133 | 149 | },
|
134 |
| - "execution_count": 6, |
| 150 | + "execution_count": 5, |
135 | 151 | "metadata": {},
|
136 | 152 | "output_type": "execute_result"
|
137 | 153 | }
|
|
155 | 171 | },
|
156 | 172 | {
|
157 | 173 | "cell_type": "code",
|
158 |
| - "execution_count": 7, |
| 174 | + "execution_count": 6, |
159 | 175 | "metadata": {},
|
160 | 176 | "outputs": [
|
161 | 177 | {
|
|
165 | 181 | "traceback": [
|
166 | 182 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
167 | 183 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
|
168 |
| - "\u001b[0;32m<ipython-input-7-ddcd85b97ce0>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mz\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mzarr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mempty\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mchunks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mobject\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", |
| 184 | + "\u001b[0;32m<ipython-input-6-ddcd85b97ce0>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mz\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mzarr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mempty\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mchunks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mobject\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", |
169 | 185 | "\u001b[0;32m~/src/github/alimanfoo/zarr/zarr/creation.py\u001b[0m in \u001b[0;36mempty\u001b[0;34m(shape, **kwargs)\u001b[0m\n\u001b[1;32m 204\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 205\u001b[0m \"\"\"\n\u001b[0;32m--> 206\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mcreate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfill_value\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 207\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 208\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
|
170 | 186 | "\u001b[0;32m~/src/github/alimanfoo/zarr/zarr/creation.py\u001b[0m in \u001b[0;36mcreate\u001b[0;34m(shape, chunks, dtype, compressor, fill_value, order, store, synchronizer, overwrite, path, chunk_store, filters, cache_metadata, read_only, object_codec, **kwargs)\u001b[0m\n\u001b[1;32m 112\u001b[0m init_array(store, shape=shape, chunks=chunks, dtype=dtype, compressor=compressor,\n\u001b[1;32m 113\u001b[0m \u001b[0mfill_value\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfill_value\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moverwrite\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0moverwrite\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpath\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 114\u001b[0;31m chunk_store=chunk_store, filters=filters, object_codec=object_codec)\n\u001b[0m\u001b[1;32m 115\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 116\u001b[0m \u001b[0;31m# instantiate array\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
171 | 187 | "\u001b[0;32m~/src/github/alimanfoo/zarr/zarr/storage.py\u001b[0m in \u001b[0;36minit_array\u001b[0;34m(store, shape, chunks, dtype, compressor, fill_value, order, overwrite, path, chunk_store, filters, object_codec)\u001b[0m\n\u001b[1;32m 288\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moverwrite\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0moverwrite\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpath\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 289\u001b[0m \u001b[0mchunk_store\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mchunk_store\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfilters\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfilters\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 290\u001b[0;31m object_codec=object_codec)\n\u001b[0m\u001b[1;32m 291\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 292\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
187 | 203 | },
|
188 | 204 | {
|
189 | 205 | "cell_type": "code",
|
190 |
| - "execution_count": 9, |
| 206 | + "execution_count": 7, |
191 | 207 | "metadata": {},
|
192 | 208 | "outputs": [
|
193 | 209 | {
|
|
210 | 226 | "Chunks initialized : 0/2"
|
211 | 227 | ]
|
212 | 228 | },
|
213 |
| - "execution_count": 9, |
| 229 | + "execution_count": 7, |
214 | 230 | "metadata": {},
|
215 | 231 | "output_type": "execute_result"
|
216 | 232 | }
|
|
223 | 239 | },
|
224 | 240 | {
|
225 | 241 | "cell_type": "code",
|
226 |
| - "execution_count": 10, |
| 242 | + "execution_count": 8, |
227 | 243 | "metadata": {},
|
228 | 244 | "outputs": [
|
229 | 245 | {
|
|
233 | 249 | "traceback": [
|
234 | 250 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
235 | 251 | "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
|
236 |
| - "\u001b[0;32m<ipython-input-10-3ac17b59bc55>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mz\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'foo'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", |
| 252 | + "\u001b[0;32m<ipython-input-8-3ac17b59bc55>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mz\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'foo'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", |
237 | 253 | "\u001b[0;32m~/src/github/alimanfoo/zarr/zarr/core.py\u001b[0m in \u001b[0;36m__setitem__\u001b[0;34m(self, selection, value)\u001b[0m\n\u001b[1;32m 1094\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1095\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mselection\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpop_fields\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mselection\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1096\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_basic_selection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mselection\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfields\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1097\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1098\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mset_basic_selection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mselection\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
238 | 254 | "\u001b[0;32m~/src/github/alimanfoo/zarr/zarr/core.py\u001b[0m in \u001b[0;36mset_basic_selection\u001b[0;34m(self, selection, value, fields)\u001b[0m\n\u001b[1;32m 1189\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_basic_selection_zd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mselection\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfields\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1190\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1191\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_basic_selection_nd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mselection\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfields\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1192\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1193\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mset_orthogonal_selection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mselection\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
239 | 255 | "\u001b[0;32m~/src/github/alimanfoo/zarr/zarr/core.py\u001b[0m in \u001b[0;36m_set_basic_selection_nd\u001b[0;34m(self, selection, value, fields)\u001b[0m\n\u001b[1;32m 1480\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mBasicIndexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mselection\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1481\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1482\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_selection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfields\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1483\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1484\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_set_selection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
248 | 264 | "source": [
|
249 | 265 | "z[0] = 'foo'"
|
250 | 266 | ]
|
| 267 | + }, |
| 268 | + { |
| 269 | + "cell_type": "markdown", |
| 270 | + "metadata": {}, |
| 271 | + "source": [ |
| 272 | + "## Benchmarking" |
| 273 | + ] |
| 274 | + }, |
| 275 | + { |
| 276 | + "cell_type": "code", |
| 277 | + "execution_count": 9, |
| 278 | + "metadata": {}, |
| 279 | + "outputs": [], |
| 280 | + "source": [ |
| 281 | + "msgpack_codec = numcodecs.MsgPack()\n", |
| 282 | + "json_codec = numcodecs.JSON()\n", |
| 283 | + "pickle_codec = numcodecs.Pickle()" |
| 284 | + ] |
| 285 | + }, |
| 286 | + { |
| 287 | + "cell_type": "code", |
| 288 | + "execution_count": 10, |
| 289 | + "metadata": {}, |
| 290 | + "outputs": [ |
| 291 | + { |
| 292 | + "data": { |
| 293 | + "text/plain": [ |
| 294 | + "['¡Hola mundo!',\n", |
| 295 | + " 'Hej Världen!',\n", |
| 296 | + " 'Servus Woid!',\n", |
| 297 | + " 'Hei maailma!',\n", |
| 298 | + " 'Xin chào thế giới',\n", |
| 299 | + " 'Njatjeta Botë!',\n", |
| 300 | + " 'Γεια σου κόσμε!',\n", |
| 301 | + " 'こんにちは世界',\n", |
| 302 | + " '世界,你好!',\n", |
| 303 | + " 'Helló, világ!',\n", |
| 304 | + " 'Zdravo svete!',\n", |
| 305 | + " 'เฮลโลเวิลด์']" |
| 306 | + ] |
| 307 | + }, |
| 308 | + "execution_count": 10, |
| 309 | + "metadata": {}, |
| 310 | + "output_type": "execute_result" |
| 311 | + } |
| 312 | + ], |
| 313 | + "source": [ |
| 314 | + "from numcodecs.tests.common import greetings\n", |
| 315 | + "greetings" |
| 316 | + ] |
| 317 | + }, |
| 318 | + { |
| 319 | + "cell_type": "code", |
| 320 | + "execution_count": 13, |
| 321 | + "metadata": {}, |
| 322 | + "outputs": [ |
| 323 | + { |
| 324 | + "data": { |
| 325 | + "text/plain": [ |
| 326 | + "array(['Xin chào thế giới', 'こんにちは世界', 'Hej Världen!', ..., 'Hej Världen!',\n", |
| 327 | + " 'Servus Woid!', '¡Hola mundo!'], dtype=object)" |
| 328 | + ] |
| 329 | + }, |
| 330 | + "execution_count": 13, |
| 331 | + "metadata": {}, |
| 332 | + "output_type": "execute_result" |
| 333 | + } |
| 334 | + ], |
| 335 | + "source": [ |
| 336 | + "a = np.random.choice(greetings, size=1000000).astype(object)\n", |
| 337 | + "a" |
| 338 | + ] |
| 339 | + }, |
| 340 | + { |
| 341 | + "cell_type": "code", |
| 342 | + "execution_count": 16, |
| 343 | + "metadata": {}, |
| 344 | + "outputs": [], |
| 345 | + "source": [ |
| 346 | + "zstd1 = numcodecs.Zstd(1)\n", |
| 347 | + "zstd5 = numcodecs.Zstd(5)\n", |
| 348 | + "zstd9 = numcodecs.Zstd(9)" |
| 349 | + ] |
| 350 | + }, |
| 351 | + { |
| 352 | + "cell_type": "code", |
| 353 | + "execution_count": 27, |
| 354 | + "metadata": {}, |
| 355 | + "outputs": [], |
| 356 | + "source": [ |
| 357 | + "def benchmark_codec(codec):\n", |
| 358 | + " print(codec)\n", |
| 359 | + " print('encode')\n", |
| 360 | + " %timeit codec.encode(a)\n", |
| 361 | + " enc = codec.encode(a)\n", |
| 362 | + " print('decode')\n", |
| 363 | + " %timeit codec.decode(enc)\n", |
| 364 | + " print('size : {:,}'.format(len(enc)))\n", |
| 365 | + " print('size (zstd 1): {:,}'.format(len(zstd1.encode(enc))))\n", |
| 366 | + " print('size (zstd 5): {:,}'.format(len(zstd5.encode(enc))))\n", |
| 367 | + " print('size (zstd 9): {:,}'.format(len(zstd9.encode(enc))))\n", |
| 368 | + " " |
| 369 | + ] |
| 370 | + }, |
| 371 | + { |
| 372 | + "cell_type": "code", |
| 373 | + "execution_count": 28, |
| 374 | + "metadata": {}, |
| 375 | + "outputs": [ |
| 376 | + { |
| 377 | + "name": "stdout", |
| 378 | + "output_type": "stream", |
| 379 | + "text": [ |
| 380 | + "MsgPack(encoding='utf-8')\n", |
| 381 | + "encode\n", |
| 382 | + "130 ms ± 3.89 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n", |
| 383 | + "decode\n", |
| 384 | + "227 ms ± 2.98 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", |
| 385 | + "size : 18,995,914\n", |
| 386 | + "size (zstd 1): 1,575,701\n", |
| 387 | + "size (zstd 1): 1,409,710\n", |
| 388 | + "size (zstd 1): 1,310,582\n" |
| 389 | + ] |
| 390 | + } |
| 391 | + ], |
| 392 | + "source": [ |
| 393 | + "benchmark_codec(msgpack_codec)" |
| 394 | + ] |
| 395 | + }, |
| 396 | + { |
| 397 | + "cell_type": "code", |
| 398 | + "execution_count": 29, |
| 399 | + "metadata": {}, |
| 400 | + "outputs": [ |
| 401 | + { |
| 402 | + "name": "stdout", |
| 403 | + "output_type": "stream", |
| 404 | + "text": [ |
| 405 | + "JSON(encoding='utf-8', allow_nan=True, check_circular=True, ensure_ascii=True,\n", |
| 406 | + " indent=None, separators=(',', ':'), skipkeys=False, sort_keys=True,\n", |
| 407 | + " strict=True)\n", |
| 408 | + "encode\n", |
| 409 | + "178 ms ± 19.8 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n", |
| 410 | + "decode\n", |
| 411 | + "446 ms ± 36 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", |
| 412 | + "size : 33,312,249\n", |
| 413 | + "size (zstd 1): 1,840,581\n", |
| 414 | + "size (zstd 1): 1,675,810\n", |
| 415 | + "size (zstd 1): 1,523,477\n" |
| 416 | + ] |
| 417 | + } |
| 418 | + ], |
| 419 | + "source": [ |
| 420 | + "benchmark_codec(json_codec)" |
| 421 | + ] |
| 422 | + }, |
| 423 | + { |
| 424 | + "cell_type": "code", |
| 425 | + "execution_count": 30, |
| 426 | + "metadata": {}, |
| 427 | + "outputs": [ |
| 428 | + { |
| 429 | + "name": "stdout", |
| 430 | + "output_type": "stream", |
| 431 | + "text": [ |
| 432 | + "Pickle(protocol=4)\n", |
| 433 | + "encode\n", |
| 434 | + "267 ms ± 61 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", |
| 435 | + "decode\n", |
| 436 | + "214 ms ± 5.51 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", |
| 437 | + "size : 20,834,114\n", |
| 438 | + "size (zstd 1): 1,607,675\n", |
| 439 | + "size (zstd 1): 1,436,597\n", |
| 440 | + "size (zstd 1): 1,334,082\n" |
| 441 | + ] |
| 442 | + } |
| 443 | + ], |
| 444 | + "source": [ |
| 445 | + "benchmark_codec(pickle_codec)" |
| 446 | + ] |
| 447 | + }, |
| 448 | + { |
| 449 | + "cell_type": "code", |
| 450 | + "execution_count": 46, |
| 451 | + "metadata": {}, |
| 452 | + "outputs": [], |
| 453 | + "source": [ |
| 454 | + "import fastparquet\n", |
| 455 | + "\n", |
| 456 | + "\n", |
| 457 | + "class FastParquetCodec(numcodecs.abc.Codec):\n", |
| 458 | + " \"\"\"Hacked codec using fastparquet utf8 encoding, for benchmarking purposes only.\"\"\"\n", |
| 459 | + " \n", |
| 460 | + " codec_id = 'xxx-fastparquet'\n", |
| 461 | + " \n", |
| 462 | + " def encode(self, buf):\n", |
| 463 | + " buf = np.asanyarray(buf)\n", |
| 464 | + " ba = fastparquet.speedups.array_encode_utf8(buf)\n", |
| 465 | + " enc = fastparquet.speedups.pack_byte_array(ba.tolist())\n", |
| 466 | + " return enc\n", |
| 467 | + " \n", |
| 468 | + " def decode(self, buf, out=None):\n", |
| 469 | + " ba = fastparquet.speedups.unpack_byte_array(buf, a.size) # HACK: n is taken from the global array `a`, not derived from buf\n", |
| 470 | + " dec = fastparquet.speedups.array_decode_utf8(np.array(ba, dtype=object))\n", |
| 471 | + " if out is not None:\n", |
| 472 | + " out[:] = dec\n", |
| 473 | + " return out\n", |
| 474 | + " return dec\n", |
| 475 | + " " |
| 476 | + ] |
| 477 | + }, |
| 478 | + { |
| 479 | + "cell_type": "code", |
| 480 | + "execution_count": 47, |
| 481 | + "metadata": {}, |
| 482 | + "outputs": [], |
| 483 | + "source": [ |
| 484 | + "fp_codec = FastParquetCodec()" |
| 485 | + ] |
| 486 | + }, |
| 487 | + { |
| 488 | + "cell_type": "code", |
| 489 | + "execution_count": 50, |
| 490 | + "metadata": {}, |
| 491 | + "outputs": [], |
| 492 | + "source": [ |
| 493 | + "enc = fp_codec.encode(a)" |
| 494 | + ] |
| 495 | + }, |
| 496 | + { |
| 497 | + "cell_type": "code", |
| 498 | + "execution_count": 51, |
| 499 | + "metadata": {}, |
| 500 | + "outputs": [ |
| 501 | + { |
| 502 | + "data": { |
| 503 | + "text/plain": [ |
| 504 | + "array(['Xin chào thế giới', 'こんにちは世界', 'Hej Världen!', ..., 'Hej Världen!',\n", |
| 505 | + " 'Servus Woid!', '¡Hola mundo!'], dtype=object)" |
| 506 | + ] |
| 507 | + }, |
| 508 | + "execution_count": 51, |
| 509 | + "metadata": {}, |
| 510 | + "output_type": "execute_result" |
| 511 | + } |
| 512 | + ], |
| 513 | + "source": [ |
| 514 | + "dec = fp_codec.decode(enc)\n", |
| 515 | + "dec" |
| 516 | + ] |
| 517 | + }, |
| 518 | + { |
| 519 | + "cell_type": "code", |
| 520 | + "execution_count": 52, |
| 521 | + "metadata": {}, |
| 522 | + "outputs": [ |
| 523 | + { |
| 524 | + "name": "stdout", |
| 525 | + "output_type": "stream", |
| 526 | + "text": [ |
| 527 | + "FastParquetCodec()\n", |
| 528 | + "encode\n", |
| 529 | + "88.6 ms ± 2.55 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n", |
| 530 | + "decode\n", |
| 531 | + "234 ms ± 6.71 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", |
| 532 | + "size : 21,829,110\n", |
| 533 | + "size (zstd 1): 1,761,775\n", |
| 534 | + "size (zstd 1): 1,546,839\n", |
| 535 | + "size (zstd 1): 1,359,103\n" |
| 536 | + ] |
| 537 | + } |
| 538 | + ], |
| 539 | + "source": [ |
| 540 | + "benchmark_codec(fp_codec)" |
| 541 | + ] |
251 | 542 | }
|
252 | 543 | ],
|
253 | 544 | "metadata": {
|
|
0 commit comments