Prefer pack_tensor/save_tensor in benchs and examples

FrancescAlted · FrancescAlted · commit 6dae23905178 · 2022-10-13T12:09:30.000+02:00
diff --git a/bench/pack_compress.py b/bench/pack_compress.py
@@ -141,18 +141,18 @@
 
         ctic = time.time()
         for i in range(NREP):
-            c = blosc2.pack_array2(in_, cparams=cparams)
+            c = blosc2.pack_tensor(in_, cparams=cparams)
         ctoc = time.time()
         dtic = time.time()
         for i in range(NREP):
-            out = blosc2.unpack_array2(c)
+            out = blosc2.unpack_tensor(c)
         dtoc = time.time()
 
         assert np.array_equal(in_, out)
         tc = (ctoc - ctic) / NREP
         td = (dtoc - dtic) / NREP
         print(
-            "  Time for pack_array2/unpack_array2:   %.3f/%.3f s (%.2f/%.2f GB/s)) "
+            "  Time for pack_tensor/unpack_tensor:   %.3f/%.3f s (%.2f/%.2f GB/s)) "
             % (tc, td, ((N * 8 / tc) / 2 ** 30), ((N * 8 / td) / 2 ** 30)),
             end="",
         )
diff --git a/bench/pack_compress2.py b/bench/pack_compress2.py
@@ -37,11 +37,11 @@
     c = None
     ctic = time.time()
     for i in range(NREP):
-        c = blosc2.pack_array2(in_, cparams=cparams)
+        c = blosc2.pack_tensor(in_, cparams=cparams)
     ctoc = time.time()
     tc = (ctoc - ctic) / NREP
     print(
-        "  Time for pack_array2:   %.3f (%.2f GB/s)) "
+        "  Time for pack_tensor:   %.3f (%.2f GB/s)) "
         % (tc, ((N * 8 / tc) / 2 ** 30)),
     )
     print("\tcr: %5.1fx" % (in_.size * in_.dtype.itemsize * 1.0 / len(c)))
@@ -56,12 +56,12 @@
     out = None
     dtic = time.time()
     for i in range(NREP):
-        out = blosc2.unpack_array2(c)
+        out = blosc2.unpack_tensor(c)
     dtoc = time.time()
 
     td = (dtoc - dtic) / NREP
     print(
-        "  Time for unpack_array2:   %.3f s (%.2f GB/s)) "
+        "  Time for unpack_tensor:   %.3f s (%.2f GB/s)) "
         % (td, ((N * 8 / td) / 2 ** 30)),
     )
     assert np.array_equal(in_, out)
diff --git a/blosc2/c-blosc2 b/blosc2/c-blosc2
@@ -1 +1 @@
-Subproject commit 679bf8650ea51e099521bd9a9ed442426a385014
+Subproject commit 65d81c469b541d926e8b1542324077505e11a666
diff --git a/examples/pack_tensor.py b/examples/pack_tensor.py
@@ -5,16 +5,16 @@
 ########################################################################
 
 
-# A simple example using the pack_array2 and unpack_array2 functions
+# A simple example using the pack_tensor and unpack_tensor functions
 
 import numpy as np
 import blosc2
 
 a = np.arange(1_000_000)
 
 cparams = {"codec": blosc2.Codec.BLOSCLZ}
-cframe = blosc2.pack_array2(a, cparams=cparams)
+cframe = blosc2.pack_tensor(a, cparams=cparams)
 print("Length of packed array in bytes:", len(cframe))
 
-a2 = blosc2.unpack_array2(cframe)
+a2 = blosc2.unpack_tensor(cframe)
 assert np.alltrue(a == a2)
diff --git a/examples/save_tensor.py b/examples/save_tensor.py
@@ -5,15 +5,15 @@
 ########################################################################
 
 
-# A simple example using the save_array and load_array functions
+# A simple example using the save_tensor and load_tensor functions
 
 import numpy as np
 import blosc2
 
 a = np.arange(1_000_000)
 
-file_size = blosc2.save_array(a, "save_array.bl2", mode="w")
-print("Length of saved array in file (bytes):", file_size)
+file_size = blosc2.save_tensor(a, "save_tensor.bl2", mode="w")
+print("Length of saved tensor in file (bytes):", file_size)
 
-a2 = blosc2.load_array("save_array.bl2")
+a2 = blosc2.load_tensor("save_tensor.bl2")
 assert np.alltrue(a == a2)
diff --git a/examples/slicing_and_beyond.ipynb b/examples/slicing_and_beyond.ipynb
@@ -59,23 +59,24 @@
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "b'\\x00\\x00\\x00\\x00'\n"
-     ]
+     "data": {
+      "text/plain": "bytes"
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
     "out_slice = schunk[:]\n",
-    "print(out_slice[:4])"
+    "type(out_slice)"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "As you can see, the data is returned as a bytestring. If we want to better visualize the data, we will use `get_slice`. You can pass any Python object (supporting the Buffer Protocol) as the `out` param to fill it with the data."
+    "As you can see, the data is returned as a bytes object. If we want to get a more meaningful container instead, we can use `get_slice`, where you can pass any Python object (supporting the Buffer Protocol) as the `out` param to fill it with the data. In this case we will use a NumPy array container."
    ]
   },
   {
@@ -102,11 +103,11 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "That looks better!\n",
+    "That's the expected data indeed!\n",
     "\n",
     "## Setting data in a SChunk\n",
     "\n",
-    "We can also set the data of an area to any python object supporting the Buffer Protocol. Let's see a quick example:"
+    "We can also set the data of a `SChunk` area from any Python object supporting the Buffer Protocol. Let's see a quick example:"
    ]
   },
   {
@@ -125,7 +126,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "So now, we are able to get or set data all at once. But what if we would like to add data? Well, you can still do it with `__setitem__`. Indeed, this method can update and append data at the same time. To do so, `stop` will be the new SChunk nitems:"
+    "We have seen how to get or set data. But what if we would like to add data? Well, you can still do that with `__setitem__`."
    ]
   },
   {
@@ -142,6 +143,15 @@
     "schunk[start:new_nitems] = new_value"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "Here, `start` is less than the number of elements in the `SChunk` and `new_nitems` is larger than this; that means that `__setitem__` can update and append data at the same time, and you don't have to worry about whether you are exceeding the limits of the `SChunk`."
+   ],
+   "metadata": {
+    "collapsed": false
+   }
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -155,19 +165,9 @@
    "cell_type": "code",
    "execution_count": 7,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": "b'\\x9e\\xa8b2f'"
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
-    "buf = schunk.to_cframe()\n",
-    "buf[:5]"
+    "buf = schunk.to_cframe()"
    ]
   },
   {
@@ -190,9 +190,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "In this case we set the `copy` param to `True`. If you do not want to copy the buffer,\n",
-    "be mindful that you will have to keep a reference to it until you do not\n",
-    "want the SChunk anymore.\n",
+    "In this case we set the `copy` param to `True`. If you do not want to copy the buffer, be mindful that you will have to keep a reference to it for as long as you are using the SChunk.\n",
     "\n",
     "\n",
     "## Serializing NumPy arrays\n",
@@ -201,7 +199,7 @@
     "\n",
     "### In-memory\n",
     "\n",
-    "For obtaining an in-memory representation, you can use `pack_array2`. In comparison with its former version (`pack_array`), it is way faster and does not have the 2 GB size limitation:"
+    "For obtaining an in-memory representation, you can use `pack_tensor`. In comparison with its former version (`pack_array`), it is way faster and does not have the 2 GB size limitation:"
    ]
   },
   {
@@ -210,10 +208,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "np_array = np.arange(2**30 + 1, dtype=np.int32)  # 2 GB (+4) array\n",
+    "np_array = np.arange(2**30, dtype=np.int32)  # 4 GB array\n",
     "\n",
-    "packed_arr2 = blosc2.pack_array2(np_array)\n",
-    "unpacked_arr2 = blosc2.unpack_array2(packed_arr2)"
+    "packed_arr2 = blosc2.pack_tensor(np_array)\n",
+    "unpacked_arr2 = blosc2.unpack_tensor(packed_arr2)"
    ]
   },
   {
@@ -222,17 +220,26 @@
    "source": [
     "### On-disk\n",
     "\n",
-    "To store the serialized buffer on-disk you want to use `save_array` and `load_array`:"
+    "To store the serialized buffer on-disk you want to use `save_tensor` and `load_tensor`:"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 10,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": "True"
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "blosc2.save_array(np_array, urlpath=\"ondisk_array.b2frame\", mode=\"w\")\n",
-    "np_array2 = blosc2.load_array(\"ondisk_array.b2frame\")\n",
+    "blosc2.save_tensor(np_array, urlpath=\"ondisk_array.b2frame\", mode=\"w\")\n",
+    "np_array2 = blosc2.load_tensor(\"ondisk_array.b2frame\")\n",
     "np.array_equal(np_array, np_array2)"
    ]
   },