Skip to content

Commit 6dae239

Browse files
committed
Prefer pack_tensor/save_tensor in benchs and examples
1 parent 3b34e95 commit 6dae239

File tree

6 files changed

+55
-48
lines changed

6 files changed

+55
-48
lines changed

bench/pack_compress.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -141,18 +141,18 @@
141141

142142
ctic = time.time()
143143
for i in range(NREP):
144-
c = blosc2.pack_array2(in_, cparams=cparams)
144+
c = blosc2.pack_tensor(in_, cparams=cparams)
145145
ctoc = time.time()
146146
dtic = time.time()
147147
for i in range(NREP):
148-
out = blosc2.unpack_array2(c)
148+
out = blosc2.unpack_tensor(c)
149149
dtoc = time.time()
150150

151151
assert np.array_equal(in_, out)
152152
tc = (ctoc - ctic) / NREP
153153
td = (dtoc - dtic) / NREP
154154
print(
155-
" Time for pack_array2/unpack_array2: %.3f/%.3f s (%.2f/%.2f GB/s)) "
155+
" Time for pack_tensor/unpack_tensor: %.3f/%.3f s (%.2f/%.2f GB/s)) "
156156
% (tc, td, ((N * 8 / tc) / 2 ** 30), ((N * 8 / td) / 2 ** 30)),
157157
end="",
158158
)

bench/pack_compress2.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,11 @@
3737
c = None
3838
ctic = time.time()
3939
for i in range(NREP):
40-
c = blosc2.pack_array2(in_, cparams=cparams)
40+
c = blosc2.pack_tensor(in_, cparams=cparams)
4141
ctoc = time.time()
4242
tc = (ctoc - ctic) / NREP
4343
print(
44-
" Time for pack_array2: %.3f (%.2f GB/s)) "
44+
" Time for pack_tensor: %.3f (%.2f GB/s)) "
4545
% (tc, ((N * 8 / tc) / 2 ** 30)),
4646
)
4747
print("\tcr: %5.1fx" % (in_.size * in_.dtype.itemsize * 1.0 / len(c)))
@@ -56,12 +56,12 @@
5656
out = None
5757
dtic = time.time()
5858
for i in range(NREP):
59-
out = blosc2.unpack_array2(c)
59+
out = blosc2.unpack_tensor(c)
6060
dtoc = time.time()
6161

6262
td = (dtoc - dtic) / NREP
6363
print(
64-
" Time for unpack_array2: %.3f s (%.2f GB/s)) "
64+
" Time for unpack_tensor: %.3f s (%.2f GB/s)) "
6565
% (td, ((N * 8 / td) / 2 ** 30)),
6666
)
6767
assert np.array_equal(in_, out)

blosc2/c-blosc2

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,16 @@
55
########################################################################
66

77

8-
# A simple example using the pack_array2 and unpack_array2 functions
8+
# A simple example using the pack_tensor and unpack_tensor functions
99

1010
import numpy as np
1111
import blosc2
1212

1313
a = np.arange(1_000_000)
1414

1515
cparams = {"codec": blosc2.Codec.BLOSCLZ}
16-
cframe = blosc2.pack_array2(a, cparams=cparams)
16+
cframe = blosc2.pack_tensor(a, cparams=cparams)
1717
print("Length of packed array in bytes:", len(cframe))
1818

19-
a2 = blosc2.unpack_array2(cframe)
19+
a2 = blosc2.unpack_tensor(cframe)
2020
assert np.alltrue(a == a2)
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,15 @@
55
########################################################################
66

77

8-
# A simple example using the save_array and load_array functions
8+
# A simple example using the save_tensor and load_tensor functions
99

1010
import numpy as np
1111
import blosc2
1212

1313
a = np.arange(1_000_000)
1414

15-
file_size = blosc2.save_array(a, "save_array.bl2", mode="w")
16-
print("Length of saved array in file (bytes):", file_size)
15+
file_size = blosc2.save_tensor(a, "save_tensor.bl2", mode="w")
16+
print("Length of saved tensor in file (bytes):", file_size)
1717

18-
a2 = blosc2.load_array("save_array.bl2")
18+
a2 = blosc2.load_tensor("save_tensor.bl2")
1919
assert np.alltrue(a == a2)

examples/slicing_and_beyond.ipynb

Lines changed: 40 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -59,23 +59,24 @@
5959
"metadata": {},
6060
"outputs": [
6161
{
62-
"name": "stdout",
63-
"output_type": "stream",
64-
"text": [
65-
"b'\\x00\\x00\\x00\\x00'\n"
66-
]
62+
"data": {
63+
"text/plain": "bytes"
64+
},
65+
"execution_count": 3,
66+
"metadata": {},
67+
"output_type": "execute_result"
6768
}
6869
],
6970
"source": [
7071
"out_slice = schunk[:]\n",
71-
"print(out_slice[:4])"
72+
"type(out_slice)"
7273
]
7374
},
7475
{
7576
"cell_type": "markdown",
7677
"metadata": {},
7778
"source": [
78-
"As you can see, the data is returned as a bytestring. If we want to better visualize the data, we will use `get_slice`. You can pass any Python object (supporting the Buffer Protocol) as the `out` param to fill it with the data."
79+
"As you can see, the data is returned as a bytes object. If we want to get a more meaningful container instead, we can use `get_slice`, where you can pass any Python object (supporting the Buffer Protocol) as the `out` param to fill it with the data. In this case we will use a NumPy array container."
7980
]
8081
},
8182
{
@@ -102,11 +103,11 @@
102103
"cell_type": "markdown",
103104
"metadata": {},
104105
"source": [
105-
"That looks better!\n",
106+
"That's the expected data indeed!\n",
106107
"\n",
107108
"## Setting data in a SChunk\n",
108109
"\n",
109-
"We can also set the data of an area to any python object supporting the Buffer Protocol. Let's see a quick example:"
110+
"We can also set the data of a `SChunk` area from any Python object supporting the Buffer Protocol. Let's see a quick example:"
110111
]
111112
},
112113
{
@@ -125,7 +126,7 @@
125126
"cell_type": "markdown",
126127
"metadata": {},
127128
"source": [
128-
"So now, we are able to get or set data all at once. But what if we would like to add data? Well, you can still do it with `__setitem__`. Indeed, this method can update and append data at the same time. To do so, `stop` will be the new SChunk nitems:"
129+
"We have seen how to get or set data. But what if we would like to add data? Well, you can still do that with `__setitem__`."
129130
]
130131
},
131132
{
@@ -142,6 +143,15 @@
142143
"schunk[start:new_nitems] = new_value"
143144
]
144145
},
146+
{
147+
"cell_type": "markdown",
148+
"source": [
149+
"Here, `start` is less than the number of elements in the `SChunk` and `new_nitems` is larger than this; that means that `__setitem__` can update and append data at the same time, and you don't have to worry about whether you are exceeding the limits of the `SChunk`."
150+
],
151+
"metadata": {
152+
"collapsed": false
153+
}
154+
},
145155
{
146156
"cell_type": "markdown",
147157
"metadata": {},
@@ -155,19 +165,9 @@
155165
"cell_type": "code",
156166
"execution_count": 7,
157167
"metadata": {},
158-
"outputs": [
159-
{
160-
"data": {
161-
"text/plain": "b'\\x9e\\xa8b2f'"
162-
},
163-
"execution_count": 7,
164-
"metadata": {},
165-
"output_type": "execute_result"
166-
}
167-
],
168+
"outputs": [],
168169
"source": [
169-
"buf = schunk.to_cframe()\n",
170-
"buf[:5]"
170+
"buf = schunk.to_cframe()"
171171
]
172172
},
173173
{
@@ -190,9 +190,7 @@
190190
"cell_type": "markdown",
191191
"metadata": {},
192192
"source": [
193-
"In this case we set the `copy` param to `True`. If you do not want to copy the buffer,\n",
194-
"be mindful that you will have to keep a reference to it until you do not\n",
195-
"want the SChunk anymore.\n",
193+
"In this case we set the `copy` param to `True`. If you do not want to copy the buffer, be mindful that you will have to keep a reference to it for as long as you need the SChunk.\n",
196194
"\n",
197195
"\n",
198196
"## Serializing NumPy arrays\n",
@@ -201,7 +199,7 @@
201199
"\n",
202200
"### In-memory\n",
203201
"\n",
204-
"For obtaining an in-memory representation, you can use `pack_array2`. In comparison with its former version (`pack_array`), it is way faster and does not have the 2 GB size limitation:"
202+
"For obtaining an in-memory representation, you can use `pack_tensor`. In comparison with its former version (`pack_array`), it is way faster and does not have the 2 GB size limitation:"
205203
]
206204
},
207205
{
@@ -210,10 +208,10 @@
210208
"metadata": {},
211209
"outputs": [],
212210
"source": [
213-
"np_array = np.arange(2**30 + 1, dtype=np.int32) # 2 GB (+4) array\n",
211+
"np_array = np.arange(2**30, dtype=np.int32) # 4 GB array\n",
214212
"\n",
215-
"packed_arr2 = blosc2.pack_array2(np_array)\n",
216-
"unpacked_arr2 = blosc2.unpack_array2(packed_arr2)"
213+
"packed_arr2 = blosc2.pack_tensor(np_array)\n",
214+
"unpacked_arr2 = blosc2.unpack_tensor(packed_arr2)"
217215
]
218216
},
219217
{
@@ -222,17 +220,26 @@
222220
"source": [
223221
"### On-disk\n",
224222
"\n",
225-
"To store the serialized buffer on-disk you want to use `save_array` and `load_array`:"
223+
"To store the serialized buffer on-disk you want to use `save_tensor` and `load_tensor`:"
226224
]
227225
},
228226
{
229227
"cell_type": "code",
230228
"execution_count": 10,
231229
"metadata": {},
232-
"outputs": [],
230+
"outputs": [
231+
{
232+
"data": {
233+
"text/plain": "True"
234+
},
235+
"execution_count": 10,
236+
"metadata": {},
237+
"output_type": "execute_result"
238+
}
239+
],
233240
"source": [
234-
"blosc2.save_array(np_array, urlpath=\"ondisk_array.b2frame\", mode=\"w\")\n",
235-
"np_array2 = blosc2.load_array(\"ondisk_array.b2frame\")\n",
241+
"blosc2.save_tensor(np_array, urlpath=\"ondisk_array.b2frame\", mode=\"w\")\n",
242+
"np_array2 = blosc2.load_tensor(\"ondisk_array.b2frame\")\n",
236243
"np.array_equal(np_array, np_array2)"
237244
]
238245
},

0 commit comments

Comments
 (0)