Skip to content

Commit e9ad875

Browse files
IndifferentAreaMaanasArora
authored and committed
ENH: support no-copy pickling for any array that can be transposed to a C-contiguous array (numpy#28105)
* add support for transposed contiguous array and corresponding test * fix lint * rm bad includes * fix * fix * discard vla * accept legacy pkl * check return * fix ci * check after transpose * discard auto include * use N format in pybuildvalue * use pytuple pack * clean up * add comment on fall back behavior * add mroe test * use byte str instead of pkl to test * get shape with pyarray api * fmt * better comment * fix as suggested * fix lint * discard auto include * shorter tested strs * fmt * last correct commit * last correct commit * [skip ci] memory leak * add cleanup * [skip ci] add release doc * [skip ci] add check for pylong_fromlong * typo * fix memory leak * more clean up * use same random seed * use xdecref * check PyArray_Transpose * check PyArray_IntTupleFromIntp * fix * once-over * add additional test * once over again * fmt * pickle format consistency * fix incorrect usage of cpy api * fmt
1 parent e232ac5 commit e9ad875

File tree

4 files changed

+125
-41
lines changed

4 files changed

+125
-41
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
* No-copy pickling is now supported for any
2+
array that can be transposed to a C-contiguous array.

numpy/_core/numeric.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1920,7 +1920,9 @@ def fromfunction(function, shape, *, dtype=float, like=None, **kwargs):
19201920
_fromfunction_with_like = array_function_dispatch()(fromfunction)
19211921

19221922

1923-
def _frombuffer(buf, dtype, shape, order):
1923+
def _frombuffer(buf, dtype, shape, order, axis_order=None):
1924+
if order == 'K' and axis_order is not None:
1925+
return frombuffer(buf, dtype=dtype).reshape(shape, order='C').transpose(axis_order)
19241926
return frombuffer(buf, dtype=dtype).reshape(shape, order=order)
19251927

19261928

numpy/_core/src/multiarray/methods.c

Lines changed: 81 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1845,77 +1845,116 @@ array_reduce_ex_regular(PyArrayObject *self, int NPY_UNUSED(protocol))
18451845
static PyObject *
18461846
array_reduce_ex_picklebuffer(PyArrayObject *self, int protocol)
18471847
{
1848-
PyObject *numeric_mod = NULL, *from_buffer_func = NULL;
1849-
PyObject *pickle_module = NULL, *picklebuf_class = NULL;
1850-
PyObject *picklebuf_args = NULL;
1848+
PyObject *from_buffer_func = NULL;
1849+
PyObject *picklebuf_class = NULL;
18511850
PyObject *buffer = NULL, *transposed_array = NULL;
18521851
PyArray_Descr *descr = NULL;
1852+
PyObject *rev_perm = NULL; // only used in 'K' order
18531853
char order;
18541854

18551855
descr = PyArray_DESCR(self);
18561856

18571857
/* we expect protocol 5 to be available in Python 3.8 */
1858-
pickle_module = PyImport_ImportModule("pickle");
1859-
if (pickle_module == NULL){
1860-
return NULL;
1861-
}
1862-
picklebuf_class = PyObject_GetAttrString(pickle_module, "PickleBuffer");
1863-
Py_DECREF(pickle_module);
1864-
if (picklebuf_class == NULL) {
1858+
if (npy_cache_import_runtime("pickle", "PickleBuffer", &picklebuf_class) == -1) {
18651859
return NULL;
18661860
}
18671861

18681862
/* Construct a PickleBuffer of the array */
1869-
1870-
if (!PyArray_IS_C_CONTIGUOUS((PyArrayObject*) self) &&
1871-
PyArray_IS_F_CONTIGUOUS((PyArrayObject*) self)) {
1863+
if (PyArray_IS_C_CONTIGUOUS((PyArrayObject *)self)) {
1864+
order = 'C';
1865+
}
1866+
else if (PyArray_IS_F_CONTIGUOUS((PyArrayObject *)self)) {
18721867
/* if the array is Fortran-contiguous and not C-contiguous,
18731868
* the PickleBuffer instance will hold a view on the transpose
18741869
* of the initial array, that is C-contiguous. */
18751870
order = 'F';
1876-
transposed_array = PyArray_Transpose((PyArrayObject*)self, NULL);
1877-
picklebuf_args = Py_BuildValue("(N)", transposed_array);
1871+
transposed_array = PyArray_Transpose((PyArrayObject *)self, NULL);
1872+
if (transposed_array == NULL) {
1873+
return NULL;
1874+
}
18781875
}
18791876
else {
1880-
order = 'C';
1881-
picklebuf_args = Py_BuildValue("(O)", self);
1882-
}
1883-
if (picklebuf_args == NULL) {
1884-
Py_DECREF(picklebuf_class);
1885-
return NULL;
1877+
order = 'K';
1878+
const int n = PyArray_NDIM(self);
1879+
npy_stride_sort_item items[NPY_MAXDIMS];
1880+
// sort (stride, perm) pairs by descending stride = transpose to C
1881+
PyArray_CreateSortedStridePerm(n, PyArray_STRIDES(self), items);
1882+
rev_perm = PyTuple_New(n);
1883+
if (rev_perm == NULL) {
1884+
return NULL;
1885+
}
1886+
PyArray_Dims perm;
1887+
npy_intp dims[NPY_MAXDIMS];
1888+
for (int i = 0; i < n; i++) {
1889+
dims[i] = items[i].perm;
1890+
PyObject *idx = PyLong_FromLong(i);
1891+
if (idx == NULL) {
1892+
Py_DECREF(rev_perm);
1893+
return NULL;
1894+
}
1895+
PyTuple_SET_ITEM(rev_perm, items[i].perm, idx);
1896+
}
1897+
perm.ptr = dims;
1898+
perm.len = n;
1899+
transposed_array = PyArray_Transpose((PyArrayObject *)self, &perm);
1900+
if (transposed_array == NULL) {
1901+
Py_DECREF(rev_perm);
1902+
return NULL;
1903+
}
1904+
if (!PyArray_IS_C_CONTIGUOUS((PyArrayObject *)transposed_array)) {
1905+
// self is non-contiguous
1906+
Py_DECREF(rev_perm);
1907+
Py_DECREF(transposed_array);
1908+
return array_reduce_ex_regular(self, protocol);
1909+
}
18861910
}
1887-
1888-
buffer = PyObject_CallObject(picklebuf_class, picklebuf_args);
1889-
Py_DECREF(picklebuf_class);
1890-
Py_DECREF(picklebuf_args);
1911+
buffer = PyObject_CallOneArg(picklebuf_class, transposed_array == NULL ? (PyObject*) self: transposed_array);
18911912
if (buffer == NULL) {
18921913
/* Some arrays may refuse to export a buffer, in which case
18931914
* just fall back on regular __reduce_ex__ implementation
18941915
* (gh-12745).
18951916
*/
1917+
Py_XDECREF(rev_perm);
1918+
Py_XDECREF(transposed_array);
18961919
PyErr_Clear();
18971920
return array_reduce_ex_regular(self, protocol);
18981921
}
18991922

19001923
/* Get the _frombuffer() function for reconstruction */
1901-
1902-
numeric_mod = PyImport_ImportModule("numpy._core.numeric");
1903-
if (numeric_mod == NULL) {
1924+
if (npy_cache_import_runtime("numpy._core.numeric", "_frombuffer",
1925+
&from_buffer_func) == -1) {
1926+
Py_XDECREF(rev_perm);
1927+
Py_XDECREF(transposed_array);
19041928
Py_DECREF(buffer);
19051929
return NULL;
19061930
}
1907-
from_buffer_func = PyObject_GetAttrString(numeric_mod,
1908-
"_frombuffer");
1909-
Py_DECREF(numeric_mod);
1910-
if (from_buffer_func == NULL) {
1931+
1932+
PyObject *shape = NULL;
1933+
if (order == 'K') {
1934+
shape = PyArray_IntTupleFromIntp(
1935+
PyArray_NDIM((PyArrayObject *)transposed_array),
1936+
PyArray_SHAPE((PyArrayObject *)transposed_array));
1937+
}
1938+
else {
1939+
shape = PyArray_IntTupleFromIntp(PyArray_NDIM(self),
1940+
PyArray_SHAPE(self));
1941+
}
1942+
Py_XDECREF(transposed_array);
1943+
if (shape == NULL) {
1944+
Py_XDECREF(rev_perm);
19111945
Py_DECREF(buffer);
19121946
return NULL;
19131947
}
1914-
1915-
return Py_BuildValue("N(NONN)",
1916-
from_buffer_func, buffer, (PyObject *)descr,
1917-
PyObject_GetAttrString((PyObject *)self, "shape"),
1918-
PyUnicode_FromStringAndSize(&order, 1));
1948+
if (order == 'K') {
1949+
return Py_BuildValue("N(NONNN)", from_buffer_func, buffer,
1950+
(PyObject *)descr, shape,
1951+
PyUnicode_FromStringAndSize(&order, 1), rev_perm);
1952+
}
1953+
else {
1954+
return Py_BuildValue("N(NONN)", from_buffer_func, buffer,
1955+
(PyObject *)descr, shape,
1956+
PyUnicode_FromStringAndSize(&order, 1));
1957+
}
19191958
}
19201959

19211960
static PyObject *
@@ -1930,8 +1969,6 @@ array_reduce_ex(PyArrayObject *self, PyObject *args)
19301969

19311970
descr = PyArray_DESCR(self);
19321971
if ((protocol < 5) ||
1933-
(!PyArray_IS_C_CONTIGUOUS((PyArrayObject*)self) &&
1934-
!PyArray_IS_F_CONTIGUOUS((PyArrayObject*)self)) ||
19351972
PyDataType_FLAGCHK(descr, NPY_ITEM_HASOBJECT) ||
19361973
(PyType_IsSubtype(((PyObject*)self)->ob_type, &PyArray_Type) &&
19371974
((PyObject*)self)->ob_type != &PyArray_Type) ||
@@ -1943,6 +1980,11 @@ array_reduce_ex(PyArrayObject *self, PyObject *args)
19431980
return array_reduce_ex_regular(self, protocol);
19441981
}
19451982
else {
1983+
/* The func will check internally
1984+
* if the array isn't backed by a contiguous data buffer or
1985+
* if the array refuses to export a buffer
1986+
* In either case, fall back to `array_reduce_ex_regular`
1987+
*/
19461988
return array_reduce_ex_picklebuffer(self, protocol);
19471989
}
19481990
}

numpy/_core/tests/test_multiarray.py

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4404,17 +4404,55 @@ def test_f_contiguous_array(self):
44044404

44054405
assert_equal(f_contiguous_array, depickled_f_contiguous_array)
44064406

4407+
@pytest.mark.skipif(pickle.HIGHEST_PROTOCOL < 5, reason="requires pickle protocol 5")
4408+
@pytest.mark.parametrize('transposed_contiguous_array',
4409+
[np.random.default_rng(42).random((2, 3, 4)).transpose((1, 0, 2)),
4410+
np.random.default_rng(42).random((2, 3, 4, 5)).transpose((1, 3, 0, 2))] +
4411+
[np.random.default_rng(42).random(np.arange(2, 7)).transpose(np.random.permutation(5)) for _ in range(3)])
4412+
def test_transposed_contiguous_array(self, transposed_contiguous_array):
4413+
buffers = []
4414+
# When using pickle protocol 5, arrays which can be transposed to c_contiguous
4415+
# can be serialized using out-of-band buffers
4416+
bytes_string = pickle.dumps(transposed_contiguous_array, protocol=5,
4417+
buffer_callback=buffers.append)
4418+
4419+
assert len(buffers) > 0
4420+
4421+
depickled_transposed_contiguous_array = pickle.loads(bytes_string,
4422+
buffers=buffers)
4423+
4424+
assert_equal(transposed_contiguous_array, depickled_transposed_contiguous_array)
4425+
4426+
@pytest.mark.skipif(pickle.HIGHEST_PROTOCOL < 5, reason="requires pickle protocol 5")
4427+
def test_load_legacy_pkl_protocol5(self):
4428+
# legacy byte strs are dumped in 2.2.1
4429+
c_contiguous_dumped = b'\x80\x05\x95\x90\x00\x00\x00\x00\x00\x00\x00\x8c\x13numpy._core.numeric\x94\x8c\x0b_frombuffer\x94\x93\x94(\x96\x18\x00\x00\x00\x00\x00\x00\x00\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x94\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x02u1\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01|\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94bK\x03K\x04K\x02\x87\x94\x8c\x01C\x94t\x94R\x94.' # noqa
4430+
f_contiguous_dumped = b'\x80\x05\x95\x90\x00\x00\x00\x00\x00\x00\x00\x8c\x13numpy._core.numeric\x94\x8c\x0b_frombuffer\x94\x93\x94(\x96\x18\x00\x00\x00\x00\x00\x00\x00\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x94\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x02u1\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01|\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94bK\x03K\x04K\x02\x87\x94\x8c\x01F\x94t\x94R\x94.' # noqa
4431+
transposed_contiguous_dumped = b'\x80\x05\x95\xa5\x00\x00\x00\x00\x00\x00\x00\x8c\x16numpy._core.multiarray\x94\x8c\x0c_reconstruct\x94\x93\x94\x8c\x05numpy\x94\x8c\x07ndarray\x94\x93\x94K\x00\x85\x94C\x01b\x94\x87\x94R\x94(K\x01K\x04K\x03K\x02\x87\x94h\x03\x8c\x05dtype\x94\x93\x94\x8c\x02u1\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01|\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94b\x89C\x18\x00\x01\x08\t\x10\x11\x02\x03\n\x0b\x12\x13\x04\x05\x0c\r\x14\x15\x06\x07\x0e\x0f\x16\x17\x94t\x94b.' # noqa
4432+
no_contiguous_dumped = b'\x80\x05\x95\x91\x00\x00\x00\x00\x00\x00\x00\x8c\x16numpy._core.multiarray\x94\x8c\x0c_reconstruct\x94\x93\x94\x8c\x05numpy\x94\x8c\x07ndarray\x94\x93\x94K\x00\x85\x94C\x01b\x94\x87\x94R\x94(K\x01K\x03K\x02\x86\x94h\x03\x8c\x05dtype\x94\x93\x94\x8c\x02u1\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01|\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94b\x89C\x06\x00\x01\x04\x05\x08\t\x94t\x94b.' # noqa
4433+
x = np.arange(24, dtype='uint8').reshape(3, 4, 2)
4434+
assert_equal(x, pickle.loads(c_contiguous_dumped))
4435+
x = np.arange(24, dtype='uint8').reshape(3, 4, 2, order='F')
4436+
assert_equal(x, pickle.loads(f_contiguous_dumped))
4437+
x = np.arange(24, dtype='uint8').reshape(3, 4, 2).transpose((1, 0, 2))
4438+
assert_equal(x, pickle.loads(transposed_contiguous_dumped))
4439+
x = np.arange(12, dtype='uint8').reshape(3, 4)[:, :2]
4440+
assert_equal(x, pickle.loads(no_contiguous_dumped))
4441+
44074442
def test_non_contiguous_array(self):
44084443
non_contiguous_array = np.arange(12).reshape(3, 4)[:, :2]
44094444
assert not non_contiguous_array.flags.c_contiguous
44104445
assert not non_contiguous_array.flags.f_contiguous
44114446

44124447
# make sure non-contiguous arrays can be pickled-depickled
44134448
# using any protocol
4449+
buffers = []
44144450
for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
44154451
depickled_non_contiguous_array = pickle.loads(
4416-
pickle.dumps(non_contiguous_array, protocol=proto))
4452+
pickle.dumps(non_contiguous_array, protocol=proto,
4453+
buffer_callback=buffers.append if proto >= 5 else None))
44174454

4455+
assert_equal(len(buffers), 0)
44184456
assert_equal(non_contiguous_array, depickled_non_contiguous_array)
44194457

44204458
def test_roundtrip(self):

0 commit comments

Comments
 (0)