Skip to content

Commit 79a5000

Browse files
authored
Merge pull request numpy#21324 from seberg/frombuffer-fix
BUG: Make mmap handling safer in frombuffer
2 parents 03d6e82 + 622ca48 commit 79a5000

File tree

3 files changed

+54
-6
lines changed

3 files changed

+54
-6
lines changed

numpy/core/_add_newdocs.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1557,6 +1557,10 @@
15571557
The data of the resulting array will not be byteswapped, but will be
15581558
interpreted correctly.
15591559
1560+
This function creates a view into the original object. This should be safe
1561+
in general, but it may make sense to copy the result when the original
1562+
object is mutable or untrusted.
1563+
15601564
Examples
15611565
--------
15621566
>>> s = b'hello world'

numpy/core/src/multiarray/ctors.c

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3683,28 +3683,44 @@ PyArray_FromBuffer(PyObject *buf, PyArray_Descr *type,
36833683
return NULL;
36843684
}
36853685

3686+
/*
3687+
* The array check is probably unnecessary. It preserves the base for
3688+
* arrays. This is the "old" buffer protocol, which had no release logic.
3689+
* (It was assumed that the result is always a view.)
3690+
*
3691+
* NOTE: We could also check if `bf_releasebuffer` is defined which should
3692+
* be the most precise and safe thing to do. But that should only be
3693+
* necessary if unexpected backcompat issues are found downstream.
3694+
*/
3695+
if (!PyArray_Check(buf)) {
3696+
buf = PyMemoryView_FromObject(buf);
3697+
if (buf == NULL) {
3698+
return NULL;
3699+
}
3700+
}
3701+
else {
3702+
Py_INCREF(buf);
3703+
}
3704+
36863705
if (PyObject_GetBuffer(buf, &view, PyBUF_WRITABLE|PyBUF_SIMPLE) < 0) {
36873706
writeable = 0;
36883707
PyErr_Clear();
36893708
if (PyObject_GetBuffer(buf, &view, PyBUF_SIMPLE) < 0) {
3709+
Py_DECREF(buf);
36903710
Py_DECREF(type);
36913711
return NULL;
36923712
}
36933713
}
36943714
data = (char *)view.buf;
36953715
ts = view.len;
3696-
/*
3697-
* In Python 3 both of the deprecated functions PyObject_AsWriteBuffer and
3698-
* PyObject_AsReadBuffer that this code replaces release the buffer. It is
3699-
* up to the object that supplies the buffer to guarantee that the buffer
3700-
* sticks around after the release.
3701-
*/
3716+
/* `buf` is an array or a memoryview; so we know `view` does not own data */
37023717
PyBuffer_Release(&view);
37033718

37043719
if ((offset < 0) || (offset > ts)) {
37053720
PyErr_Format(PyExc_ValueError,
37063721
"offset must be non-negative and no greater than buffer "\
37073722
"length (%" NPY_INTP_FMT ")", (npy_intp)ts);
3723+
Py_DECREF(buf);
37083724
Py_DECREF(type);
37093725
return NULL;
37103726
}
@@ -3717,13 +3733,15 @@ PyArray_FromBuffer(PyObject *buf, PyArray_Descr *type,
37173733
if (itemsize == 0) {
37183734
PyErr_SetString(PyExc_ValueError,
37193735
"cannot determine count if itemsize is 0");
3736+
Py_DECREF(buf);
37203737
Py_DECREF(type);
37213738
return NULL;
37223739
}
37233740
if (s % itemsize != 0) {
37243741
PyErr_SetString(PyExc_ValueError,
37253742
"buffer size must be a multiple"\
37263743
" of element size");
3744+
Py_DECREF(buf);
37273745
Py_DECREF(type);
37283746
return NULL;
37293747
}
@@ -3734,6 +3752,7 @@ PyArray_FromBuffer(PyObject *buf, PyArray_Descr *type,
37343752
PyErr_SetString(PyExc_ValueError,
37353753
"buffer is smaller than requested"\
37363754
" size");
3755+
Py_DECREF(buf);
37373756
Py_DECREF(type);
37383757
return NULL;
37393758
}
@@ -3743,6 +3762,7 @@ PyArray_FromBuffer(PyObject *buf, PyArray_Descr *type,
37433762
&PyArray_Type, type,
37443763
1, &n, NULL, data,
37453764
NPY_ARRAY_DEFAULT, NULL, buf);
3765+
Py_DECREF(buf);
37463766
if (ret == NULL) {
37473767
return NULL;
37483768
}

numpy/core/tests/test_multiarray.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import pathlib
1919
import builtins
2020
from decimal import Decimal
21+
import mmap
2122

2223
import numpy as np
2324
import numpy.core._multiarray_tests as _multiarray_tests
@@ -5475,9 +5476,32 @@ def test_basic(self, byteorder, dtype):
54755476
buf = x.tobytes()
54765477
assert_array_equal(np.frombuffer(buf, dtype=dt), x.flat)
54775478

5479+
def test_array_base(self):
5480+
arr = np.arange(10)
5481+
new = np.frombuffer(arr)
5482+
# We currently special case arrays to ensure they are used as a base.
5483+
# This could probably be changed (removing the test).
5484+
assert new.base is arr
5485+
54785486
def test_empty(self):
54795487
assert_array_equal(np.frombuffer(b''), np.array([]))
54805488

5489+
@pytest.mark.skipif(IS_PYPY,
5490+
reason="PyPy's memoryview currently does not track exports. See: "
5491+
"https://foss.heptapod.net/pypy/pypy/-/issues/3724")
5492+
def test_mmap_close(self):
5493+
# The old buffer protocol was not safe for some things that the new
5494+
# one is. But `frombuffer` always used the old one for a long time.
5495+
# Checks that it is safe with the new one (using memoryviews)
5496+
with tempfile.TemporaryFile(mode='wb') as tmp:
5497+
tmp.write(b"asdf")
5498+
tmp.flush()
5499+
mm = mmap.mmap(tmp.fileno(), 0)
5500+
arr = np.frombuffer(mm, dtype=np.uint8)
5501+
with pytest.raises(BufferError):
5502+
mm.close() # cannot close while array uses the buffer
5503+
del arr
5504+
mm.close()
54815505

54825506
class TestFlat:
54835507
def setup(self):

0 commit comments

Comments
 (0)