Skip to content

Commit 369a677

Browse files
authored
Merge pull request numpy#20993 from seberg/fixup-fromiter
ENH: Allow object and subarray dtypes in fromiter
2 parents 247cb34 + 7b20674 commit 369a677

File tree

6 files changed

+140
-76
lines changed

6 files changed

+140
-76
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
``np.fromiter`` now accepts objects and subarrays
2+
-------------------------------------------------
3+
The `~numpy.fromiter` function now supports object and
4+
subarray dtypes. Please see the function documentation for
5+
examples.

numpy/core/_add_newdocs.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1398,6 +1398,11 @@
13981398
An iterable object providing data for the array.
13991399
dtype : data-type
14001400
The data-type of the returned array.
1401+
1402+
.. versionchanged:: 1.23
1403+
Object and subarray dtypes are now supported (note that the final
1404+
result is not 1-D for a subarray dtype).
1405+
14011406
count : int, optional
14021407
The number of items to read from *iterable*. The default is -1,
14031408
which means all data is read.
@@ -1421,6 +1426,18 @@
14211426
>>> np.fromiter(iterable, float)
14221427
array([ 0., 1., 4., 9., 16.])
14231428
1429+
A carefully constructed subarray dtype will lead to higher dimensional
1430+
results:
1431+
1432+
>>> iterable = ((x+1, x+2) for x in range(5))
1433+
>>> np.fromiter(iterable, dtype=np.dtype((int, 2)))
1434+
array([[1, 2],
1435+
[2, 3],
1436+
[3, 4],
1437+
[4, 5],
1438+
[5, 6]])
1439+
1440+
14241441
""".replace(
14251442
"${ARRAY_FUNCTION_LIKE}",
14261443
array_function_like_doc,

numpy/core/src/multiarray/common.c

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -127,23 +127,6 @@ PyArray_DTypeFromObject(PyObject *obj, int maxdims, PyArray_Descr **out_dtype)
127127
return 0;
128128
}
129129

130-
NPY_NO_EXPORT char *
131-
index2ptr(PyArrayObject *mp, npy_intp i)
132-
{
133-
npy_intp dim0;
134-
135-
if (PyArray_NDIM(mp) == 0) {
136-
PyErr_SetString(PyExc_IndexError, "0-d arrays can't be indexed");
137-
return NULL;
138-
}
139-
dim0 = PyArray_DIMS(mp)[0];
140-
if (check_and_adjust_index(&i, dim0, 0, NULL) < 0)
141-
return NULL;
142-
if (i == 0) {
143-
return PyArray_DATA(mp);
144-
}
145-
return PyArray_BYTES(mp)+i*PyArray_STRIDES(mp)[0];
146-
}
147130

148131
NPY_NO_EXPORT int
149132
_zerofill(PyArrayObject *ret)

numpy/core/src/multiarray/common.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,6 @@ NPY_NO_EXPORT int
4343
PyArray_DTypeFromObject(PyObject *obj, int maxdims,
4444
PyArray_Descr **out_dtype);
4545

46-
NPY_NO_EXPORT int
47-
PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims,
48-
PyArray_Descr **out_dtype, int string_status);
4946

5047
/*
5148
* Returns NULL without setting an exception if no scalar is matched, a
@@ -54,12 +51,6 @@ PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims,
5451
NPY_NO_EXPORT PyArray_Descr *
5552
_array_find_python_scalar_type(PyObject *op);
5653

57-
NPY_NO_EXPORT PyArray_Descr *
58-
_array_typedescr_fromstr(char const *str);
59-
60-
NPY_NO_EXPORT char *
61-
index2ptr(PyArrayObject *mp, npy_intp i);
62-
6354
NPY_NO_EXPORT int
6455
_zerofill(PyArrayObject *ret);
6556

numpy/core/src/multiarray/ctors.c

Lines changed: 57 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -3894,11 +3894,9 @@ PyArray_FromString(char *data, npy_intp slen, PyArray_Descr *dtype,
38943894
NPY_NO_EXPORT PyObject *
38953895
PyArray_FromIter(PyObject *obj, PyArray_Descr *dtype, npy_intp count)
38963896
{
3897-
PyObject *value;
38983897
PyObject *iter = NULL;
38993898
PyArrayObject *ret = NULL;
39003899
npy_intp i, elsize, elcount;
3901-
char *item, *new_data;
39023900

39033901
if (dtype == NULL) {
39043902
return NULL;
@@ -3910,6 +3908,7 @@ PyArray_FromIter(PyObject *obj, PyArray_Descr *dtype, npy_intp count)
39103908
}
39113909

39123910
if (PyDataType_ISUNSIZED(dtype)) {
3911+
/* If this error is removed, the `ret` allocation may need fixing */
39133912
PyErr_SetString(PyExc_ValueError,
39143913
"Must specify length when using variable-size data-type.");
39153914
goto done;
@@ -3927,38 +3926,50 @@ PyArray_FromIter(PyObject *obj, PyArray_Descr *dtype, npy_intp count)
39273926
elsize = dtype->elsize;
39283927

39293928
/*
3930-
* We would need to alter the memory RENEW code to decrement any
3931-
* reference counts before throwing away any memory.
3929+
* Note that PyArray_DESCR(ret) may not match dtype. There are exactly
3930+
* two cases where this can happen: empty strings/bytes/void (rejected
3931+
* above) and subarray dtypes (supported by sticking with `dtype`).
39323932
*/
3933-
if (PyDataType_REFCHK(dtype)) {
3934-
PyErr_SetString(PyExc_ValueError,
3935-
"cannot create object arrays from iterator");
3936-
goto done;
3937-
}
3938-
3933+
Py_INCREF(dtype);
39393934
ret = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, dtype, 1,
39403935
&elcount, NULL,NULL, 0, NULL);
3941-
dtype = NULL;
39423936
if (ret == NULL) {
39433937
goto done;
39443938
}
3945-
for (i = 0; (i < count || count == -1) &&
3946-
(value = PyIter_Next(iter)); i++) {
3947-
if (i >= elcount && elsize != 0) {
3939+
#ifdef NPY_RELAXED_STRIDES_DEBUG
3940+
/* Incompatible with NPY_RELAXED_STRIDES_DEBUG due to growing */
3941+
if (elcount == 1) {
3942+
PyArray_STRIDES(ret)[0] = elsize;
3943+
}
3944+
#endif /* NPY_RELAXED_STRIDES_DEBUG */
3945+
3946+
3947+
char *item = PyArray_BYTES(ret);
3948+
for (i = 0; i < count || count == -1; i++, item += elsize) {
3949+
PyObject *value = PyIter_Next(iter);
3950+
if (value == NULL) {
3951+
if (PyErr_Occurred()) {
3952+
/* Fetching next item failed rather than exhausting iterator */
3953+
goto done;
3954+
}
3955+
break;
3956+
}
3957+
3958+
if (NPY_UNLIKELY(i >= elcount) && elsize != 0) {
3959+
char *new_data = NULL;
39483960
npy_intp nbytes;
39493961
/*
39503962
Grow PyArray_DATA(ret):
39513963
this is similar to the strategy for PyListObject, but we use
39523964
50% overallocation => 0, 4, 8, 14, 23, 36, 56, 86 ...
3965+
TODO: The loadtxt code now uses a `growth` helper that would
3966+
be suitable to reuse here.
39533967
*/
39543968
elcount = (i >> 1) + (i < 4 ? 4 : 2) + i;
39553969
if (!npy_mul_with_overflow_intp(&nbytes, elcount, elsize)) {
39563970
/* The handler is always valid */
3957-
new_data = PyDataMem_UserRENEW(PyArray_DATA(ret), nbytes,
3958-
PyArray_HANDLER(ret));
3959-
}
3960-
else {
3961-
new_data = NULL;
3971+
new_data = PyDataMem_UserRENEW(
3972+
PyArray_BYTES(ret), nbytes, PyArray_HANDLER(ret));
39623973
}
39633974
if (new_data == NULL) {
39643975
PyErr_SetString(PyExc_MemoryError,
@@ -3967,44 +3978,52 @@ PyArray_FromIter(PyObject *obj, PyArray_Descr *dtype, npy_intp count)
39673978
goto done;
39683979
}
39693980
((PyArrayObject_fields *)ret)->data = new_data;
3981+
/* resize array for cleanup: */
3982+
PyArray_DIMS(ret)[0] = elcount;
3983+
/* Reset `item` pointer to point into realloc'd chunk */
3984+
item = new_data + i * elsize;
3985+
if (PyDataType_FLAGCHK(dtype, NPY_NEEDS_INIT)) {
3986+
/* Initialize new chunk: */
3987+
memset(item, 0, nbytes - i * elsize);
3988+
}
39703989
}
3971-
PyArray_DIMS(ret)[0] = i + 1;
39723990

3973-
if (((item = index2ptr(ret, i)) == NULL) ||
3974-
PyArray_SETITEM(ret, item, value) == -1) {
3991+
if (PyArray_Pack(dtype, item, value) < 0) {
39753992
Py_DECREF(value);
39763993
goto done;
39773994
}
39783995
Py_DECREF(value);
39793996
}
39803997

39813998

3982-
if (PyErr_Occurred()) {
3983-
goto done;
3984-
}
39853999
if (i < count) {
3986-
PyErr_SetString(PyExc_ValueError,
3987-
"iterator too short");
4000+
PyErr_Format(PyExc_ValueError,
4001+
"iterator too short: Expected %zd but iterator had only %zd "
4002+
"items.", (Py_ssize_t)count, (Py_ssize_t)i);
39884003
goto done;
39894004
}
39904005

39914006
/*
3992-
* Realloc the data so that don't keep extra memory tied up
3993-
* (assuming realloc is reasonably good about reusing space...)
4007+
* Realloc the data so that we don't keep extra memory tied up and fix
4008+
* the array's first dimension (there could be more than one).
39944009
*/
39954010
if (i == 0 || elsize == 0) {
39964011
/* The size cannot be zero for realloc. */
3997-
goto done;
39984012
}
3999-
/* The handler is always valid */
4000-
new_data = PyDataMem_UserRENEW(PyArray_DATA(ret), i * elsize,
4001-
PyArray_HANDLER(ret));
4002-
if (new_data == NULL) {
4003-
PyErr_SetString(PyExc_MemoryError,
4004-
"cannot allocate array memory");
4005-
goto done;
4013+
else {
4014+
/* Resize array to actual final size (it may be too large) */
4015+
/* The handler is always valid */
4016+
char *new_data = PyDataMem_UserRENEW(
4017+
PyArray_DATA(ret), i * elsize, PyArray_HANDLER(ret));
4018+
4019+
if (new_data == NULL) {
4020+
PyErr_SetString(PyExc_MemoryError,
4021+
"cannot allocate array memory");
4022+
goto done;
4023+
}
4024+
((PyArrayObject_fields *)ret)->data = new_data;
40064025
}
4007-
((PyArrayObject_fields *)ret)->data = new_data;
4026+
PyArray_DIMS(ret)[0] = i;
40084027

40094028
done:
40104029
Py_XDECREF(iter);

numpy/core/tests/test_numeric.py

Lines changed: 61 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1202,19 +1202,68 @@ def load_data(self, n, eindex):
12021202
raise NIterError('error at index %s' % eindex)
12031203
yield e
12041204

1205-
def test_2592(self):
1206-
# Test iteration exceptions are correctly raised.
1207-
count, eindex = 10, 5
1208-
assert_raises(NIterError, np.fromiter,
1209-
self.load_data(count, eindex), dtype=int, count=count)
1210-
1211-
def test_2592_edge(self):
1212-
# Test iter. exceptions, edge case (exception at end of iterator).
1213-
count = 10
1214-
eindex = count-1
1215-
assert_raises(NIterError, np.fromiter,
1216-
self.load_data(count, eindex), dtype=int, count=count)
1205+
@pytest.mark.parametrize("dtype", [int, object])
1206+
@pytest.mark.parametrize(["count", "error_index"], [(10, 5), (10, 9)])
1207+
def test_2592(self, count, error_index, dtype):
1208+
# Test iteration exceptions are correctly raised. The data/generator
1209+
# has `count` elements but errors at `error_index`
1210+
iterable = self.load_data(count, error_index)
1211+
with pytest.raises(NIterError):
1212+
np.fromiter(iterable, dtype=dtype, count=count)
1213+
1214+
@pytest.mark.parametrize("dtype", ["S", "S0", "V0", "U0"])
1215+
def test_empty_not_structured(self, dtype):
1216+
# Note, "S0" could be allowed at some point, so long as "S" (without
1217+
# any length) is rejected.
1218+
with pytest.raises(ValueError, match="Must specify length"):
1219+
np.fromiter([], dtype=dtype)
12171220

1221+
@pytest.mark.parametrize("dtype",
1222+
# Note that `np.dtype(("O", (10, 5)))` is a subarray dtype
1223+
["d", "i,O", np.dtype(("O", (10, 5))), "O"])
1224+
def test_growth_and_complicated_dtypes(self, dtype):
1225+
dtype = np.dtype(dtype)
1226+
data = [1, 2, 3, 4, 5, 6, 7, 8, 9] * 100 # make sure we realloc a bit
1227+
1228+
class MyIter:
1229+
# Class/example from gh-15789
1230+
def __length_hint__(self):
1231+
# only required to be an estimate, this is legal
1232+
return 1
1233+
1234+
def __iter__(self):
1235+
return iter(data)
1236+
1237+
res = np.fromiter(MyIter(), dtype=dtype)
1238+
expected = np.array(data, dtype=dtype)
1239+
1240+
assert_array_equal(res, expected)
1241+
1242+
def test_empty_result(self):
1243+
class MyIter:
1244+
def __length_hint__(self):
1245+
return 10
1246+
1247+
def __iter__(self):
1248+
return iter([]) # actual iterator is empty.
1249+
1250+
res = np.fromiter(MyIter(), dtype="d")
1251+
assert res.shape == (0,)
1252+
assert res.dtype == "d"
1253+
1254+
def test_too_few_items(self):
1255+
msg = "iterator too short: Expected 10 but iterator had only 3 items."
1256+
with pytest.raises(ValueError, match=msg):
1257+
np.fromiter([1, 2, 3], count=10, dtype=int)
1258+
1259+
def test_failed_itemsetting(self):
1260+
with pytest.raises(TypeError):
1261+
np.fromiter([1, None, 3], dtype=int)
1262+
1263+
# The following manages to hit somewhat trickier code paths:
1264+
iterable = ((2, 3, 4) for i in range(5))
1265+
with pytest.raises(ValueError):
1266+
np.fromiter(iterable, dtype=np.dtype((int, 2)))
12181267

12191268
class TestNonzero:
12201269
def test_nonzero_trivial(self):

0 commit comments

Comments
 (0)