Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/core/arrays/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@
from .period import PeriodArrayMixin # noqa
from .timedeltas import TimedeltaArrayMixin # noqa
from .integer import ( # noqa
IntegerArray, to_integer_array)
IntegerArray, integer_array)
40 changes: 24 additions & 16 deletions pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def construct_from_string(cls, string):
"'{}'".format(cls, string))


def to_integer_array(values, dtype=None):
def integer_array(values, dtype=None, copy=False):
"""
Infer and return an integer array of the values.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you update the docstring?


Expand All @@ -94,7 +94,8 @@ def to_integer_array(values, dtype=None):
------
TypeError if incompatible types
"""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there still a ref to to_integer_array? (I see it in the diff, but I also see that you changed it above)

return IntegerArray(values, dtype=dtype, copy=False)
values, mask = coerce_to_array(values, dtype=dtype, copy=copy)
return IntegerArray(values, mask)


def safe_cast(values, dtype, copy):
Expand Down Expand Up @@ -206,7 +207,7 @@ class IntegerArray(ExtensionArray, ExtensionOpsMixin):
def dtype(self):
return _dtypes[str(self._data.dtype)]

def __init__(self, values, mask=None, dtype=None, copy=False):
def __init__(self, values, mask, copy=False):
"""
Parameters
----------
Expand All @@ -219,25 +220,33 @@ def __init__(self, values, mask=None, dtype=None, copy=False):
-------
IntegerArray
"""
self._data, self._mask = coerce_to_array(
values, dtype=dtype, mask=mask, copy=copy)
if not (isinstance(values, np.ndarray)
and np.issubdtype(values.dtype, np.integer)):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use is_integer_dtype

raise TypeError("values should be integer numpy array")
if not (isinstance(mask, np.ndarray) and mask.dtype == np.bool_):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use is_bool_dtype

raise TypeError("mask should be boolean numpy array")

if copy:
values = values.copy()
mask = mask.copy()

self._data = values
self._mask = mask

@classmethod
def _from_sequence(cls, scalars, dtype=None, copy=False):
return cls(scalars, dtype=dtype, copy=copy)
return integer_array(scalars, dtype=dtype, copy=copy)

@classmethod
def _from_factorized(cls, values, original):
return cls(values, dtype=original.dtype)
return integer_array(values, dtype=original.dtype)

def __getitem__(self, item):
if is_integer(item):
if self._mask[item]:
return self.dtype.na_value
return self._data[item]
return type(self)(self._data[item],
mask=self._mask[item],
dtype=self.dtype)
return type(self)(self._data[item], self._mask[item])
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jreback additional question: what do you think of writing IntegerArray(...) instead of type(self)(...)?

Python allows that perfectly well (and it is equivalent here, as we don't subclass this one further), and I personally find it easier to read.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like using type(), as I expect this to be subclassed for BooleanArray. It may turn out that this is not needed for that, but I want to keep that possibility open.


def _coerce_to_ndarray(self):
"""
Expand Down Expand Up @@ -294,7 +303,7 @@ def take(self, indexer, allow_fill=False, fill_value=None):
result[fill_mask] = fill_value
mask = mask ^ fill_mask

return type(self)(result, mask=mask, dtype=self.dtype, copy=False)
return type(self)(result, mask, copy=False)

def copy(self, deep=False):
data, mask = self._data, self._mask
Expand All @@ -304,7 +313,7 @@ def copy(self, deep=False):
else:
data = data.copy()
mask = mask.copy()
return type(self)(data, mask, dtype=self.dtype, copy=False)
return type(self)(data, mask, copy=False)

def __setitem__(self, key, value):
_is_scalar = is_scalar(value)
Expand Down Expand Up @@ -356,7 +365,7 @@ def _na_value(self):
def _concat_same_type(cls, to_concat):
data = np.concatenate([x._data for x in to_concat])
mask = np.concatenate([x._mask for x in to_concat])
return cls(data, mask=mask, dtype=to_concat[0].dtype)
return cls(data, mask)

def astype(self, dtype, copy=True):
"""Cast to a NumPy array or IntegerArray with 'dtype'.
Expand Down Expand Up @@ -386,8 +395,7 @@ def astype(self, dtype, copy=True):
if isinstance(dtype, _IntegerDtype):
result = self._data.astype(dtype.numpy_dtype,
casting='same_kind', copy=False)
return type(self)(result, mask=self._mask,
dtype=dtype, copy=False)
return type(self)(result, mask=self._mask, copy=False)

# coerce
data = self._coerce_to_ndarray()
Expand Down Expand Up @@ -523,7 +531,7 @@ def _maybe_mask_result(self, result, mask, other, op_name):
result[mask] = np.nan
return result

return type(self)(result, mask=mask, dtype=self.dtype, copy=False)
return type(self)(result, mask, copy=False)

@classmethod
def _create_arithmetic_method(cls, op):
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
if not (dtype is None or is_object_dtype(dtype)):

# coerce to the provided dtype
data = dtype.construct_array_type()(
data = dtype.construct_array_type()._from_sequence(
data, dtype=dtype, copy=False)

# coerce to the object dtype
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4095,7 +4095,7 @@ def _try_cast(arr, take_fast_path):
ordered=dtype.ordered)
elif is_extension_array_dtype(dtype):
# create an extension array from its dtype
array_type = dtype.construct_array_type()
array_type = dtype.construct_array_type()._from_sequence
subarr = array_type(subarr, dtype=dtype, copy=copy)

elif dtype is not None and raise_cast_failure:
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/extension/base/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def test_fillna_series_method(self, data_missing, method):
fill_value = data_missing[1]

if method == 'ffill':
data_missing = type(data_missing)(data_missing[::-1])
data_missing = data_missing[::-1]

result = pd.Series(data_missing).fillna(method=method)
expected = pd.Series(
Expand Down
25 changes: 12 additions & 13 deletions pandas/tests/extension/integer/test_integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from pandas.core.dtypes.generic import ABCIndexClass

from pandas.core.arrays import (
to_integer_array, IntegerArray)
integer_array, IntegerArray)
from pandas.core.arrays.integer import (
Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype,
UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype)
Expand All @@ -31,12 +31,12 @@ def dtype(request):

@pytest.fixture
def data(dtype):
return IntegerArray(make_data(), dtype=dtype)
return integer_array(make_data(), dtype=dtype)


@pytest.fixture
def data_missing(dtype):
return IntegerArray([np.nan, 1], dtype=dtype)
return integer_array([np.nan, 1], dtype=dtype)


@pytest.fixture
Expand All @@ -49,12 +49,12 @@ def gen(count):

@pytest.fixture
def data_for_sorting(dtype):
return IntegerArray([1, 2, 0], dtype=dtype)
return integer_array([1, 2, 0], dtype=dtype)


@pytest.fixture
def data_missing_for_sorting(dtype):
return IntegerArray([1, np.nan, 0], dtype=dtype)
return integer_array([1, np.nan, 0], dtype=dtype)


@pytest.fixture
Expand All @@ -74,7 +74,7 @@ def data_for_grouping(dtype):
a = 0
c = 2
na = np.nan
return IntegerArray([b, b, na, na, a, a, b, c], dtype=dtype)
return integer_array([b, b, na, na, a, a, b, c], dtype=dtype)


def test_dtypes(dtype):
Expand Down Expand Up @@ -494,8 +494,7 @@ def test_construct_index(self, all_data, dropna):
else:
other = all_data

result = pd.Index(IntegerArray(other,
dtype=all_data.dtype))
result = pd.Index(integer_array(other, dtype=all_data.dtype))
expected = pd.Index(other, dtype=object)

self.assert_index_equal(result, expected)
Expand Down Expand Up @@ -584,14 +583,14 @@ def test_construct_cast_invalid(self, dtype):
msg = "cannot safely"
arr = [1.2, 2.3, 3.7]
with tm.assert_raises_regex(TypeError, msg):
IntegerArray(arr, dtype=dtype)
integer_array(arr, dtype=dtype)

with tm.assert_raises_regex(TypeError, msg):
pd.Series(arr).astype(dtype)

arr = [1.2, 2.3, 3.7, np.nan]
with tm.assert_raises_regex(TypeError, msg):
IntegerArray(arr, dtype=dtype)
integer_array(arr, dtype=dtype)

with tm.assert_raises_regex(TypeError, msg):
pd.Series(arr).astype(dtype)
Expand Down Expand Up @@ -658,7 +657,7 @@ def test_conversions(data_missing):
def test_to_integer_array_error(values):
# error in converting existing arrays to IntegerArrays
with pytest.raises(TypeError):
to_integer_array(values)
integer_array(values)


@pytest.mark.parametrize(
Expand All @@ -669,8 +668,8 @@ def test_to_integer_array_error(values):
(np.array([1, np.nan]), 'int8', Int8Dtype)])
def test_to_integer_array(values, to_dtype, result_dtype):
# convert existing arrays to IntegerArrays
result = to_integer_array(values, dtype=to_dtype)
expected = IntegerArray(values, dtype=result_dtype())
result = integer_array(values, dtype=to_dtype)
expected = integer_array(values, dtype=result_dtype())
tm.assert_extension_array_equal(result, expected)


Expand Down