Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@
is_array_like,
is_bool_dtype,
is_dataclass,
is_datetime64_any_dtype,
is_dict_like,
is_float,
is_float_dtype,
Expand All @@ -103,6 +104,7 @@
is_list_like,
is_scalar,
is_sequence,
is_timedelta64_dtype,
needs_i8_conversion,
pandas_dtype,
)
Expand Down Expand Up @@ -11350,6 +11352,13 @@ def cov(
c -0.150812 0.191417 0.895202
"""
data = self._get_numeric_data() if numeric_only else self
dtypes = [blk.dtype for blk in self._mgr.blocks]
if any(is_datetime64_any_dtype(d) or is_timedelta64_dtype(d) for d in dtypes):
msg = (
"DataFrame contains columns with dtype datetime64 "
"or timedelta64, which are not supported for cov."
)
raise TypeError(msg)
cols = data.columns
idx = cols.copy()
mat = data.to_numpy(dtype=float, na_value=np.nan, copy=False)
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1800,6 +1800,8 @@ def as_array(
arr = np.asarray(blk.values, dtype=dtype)
else:
arr = np.array(blk.values, dtype=dtype, copy=copy)
if passed_nan and blk.dtype.kind in ["m", "M"]:
arr[isna(blk.values)] = na_value

if not copy:
arr = arr.view()
Expand Down Expand Up @@ -1865,6 +1867,8 @@ def _interleave(
else:
arr = blk.get_values(dtype)
result[rl.indexer] = arr
if na_value is not lib.no_default and blk.dtype.kind in ["m", "M"]:
result[rl.indexer][isna(arr)] = na_value
itemmask[rl.indexer] = 1

if not itemmask.all():
Expand Down
36 changes: 36 additions & 0 deletions pandas/tests/frame/methods/test_to_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@

from pandas import (
DataFrame,
NaT,
Timestamp,
date_range,
)
import pandas._testing as tm

Expand Down Expand Up @@ -41,3 +43,37 @@ def test_to_numpy_mixed_dtype_to_str(self):
result = df.to_numpy(dtype=str)
expected = np.array([["2020-01-01 00:00:00", "100.0"]], dtype=str)
tm.assert_numpy_array_equal(result, expected)

def test_to_numpy_datetime_with_na(self):
# GH #53115
dti = date_range("2016-01-01", periods=3)
df = DataFrame(dti)
df.iloc[0, 0] = NaT
expected = np.array([[np.nan], [1.45169280e18], [1.45177920e18]])
assert np.allclose(
df.to_numpy(float, na_value=np.nan), expected, equal_nan=True
)

df = DataFrame(
{
"a": [Timestamp("1970-01-01"), Timestamp("1970-01-02"), NaT],
"b": [
Timestamp("1970-01-01"),
np.nan,
Timestamp("1970-01-02"),
],
"c": [
1,
np.nan,
2,
],
}
)
arr = np.array(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
arr = np.array(
expected = np.array(

[
[0.00e00, 0.00e00, 1.00e00],
[8.64e04, np.nan, np.nan],
[np.nan, 8.64e04, 2.00e00],
]
)
assert np.allclose(df.to_numpy(float, na_value=np.nan), arr, equal_nan=True)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
assert np.allclose(df.to_numpy(float, na_value=np.nan), arr, equal_nan=True)
result = df.to_numpy(float, na_value=np.nan)
tm.assert_numpy_array_equal(result, expected)

33 changes: 33 additions & 0 deletions pandas/tests/frame/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1917,6 +1917,39 @@ def test_df_empty_nullable_min_count_1(self, opname, dtype, exp_dtype):
expected = Series([pd.NA, pd.NA], dtype=exp_dtype, index=Index([0, 1]))
tm.assert_series_equal(result, expected)

# Every case pairs an integer column "a" with a column "b" built from
# Timestamp, Timedelta and/or NaT values; the cases vary the kind of value
# in "b" and how many of its entries are NaT.
@pytest.mark.parametrize(
"data",
[
# "b" consists only of NaT
{"a": [0, 1, 2], "b": [pd.NaT, pd.NaT, pd.NaT]},
# "b" has one Timestamp followed by NaT entries
{"a": [0, 1, 2], "b": [Timestamp("1990-01-01"), pd.NaT, pd.NaT]},
# "b" has only Timestamps, no missing values
{
"a": [0, 1, 2],
"b": [
Timestamp("1990-01-01"),
Timestamp("1991-01-01"),
Timestamp("1992-01-01"),
],
},
# "b" has Timedeltas with a trailing NaT
{
"a": [0, 1, 2],
"b": [pd.Timedelta("1 days"), pd.Timedelta("2 days"), pd.NaT],
},
# "b" has only Timedeltas, no missing values
{
"a": [0, 1, 2],
"b": [
pd.Timedelta("1 days"),
pd.Timedelta("2 days"),
pd.Timedelta("3 days"),
],
},
],
)
def test_df_cov_pd_nat(self, data):
# GH #53115
# Build a frame from the parametrized data and check that calling cov()
# with default arguments raises TypeError with a message matching
# "not supported for cov".
df = DataFrame(data)
with pytest.raises(TypeError, match="not supported for cov"):
df.cov()


def test_sum_timedelta64_skipna_false():
# GH#17235
Expand Down
Loading