Skip to content
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,7 @@ Other
- Bug in :func:`api.interchange.from_dataframe` when converting an empty DataFrame object (:issue:`53155`)
- Bug in :func:`assert_almost_equal` now throwing assertion error for two unequal sets (:issue:`51727`)
- Bug in :func:`assert_frame_equal` checking category dtypes even when asked not to check index type (:issue:`52126`)
- Bug in :meth:`DataFrame.pivot_table` incorrectly casting the mean of integer columns back to an integer dtype (:issue:`16676`)
- Bug in :meth:`DataFrame.reindex` with a ``fill_value`` that should be inferred with a :class:`ExtensionDtype` incorrectly inferring ``object`` dtype (:issue:`52586`)
- Bug in :meth:`DataFrame.shift` and :meth:`Series.shift` and :meth:`DataFrameGroupBy.shift` when passing both "freq" and "fill_value" silently ignoring "fill_value" instead of raising ``ValueError`` (:issue:`53832`)
- Bug in :meth:`DataFrame.shift` with ``axis=1`` on a :class:`DataFrame` with a single :class:`ExtensionDtype` column giving incorrect results (:issue:`53832`)
Expand All @@ -650,7 +651,6 @@ Other
- Bug in :meth:`Series.memory_usage` when ``deep=True`` throwing an error with a :class:`Series` of objects and returning an incorrect value, as it did not take into account GC corrections (:issue:`51858`)
- Bug in :func:`period_range` where the default behavior when ``freq`` was not passed as an argument was incorrect (:issue:`53687`)
- Fixed incorrect ``__name__`` attribute of ``pandas._libs.json`` (:issue:`52898`)
-

.. ***DO NOT USE THIS SECTION***

Expand Down
23 changes: 0 additions & 23 deletions pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@

from pandas.core.dtypes.cast import maybe_downcast_to_dtype
from pandas.core.dtypes.common import (
is_integer_dtype,
is_list_like,
is_nested_list_like,
is_scalar,
Expand Down Expand Up @@ -172,28 +171,6 @@ def __internal_pivot_table(
if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns):
agged = agged.dropna(how="all")

# gh-21133
# we want to down cast if
# the original values are ints
# as we grouped with a NaN value
# and then dropped, coercing to floats
for v in values:
if (
v in data
and is_integer_dtype(data[v])
and v in agged
and not is_integer_dtype(agged[v])
):
if not isinstance(agged[v], ABCDataFrame) and isinstance(
data[v].dtype, np.dtype
):
# exclude DataFrame case bc maybe_downcast_to_dtype expects
# ArrayLike
# e.g. test_pivot_table_multiindex_columns_doctest_case
# agged.columns is a MultiIndex and 'v' is indexing only
# on its first level.
agged[v] = maybe_downcast_to_dtype(agged[v], data[v].dtype)

table = agged

# GH17038, this check should only happen if index is defined (not None)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/methods/test_drop.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def test_drop_multiindex_not_lexsorted(self):
lexsorted_mi = MultiIndex.from_tuples(
[("a", ""), ("b1", "c1"), ("b2", "c2")], names=["b", "c"]
)
lexsorted_df = DataFrame([[1, 3, 4]], columns=lexsorted_mi)
lexsorted_df = DataFrame([[1, 3.0, 4.0]], columns=lexsorted_mi)
assert lexsorted_df.columns._is_lexsorted()

# define the non-lexsorted version
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1776,7 +1776,7 @@ def test_groupby_multiindex_not_lexsorted():
lexsorted_mi = MultiIndex.from_tuples(
[("a", ""), ("b1", "c1"), ("b2", "c2")], names=["b", "c"]
)
lexsorted_df = DataFrame([[1, 3, 4]], columns=lexsorted_mi)
lexsorted_df = DataFrame([[1, 3.0, 4.0]], columns=lexsorted_mi)
assert lexsorted_df.columns._is_lexsorted()

# define the non-lexsorted version
Expand Down
71 changes: 40 additions & 31 deletions pandas/tests/reshape/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ def test_pivot_table_categorical(self):
result = pivot_table(df, values="values", index=["A", "B"], dropna=True)

exp_index = MultiIndex.from_arrays([cat1, cat2], names=["A", "B"])
expected = DataFrame({"values": [1, 2, 3, 4]}, index=exp_index)
expected = DataFrame({"values": [1.0, 2.0, 3.0, 4.0]}, index=exp_index)
tm.assert_frame_equal(result, expected)

def test_pivot_table_dropna_categoricals(self, dropna):
Expand All @@ -225,7 +225,7 @@ def test_pivot_table_dropna_categoricals(self, dropna):
expected_columns = expected_columns.astype(CDT(categories, ordered=False))
expected_index = Series([1, 2, 3], name="B")
expected = DataFrame(
[[0, 3, 6], [1, 4, 7], [2, 5, 8]],
[[0.0, 3.0, 6.0], [1.0, 4.0, 7.0], [2.0, 5.0, 8.0]],
index=expected_index,
columns=expected_columns,
)
Expand Down Expand Up @@ -283,7 +283,7 @@ def test_pivot_with_non_observable_dropna_multi_cat(self, dropna):

result = df.pivot_table(index="A", values="B", dropna=dropna)
expected = DataFrame(
{"B": [2, 3, 0]},
{"B": [2.0, 3.0, 0.0]},
index=Index(
Categorical.from_codes(
[0, 1, 2], categories=["low", "high", "left"], ordered=True
Expand All @@ -300,7 +300,9 @@ def test_pivot_with_interval_index(self, interval_values, dropna):
# GH 25814
df = DataFrame({"A": interval_values, "B": 1})
result = df.pivot_table(index="A", values="B", dropna=dropna)
expected = DataFrame({"B": 1}, index=Index(interval_values.unique(), name="A"))
expected = DataFrame(
{"B": 1.0}, index=Index(interval_values.unique(), name="A")
)
if not dropna:
expected = expected.astype(float)
tm.assert_frame_equal(result, expected)
Expand Down Expand Up @@ -444,7 +446,7 @@ def test_pivot_no_values(self):
index=Grouper(freq="A"), columns=Grouper(key="dt", freq="M")
)
exp = DataFrame(
[3], index=pd.DatetimeIndex(["2011-12-31"], freq="A"), columns=exp_columns
[3.0], index=pd.DatetimeIndex(["2011-12-31"], freq="A"), columns=exp_columns
)
tm.assert_frame_equal(res, exp)

Expand Down Expand Up @@ -1059,7 +1061,7 @@ def test_pivot_table_multiindex_only(self, cols):

result = df2.pivot_table(values="v", columns=cols)
expected = DataFrame(
[[4, 5, 6]],
[[4.0, 5.0, 6.0]],
columns=MultiIndex.from_tuples([(1, 1), (2, 2), (3, 3)], names=cols),
index=Index(["v"]),
)
Expand Down Expand Up @@ -1558,7 +1560,9 @@ def test_pivot_datetime_tz(self):
exp_col1 = Index(["value1", "value1"])
exp_col2 = Index(["a", "b"], name="label")
exp_col = MultiIndex.from_arrays([exp_col1, exp_col2])
expected = DataFrame([[0, 3], [1, 4], [2, 5]], index=exp_idx, columns=exp_col)
expected = DataFrame(
[[0.0, 3.0], [1.0, 4.0], [2.0, 5.0]], index=exp_idx, columns=exp_col
)
result = pivot_table(df, index=["dt1"], columns=["label"], values=["value1"])
tm.assert_frame_equal(result, expected)

Expand All @@ -1570,18 +1574,35 @@ def test_pivot_datetime_tz(self):
name="dt2",
)
exp_col = MultiIndex.from_arrays([exp_col1, exp_col2, exp_col3])
expected = DataFrame(
expected1 = DataFrame(
np.array(
[
[0, 3, 1, 2, 0, 3, 1, 2],
[1, 4, 2, 1, 1, 4, 2, 1],
[2, 5, 1, 2, 2, 5, 1, 2],
[
0,
3,
1,
2,
],
[1, 4, 2, 1],
[2, 5, 1, 2],
],
dtype="int64",
),
index=exp_idx,
columns=exp_col,
columns=exp_col[:4],
)
expected2 = DataFrame(
np.array(
[
[0.0, 3.0, 1.0, 2.0],
[1.0, 4.0, 2.0, 1.0],
[2.0, 5.0, 1.0, 2.0],
],
),
index=exp_idx,
columns=exp_col[4:],
)
expected = concat([expected1, expected2], axis=1)

result = pivot_table(
df,
Expand Down Expand Up @@ -1628,7 +1649,7 @@ def test_pivot_dtaccessor(self):

exp_idx = Index(["a", "b"], name="label")
expected = DataFrame(
{7: [0, 3], 8: [1, 4], 9: [2, 5]},
{7: [0.0, 3.0], 8: [1.0, 4.0], 9: [2.0, 5.0]},
index=exp_idx,
columns=Index([7, 8, 9], dtype=np.int32, name="dt1"),
)
Expand All @@ -1639,7 +1660,7 @@ def test_pivot_dtaccessor(self):
)

expected = DataFrame(
{7: [0, 3], 8: [1, 4], 9: [2, 5]},
{7: [0.0, 3.0], 8: [1.0, 4.0], 9: [2.0, 5.0]},
index=Index([1, 2], dtype=np.int32, name="dt2"),
columns=Index([7, 8, 9], dtype=np.int32, name="dt1"),
)
Expand All @@ -1660,7 +1681,7 @@ def test_pivot_dtaccessor(self):
names=["dt1", "dt2"],
)
expected = DataFrame(
np.array([[0, 3, 1, 4, 2, 5]], dtype="int64"),
np.array([[0.0, 3.0, 1.0, 4.0, 2.0, 5.0]]),
index=Index([2013], dtype=np.int32),
columns=exp_col,
)
Expand Down Expand Up @@ -1764,13 +1785,7 @@ def test_pivot_table_margins_name_with_aggfunc_list(self):
expected = DataFrame(table.values, index=ix, columns=cols)
tm.assert_frame_equal(table, expected)

def test_categorical_margins(self, observed, request):
if observed:
request.node.add_marker(
pytest.mark.xfail(
reason="GH#17035 (np.mean of ints is casted back to ints)"
)
)
def test_categorical_margins(self, observed):
# GH 10989
df = DataFrame(
{"x": np.arange(8), "y": np.arange(8) // 4, "z": np.arange(8) % 2}
Expand All @@ -1783,13 +1798,7 @@ def test_categorical_margins(self, observed, request):
table = df.pivot_table("x", "y", "z", dropna=observed, margins=True)
tm.assert_frame_equal(table, expected)

def test_categorical_margins_category(self, observed, request):
if observed:
request.node.add_marker(
pytest.mark.xfail(
reason="GH#17035 (np.mean of ints is casted back to ints)"
)
)
def test_categorical_margins_category(self, observed):
df = DataFrame(
{"x": np.arange(8), "y": np.arange(8) // 4, "z": np.arange(8) % 2}
)
Expand All @@ -1816,7 +1825,7 @@ def test_margins_casted_to_float(self):

result = pivot_table(df, index="D", margins=True)
expected = DataFrame(
{"A": [3, 7, 5], "B": [2.5, 6.5, 4.5], "C": [2, 5, 3.5]},
{"A": [3.0, 7.0, 5], "B": [2.5, 6.5, 4.5], "C": [2.0, 5.0, 3.5]},
index=Index(["X", "Y", "All"], name="D"),
)
tm.assert_frame_equal(result, expected)
Expand Down Expand Up @@ -2249,7 +2258,7 @@ def test_pivot_table_sort_false_with_multiple_values(self):
index=["lastname", "firstname"], values=["height", "age"], sort=False
)
expected = DataFrame(
[[173, 47], [182, 33]],
[[173.0, 47.0], [182.0, 33.0]],
columns=["height", "age"],
index=MultiIndex.from_tuples(
[("Foo", "John"), ("Bar", "Michael")],
Expand Down