Skip to content

Commit baa1dd9

Browse files
More cleanups now that sum is implemented
1 parent 0871326 commit baa1dd9

File tree

8 files changed

+13
-20
lines changed

8 files changed

+13
-20
lines changed

asv_bench/benchmarks/groupby.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -752,9 +752,6 @@ class String:
752752
]
753753

754754
def setup(self, dtype, method):
755-
if dtype == "string[python]" and method == "sum":
756-
raise NotImplementedError # skip benchmark
757-
758755
cols = list("abcdefghjkl")
759756
self.df = DataFrame(
760757
np.random.randint(0, 100, size=(10_000, len(cols))),

pandas/tests/generic/test_frame.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,6 @@ def test_metadata_propagation_indiv_groupby(self):
6161
"D": np.random.default_rng(2).standard_normal(8),
6262
}
6363
)
64-
df = df.astype({"A": object, "B": object})
6564
result = df.groupby("A").sum()
6665
tm.assert_metadata_equivalent(df, result)
6766

pandas/tests/groupby/aggregate/test_aggregate.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1020,7 +1020,7 @@ def test_groupby_as_index_agg(df):
10201020

10211021
result2 = grouped.agg({"C": "mean", "D": "sum"})
10221022
expected2 = grouped.mean(numeric_only=True)
1023-
expected2["D"] = grouped.sum(numeric_only=True)["D"]
1023+
expected2["D"] = grouped.sum()["D"]
10241024
tm.assert_frame_equal(result2, expected2)
10251025

10261026
grouped = df.groupby("A", as_index=True)

pandas/tests/groupby/methods/test_quantile.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ def test_groupby_quantile_nullable_array(values, q):
244244

245245
@pytest.mark.parametrize("q", [0.5, [0.0, 0.5, 1.0]])
246246
@pytest.mark.parametrize("numeric_only", [True, False])
247-
def test_groupby_quantile_raises_on_invalid_dtype(q, numeric_only, using_infer_string):
247+
def test_groupby_quantile_raises_on_invalid_dtype(q, numeric_only):
248248
df = DataFrame({"a": [1], "b": [2.0], "c": ["x"]})
249249
if numeric_only:
250250
result = df.groupby("a").quantile(q, numeric_only=numeric_only)

pandas/tests/groupby/test_groupby.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -575,7 +575,6 @@ def test_ops_not_as_index(reduction_func):
575575

576576

577577
def test_as_index_series_return_frame(df):
578-
df = df.astype({"A": object, "B": object})
579578
grouped = df.groupby("A", as_index=False)
580579
grouped2 = df.groupby(["A", "B"], as_index=False)
581580

@@ -979,7 +978,6 @@ def test_groupby_with_hier_columns():
979978

980979

981980
def test_grouping_ndarray(df):
982-
df = df.astype({"A": object, "B": object})
983981
grouped = df.groupby(df["A"].values)
984982
grouped2 = df.groupby(df["A"].rename(None))
985983

@@ -1477,13 +1475,10 @@ def f(group):
14771475

14781476
def test_no_dummy_key_names(df):
14791477
# see gh-1291
1480-
df = df.astype({"A": object, "B": object})
1481-
gb = df.groupby(df["A"].values)
1482-
gb2 = df.groupby([df["A"].values, df["B"].values])
1483-
result = gb.sum()
1478+
result = df.groupby(df["A"].values).sum()
14841479
assert result.index.name is None
14851480

1486-
result2 = gb2.sum()
1481+
result2 = df.groupby([df["A"].values, df["B"].values]).sum()
14871482
assert result2.index.names == (None, None)
14881483

14891484

pandas/tests/groupby/test_numeric_only.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@ def df(self):
2828
"group": [1, 1, 2],
2929
"int": [1, 2, 3],
3030
"float": [4.0, 5.0, 6.0],
31-
"string": Series(["a", "b", "c"], dtype=object),
31+
"string": Series(["a", "b", "c"], dtype="str"),
32+
"object": Series(["a", "b", "c"], dtype=object),
3233
"category_string": Series(list("abc")).astype("category"),
3334
"category_int": [7, 8, 9],
3435
"datetime": date_range("20130101", periods=3),
@@ -40,6 +41,7 @@ def df(self):
4041
"int",
4142
"float",
4243
"string",
44+
"object",
4345
"category_string",
4446
"category_int",
4547
"datetime",
@@ -112,6 +114,7 @@ def test_first_last(self, df, method):
112114
"int",
113115
"float",
114116
"string",
117+
"object",
115118
"category_string",
116119
"category_int",
117120
"datetime",
@@ -159,7 +162,9 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
159162

160163
# object dtypes for transformations are not implemented in Cython and
161164
# have no Python fallback
162-
exception = NotImplementedError if method.startswith("cum") else TypeError
165+
exception = (
166+
(NotImplementedError, TypeError) if method.startswith("cum") else TypeError
167+
)
163168

164169
if method in ("min", "max", "cummin", "cummax", "cumsum", "cumprod"):
165170
# The methods default to numeric_only=False and raise TypeError
@@ -170,6 +175,7 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
170175
re.escape(f"agg function failed [how->{method},dtype->object]"),
171176
# cumsum/cummin/cummax/cumprod
172177
"function is not implemented for this dtype",
178+
f"dtype 'str' does not support operation '{method}'",
173179
]
174180
)
175181
with pytest.raises(exception, match=msg):

pandas/tests/groupby/test_raises.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -194,10 +194,7 @@ def test_groupby_raises_string(
194194
"quantile",
195195
]:
196196
msg = f"dtype 'str' does not support operation '{groupby_func}'"
197-
if groupby_func == "sum":
198-
# The object-dtype allows this, StringArray variants do not.
199-
klass = TypeError
200-
elif groupby_func in ["sem", "std", "skew"]:
197+
if groupby_func in ["sem", "std", "skew"]:
201198
# The object-dtype raises ValueError when trying to convert to numeric.
202199
klass = TypeError
203200
elif groupby_func == "pct_change" and df["d"].dtype.storage == "pyarrow":

pandas/tests/reshape/test_pivot.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,6 @@ def test_pivot_table_nocols(self):
139139
df = DataFrame(
140140
{"rows": ["a", "b", "c"], "cols": ["x", "y", "z"], "values": [1, 2, 3]}
141141
)
142-
df = df.astype({"rows": object, "cols": object})
143142
rs = df.pivot_table(columns="cols", aggfunc="sum")
144143
xp = df.pivot_table(index="cols", aggfunc="sum").T
145144
tm.assert_frame_equal(rs, xp)

0 commit comments

Comments
 (0)