Skip to content

Commit 5c1a4c7

Browse files
refactor: re-arrange describe() logic into two helper methods (#1005)
* refactor: re-arrange describe logic into two helper methods
* 🦉 Updates from OwlBot post-processor
  See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent 4cb62fd commit 5c1a4c7

File tree

1 file changed

+35
-39
lines changed

1 file changed

+35
-39
lines changed

bigframes/dataframe.py

Lines changed: 35 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -2302,52 +2302,19 @@ def melt(
23022302
self._block.melt(id_col_ids, val_col_ids, var_name, value_name)
23032303
)
23042304

2305-
_NUMERIC_DESCRIBE_AGGS = (
2306-
"count",
2307-
"mean",
2308-
"std",
2309-
"min",
2310-
"25%",
2311-
"50%",
2312-
"75%",
2313-
"max",
2314-
)
2315-
_NON_NUMERIC_DESCRIBE_AGGS = ("count", "nunique")
2316-
23172305
def describe(self, include: None | Literal["all"] = None) -> DataFrame:
2318-
2319-
allowed_non_numeric_types = {
2320-
bigframes.dtypes.STRING_DTYPE,
2321-
bigframes.dtypes.BOOL_DTYPE,
2322-
bigframes.dtypes.BYTES_DTYPE,
2323-
}
2324-
23252306
if include is None:
23262307
numeric_df = self._drop_non_numeric(permissive=False)
23272308
if len(numeric_df.columns) == 0:
23282309
# Describe eligible non-numeric columns
2329-
result = self.select_dtypes(include=allowed_non_numeric_types).agg(
2330-
self._NON_NUMERIC_DESCRIBE_AGGS
2331-
)
2332-
else:
2333-
# Otherwise, only describe numeric columns
2334-
result = numeric_df.agg(self._NUMERIC_DESCRIBE_AGGS)
2335-
return typing.cast(DataFrame, result)
2310+
return self._describe_non_numeric()
23362311

2337-
elif include == "all":
2338-
numeric_result = typing.cast(
2339-
DataFrame,
2340-
self._drop_non_numeric(permissive=False).agg(
2341-
self._NUMERIC_DESCRIBE_AGGS
2342-
),
2343-
)
2312+
# Otherwise, only describe numeric columns
2313+
return self._describe_numeric()
23442314

2345-
non_numeric_result = typing.cast(
2346-
DataFrame,
2347-
self.select_dtypes(include=allowed_non_numeric_types).agg(
2348-
self._NON_NUMERIC_DESCRIBE_AGGS
2349-
),
2350-
)
2315+
elif include == "all":
2316+
numeric_result = self._describe_numeric()
2317+
non_numeric_result = self._describe_non_numeric()
23512318

23522319
if len(numeric_result.columns) == 0:
23532320
return non_numeric_result
@@ -2364,6 +2331,35 @@ def describe(self, include: None | Literal["all"] = None) -> DataFrame:
23642331
else:
23652332
raise ValueError(f"Unsupported include type: {include}")
23662333

2334+
def _describe_numeric(self) -> DataFrame:
    """Build the numeric part of a ``describe()`` result.

    Columns are first filtered with ``_drop_non_numeric(permissive=False)``;
    the remaining frame is then aggregated with, in order: ``count``,
    ``mean``, ``std``, ``min``, ``25%``, ``50%``, ``75%`` and ``max``.

    Returns:
        DataFrame: The result of the aggregation.
    """
    numeric_only = self._drop_non_numeric(permissive=False)
    summary_aggs = [
        "count",
        "mean",
        "std",
        "min",
        "25%",
        "50%",
        "75%",
        "max",
    ]
    summary = numeric_only.agg(summary_aggs)
    # typing.cast has no runtime effect; it only narrows the static type
    # of the agg() result for type checkers.
    return typing.cast(DataFrame, summary)
2350+
2351+
def _describe_non_numeric(self) -> DataFrame:
    """Build the non-numeric part of a ``describe()`` result.

    Only columns whose dtype is one of ``STRING_DTYPE``, ``BOOL_DTYPE`` or
    ``BYTES_DTYPE`` (from ``bigframes.dtypes``) are kept; those columns are
    aggregated with ``count`` and ``nunique``.

    Returns:
        DataFrame: The result of the aggregation.
    """
    describable_dtypes = {
        bigframes.dtypes.STRING_DTYPE,
        bigframes.dtypes.BOOL_DTYPE,
        bigframes.dtypes.BYTES_DTYPE,
    }
    eligible = self.select_dtypes(include=describable_dtypes)
    summary = eligible.agg(["count", "nunique"])
    # typing.cast has no runtime effect; it only narrows the static type
    # of the agg() result for type checkers.
    return typing.cast(DataFrame, summary)
2362+
23672363
def skew(self, *, numeric_only: bool = False):
23682364
if not numeric_only:
23692365
frame = self._raise_on_non_numeric("skew")

0 commit comments

Comments
 (0)