@@ -2302,52 +2302,19 @@ def melt(
2302
2302
self ._block .melt (id_col_ids , val_col_ids , var_name , value_name )
2303
2303
)
2304
2304
2305
- _NUMERIC_DESCRIBE_AGGS = (
2306
- "count" ,
2307
- "mean" ,
2308
- "std" ,
2309
- "min" ,
2310
- "25%" ,
2311
- "50%" ,
2312
- "75%" ,
2313
- "max" ,
2314
- )
2315
- _NON_NUMERIC_DESCRIBE_AGGS = ("count" , "nunique" )
2316
-
2317
2305
def describe (self , include : None | Literal ["all" ] = None ) -> DataFrame :
2318
-
2319
- allowed_non_numeric_types = {
2320
- bigframes .dtypes .STRING_DTYPE ,
2321
- bigframes .dtypes .BOOL_DTYPE ,
2322
- bigframes .dtypes .BYTES_DTYPE ,
2323
- }
2324
-
2325
2306
if include is None :
2326
2307
numeric_df = self ._drop_non_numeric (permissive = False )
2327
2308
if len (numeric_df .columns ) == 0 :
2328
2309
# Describe eligible non-numeric columns
2329
- result = self .select_dtypes (include = allowed_non_numeric_types ).agg (
2330
- self ._NON_NUMERIC_DESCRIBE_AGGS
2331
- )
2332
- else :
2333
- # Otherwise, only describe numeric columns
2334
- result = numeric_df .agg (self ._NUMERIC_DESCRIBE_AGGS )
2335
- return typing .cast (DataFrame , result )
2310
+ return self ._describe_non_numeric ()
2336
2311
2337
- elif include == "all" :
2338
- numeric_result = typing .cast (
2339
- DataFrame ,
2340
- self ._drop_non_numeric (permissive = False ).agg (
2341
- self ._NUMERIC_DESCRIBE_AGGS
2342
- ),
2343
- )
2312
+ # Otherwise, only describe numeric columns
2313
+ return self ._describe_numeric ()
2344
2314
2345
- non_numeric_result = typing .cast (
2346
- DataFrame ,
2347
- self .select_dtypes (include = allowed_non_numeric_types ).agg (
2348
- self ._NON_NUMERIC_DESCRIBE_AGGS
2349
- ),
2350
- )
2315
+ elif include == "all" :
2316
+ numeric_result = self ._describe_numeric ()
2317
+ non_numeric_result = self ._describe_non_numeric ()
2351
2318
2352
2319
if len (numeric_result .columns ) == 0 :
2353
2320
return non_numeric_result
@@ -2364,6 +2331,35 @@ def describe(self, include: None | Literal["all"] = None) -> DataFrame:
2364
2331
else :
2365
2332
raise ValueError (f"Unsupported include type: { include } " )
2366
2333
2334
def _describe_numeric(self) -> DataFrame:
    """Summarize the numeric columns of this frame.

    Takes the frame produced by ``_drop_non_numeric(permissive=False)`` and
    aggregates it with count, mean, std, min, the 25%/50%/75% aggregations
    and max, in that order.

    Returns:
        DataFrame: The aggregation result.
    """
    summary_ops = ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]
    numeric_only = self._drop_non_numeric(permissive=False)
    # typing.cast does nothing at runtime; it only tells the type checker
    # that ``agg`` yields a DataFrame here.
    return typing.cast(DataFrame, numeric_only.agg(summary_ops))
2350
+
2351
def _describe_non_numeric(self) -> DataFrame:
    """Summarize the string, boolean and bytes columns of this frame.

    Selects the columns whose dtype is ``STRING_DTYPE``, ``BOOL_DTYPE`` or
    ``BYTES_DTYPE`` and aggregates them with ``count`` and ``nunique``.

    Returns:
        DataFrame: The aggregation result.
    """
    describable_dtypes = {
        bigframes.dtypes.STRING_DTYPE,
        bigframes.dtypes.BOOL_DTYPE,
        bigframes.dtypes.BYTES_DTYPE,
    }
    eligible = self.select_dtypes(include=describable_dtypes)
    # typing.cast does nothing at runtime; it only tells the type checker
    # that ``agg`` yields a DataFrame here.
    return typing.cast(DataFrame, eligible.agg(["count", "nunique"]))
2362
+
2367
2363
def skew (self , * , numeric_only : bool = False ):
2368
2364
if not numeric_only :
2369
2365
frame = self ._raise_on_non_numeric ("skew" )
0 commit comments