Skip to content

Commit 17c8294

Browse files
authored
feat: add "auto" bin method (#14)
* add auto bin method that avoids catastrophic blow-up from fd
* change psi bin default to auto instead of fd
1 parent 0daad2f commit 17c8294

File tree

2 files changed

+15
-3
lines changed

2 files changed

+15
-3
lines changed

python/rapidstats/bin.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,3 +191,12 @@ def sqrt(x: ArrayLike) -> int:
191191
Bin count
192192
"""
193193
return math.ceil(math.sqrt(len(x)))
194+
195+
196+
def auto(x: ArrayLike) -> int:
    """Choose a bin count, falling back to Doane when Freedman-Diaconis is large.

    The Freedman-Diaconis bin count is computed first. If it is greater than
    or equal to the square-root bin count for the same data, the Doane bin
    count is returned instead; otherwise the Freedman-Diaconis count is used.

    Parameters
    ----------
    x : ArrayLike
        Input data passed unchanged to the underlying bin-count rules.

    Returns
    -------
    int
        Bin count
    """
    fd_bins = freedman_diaconis(x)

    # The square-root rule acts as the ceiling on an acceptable
    # Freedman-Diaconis result; at or above it, defer to Doane.
    return doane(x) if fd_bins >= sqrt(x) else fd_bins

python/rapidstats/drift.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,16 @@
66
import polars as pl
77
from polars.series.series import ArrayLike
88

9-
from .bin import doane, freedman_diaconis, rice, scott, sqrt, sturges
9+
from .bin import auto, doane, freedman_diaconis, rice, scott, sqrt, sturges
1010

11-
BinMethod = Literal["doane", "fd", "rice", "sturges", "scott", "sqrt"]
11+
BinMethod = Literal["auto", "doane", "fd", "rice", "sturges", "scott", "sqrt"]
1212

1313

1414
def _bin_count(x: pl.Series, bin_count: int | BinMethod) -> int:
1515
if isinstance(bin_count, int):
1616
return bin_count
17+
elif bin_count == "auto":
18+
return auto(x)
1719
elif bin_count == "doane":
1820
return doane(x)
1921
elif bin_count == "fd":
@@ -178,7 +180,7 @@ def psi(
178180
current: ArrayLike,
179181
*,
180182
bins: list[float] | None = None,
181-
bin_count: int | BinMethod = "fd",
183+
bin_count: int | BinMethod = "auto",
182184
include_nulls: bool = True,
183185
epsilon: float | None = 1e-4,
184186
) -> float:
@@ -208,6 +210,7 @@ def psi(
208210
If an integer, the number of bins. It can also be a string corresponding to an
209211
auto-binning method, by default "auto". The possible methods are
210212
213+
- "auto", see [rapidstats.bin.auto][]
211214
- "doane", see [rapidstats.bin.doane][]
212215
- "fd", see [rapidstats.bin.freedman_diaconis][]
213216
- "rice", see [rapidstats.bin.rice][]

0 commit comments

Comments
 (0)