Skip to content

Commit 07b9de1

Browse files
authored
Merge pull request #277 from uriahf/fix-qcut-instability-5996398262518663597
Fix: Replace unstable `qcut` with robust rank-based decile calculation
2 parents (725e7d7 + c17fe6d), commit 07b9de1

File tree

1 file changed

+10
-7
lines changed

1 file changed

+10
-7
lines changed

src/rtichoke/calibration/calibration.py

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -534,18 +534,21 @@ def _make_deciles_dat_binary(
534534

535535
df = pl.concat(frames, how="vertical")
536536

537-
labels = [str(i) for i in range(1, n_bins + 1)]
538-
539537
df = df.with_columns(
540538
[
541539
pl.col("prob").cast(pl.Float64),
542540
pl.col("real").cast(pl.Float64),
543-
pl.col("prob")
544-
.qcut(n_bins, labels=labels, allow_duplicates=True)
545-
.over(["reference_group", "model"])
546-
.alias("decile"),
541+
(
542+
(
543+
pl.col("prob").rank("ordinal").over(["reference_group", "model"])
544+
- 1
545+
)
546+
* n_bins
547+
// pl.count().over(["reference_group", "model"])
548+
+ 1
549+
).alias("decile"),
547550
]
548-
).with_columns(pl.col("decile").cast(pl.Int32))
551+
)
549552

550553
deciles_data = (
551554
df.group_by(["reference_group", "model", "decile"])

0 commit comments

Comments (0)