Merge pull request #208 from uriahf/195-update-functions-for-discrimination-plots

uriahf · web-flow · commit 22b48f4b0d90 · 2025-11-28T12:40:56.000+02:00
fix: fill null reals_estimate and confusion-matrix counts with 0
diff --git a/pyproject.toml b/pyproject.toml
@@ -19,9 +19,10 @@ dependencies = [
     "pandas>=2.2.3",
     "typing>=3.7.4.3",
     "polarstate==0.1.8",
+    "marimo>=0.17.0",
 ]
 name = "rtichoke"
-version = "0.1.16"
+version = "0.1.17"
 description = "interactive visualizations for performance of predictive models"
 readme = "README.md"
 
diff --git a/src/rtichoke/helpers/sandbox_observable_helpers.py b/src/rtichoke/helpers/sandbox_observable_helpers.py
@@ -1204,7 +1204,8 @@ def _cast_and_join_adjusted_data_binary(
                 )
             ).alias("classification_outcome")
         )
-    )
+    ).with_columns(pl.col("reals_estimate").fill_null(0))
+
     return final_adjusted_data_polars
 
 
@@ -1562,6 +1563,17 @@ def _calculate_cumulative_aj_data_binary(aj_data: pl.DataFrame) -> pl.DataFrame:
         )
         .agg([pl.col("reals_estimate").sum()])
         .pivot(on="classification_outcome", values="reals_estimate")
+        .with_columns(
+            [
+                pl.col(col).fill_null(0)
+                for col in [
+                    "true_positives",
+                    "true_negatives",
+                    "false_positives",
+                    "false_negatives",
+                ]
+            ]
+        )
         .with_columns(
             (pl.col("true_positives") + pl.col("false_positives")).alias(
                 "predicted_positives"
@@ -1678,8 +1690,11 @@ def _turn_cumulative_aj_to_performance_data(
         (pl.col("true_negatives") / pl.col("real_negatives")).alias("specificity"),
         (pl.col("true_positives") / pl.col("predicted_positives")).alias("ppv"),
         (pl.col("true_negatives") / pl.col("predicted_negatives")).alias("npv"),
+        (pl.col("false_positives") / pl.col("real_negatives")).alias(
+            "false_positive_rate"
+        ),
         (
-            (pl.col("true_positives") / pl.col("real_positives"))
+            (pl.col("true_positives") / pl.col("predicted_positives"))
             / (pl.col("real_positives") / pl.col("n"))
         ).alias("lift"),
         pl.when(pl.col("stratified_by") == "probability_threshold")
@@ -1692,6 +1707,15 @@ def _turn_cumulative_aj_to_performance_data(
         .otherwise(None)
         .alias("net_benefit"),
         pl.when(pl.col("stratified_by") == "probability_threshold")
+        .then(
+            100 * (pl.col("true_negatives") / pl.col("n"))
+            - (pl.col("false_negatives") / pl.col("n"))
+            * (1 - pl.col("chosen_cutoff"))
+            / pl.col("chosen_cutoff")
+        )
+        .otherwise(None)
+        .alias("net_benefit_interventions_avoided"),
+        pl.when(pl.col("stratified_by") == "probability_threshold")
         .then(pl.col("predicted_positives") / pl.col("n"))
         .otherwise(pl.col("chosen_cutoff"))
         .alias("ppcr"),
diff --git a/uv.lock b/uv.lock