From 9b55804cbde86c1a8ed1e4214811f3cf4e4a33d3 Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Tue, 16 Sep 2025 18:25:21 +0200 Subject: [PATCH] first draft --- src/probabilistic.jl | 110 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) diff --git a/src/probabilistic.jl b/src/probabilistic.jl index a3de1b8..59960d7 100644 --- a/src/probabilistic.jl +++ b/src/probabilistic.jl @@ -536,3 +536,113 @@ $DOC_DISTRIBUTIONS SphericalScore "$SphericalScoreDoc" const spherical_score = SphericalScore() + + +# --------------------------------------------------------------------- +# Continuous Boyce Index +struct _ContinuousBoyceIndex + n_bins::Integer + bin_overlap::AbstractFloat + min::Union{AbstractFloat, Nothing} + max::Union{AbstractFloat, Nothing} + cor::Function + function _ContinuousBoyceIndex(; n_bins = 101, bin_overlap = 0.1, min = nothing, max = nothing, cor = StatsBase.corspearman) + new(n_bins, bin_overlap, min, max, cor) + end +end + +ContinuousBoyceIndex(; kw...) = _ContinuousBoyceIndex(; kw...) |> robust_measure |> fussy_measure + +function (m::_ContinuousBoyceIndex)(ŷ::UnivariateFiniteArray, y::NonMissingCatArrOrSub; warn=true) + warn && @warn ConfusionMatrices.WARN_UNORDERED(levels(y)) + positive_class = classes(first(ŷ))|> last + scores = pdf.(ŷ, positive_class) + ma = isnothing(m.max) ? maximum(scores) : m.max + mi = isnothing(m.min) ? minimum(scores) : m.min + binwidth = m.bin_overlap * (ma - mi) + + return _cbi(scores, y, positive_class, m.n_bins, binwidth, ma, mi, m.cor) +end + +function _cbi(scores, y, positive_class, nbins, binwidth, ma, mi, cor) + binstarts = range(mi, stop=ma-binwidth, length=nbins) + binends = range(mi + binwidth, stop=ma, length=nbins) + + sorted_indices = sortperm(scores) + sorted_scores = view(scores, sorted_indices) + sorted_y = view(y, sorted_indices) + + tot_positive = count(==(positive_class), y) + tot_negative = length(y) - tot_positive + + n_positive = zeros(Int, nbins) + n_negative = zeros(Int, nbins) + + @inbounds for i in 1:nbins + bin_index_first = searchsortedfirst(sorted_scores, binstarts[i]) + bin_index_last = searchsortedlast(sorted_scores, binends[i]) + @inbounds for j in bin_index_first:bin_index_last + if sorted_y[j] == positive_class + n_positive[i] += 1 + end + end + n_negative[i] = bin_index_last - bin_index_first + 1 - n_positive[i] + end + + n_total = n_positive .+ n_negative + + # omit bins with no negative - we don't want to divide by zero + no_obs = n_negative .== 0 + deleteat!(n_positive, no_obs) + deleteat!(n_negative, no_obs) + binstarts = binstarts[.!no_obs] + + binmeans = (n_positive ./ tot_positive) ./ (n_negative ./ tot_negative) + r = cor(binmeans, binstarts) + isnan(r) && error() + return r +end + +const ContinuousBoyceIndexType = API.FussyMeasure{<:API.RobustMeasure{<:_ContinuousBoyceIndex}} + +@fix_show ContinuousBoyceIndex::ContinuousBoyceIndexType + +StatisticalMeasures.@trait( + _ContinuousBoyceIndex, + consumes_multiple_observations=true, + observation_scitype = Finite{2}, + kind_of_proxy=StatisticalMeasures.LearnAPI.Distribution(), + orientation=Score(), + external_aggregation_mode=Mean(), + human_name = "continuous boyce index", +) + +register(ContinuousBoyceIndex, "continuous_boyce_index", "cbi") + +const ContinuousBoyceIndexDoc = docstring( + "ContinuousBoyceIndex(; n_bins=101, bin_overlap=0.1, min=nothing, max=nothing, cor=StatsBase.corspearman)", + body= +""" +The Continuous Boyce Index is a measure for evaluating the performance of probabilistic predictions for binary classification, +especially for presence-background data in ecological modeling. +It compares the predicted probability scores for the positive class across bins, giving higher scores if the ratio of positive + and negative samples in each bin is strongly correlated to the value at that bin. + +- `n_bins`: Number of bins to use for score partitioning. +- `bin_overlap`: Fractional overlap between bins. +- `min`, `max`: Optional minimum and maximum score values for binning. +- `cor`: Correlation function (default: Spearman correlation). + +The predictions `ŷ` should be a vector of `UnivariateFinite` distributions from CategoricalDistributions.jl, and `y` a vector of ground truth labels. + +Returns the correlation between the ratio of positive to negative samples in each bin and the bin centers. + +See also [Boyce Index (Wikipedia)](https://en.wikipedia.org/wiki/Boyce_index). +""", + scitype="", +) + +"$ContinuousBoyceIndexDoc" +ContinuousBoyceIndex +"$ContinuousBoyceIndexDoc" +const cbi = ContinuousBoyceIndex()