Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 110 additions & 0 deletions src/probabilistic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -536,3 +536,113 @@ $DOC_DISTRIBUTIONS
SphericalScore
"$SphericalScoreDoc"
const spherical_score = SphericalScore()


# ---------------------------------------------------------------------
# Continuous Boyce Index
struct _ContinuousBoyceIndex
n_bins::Integer
bin_overlap::AbstractFloat
min::Union{AbstractFloat, Nothing}
max::Union{AbstractFloat, Nothing}
cor::Function
function _ContinuousBoyceIndex(; n_bins = 101, bin_overlap = 0.1, min = nothing, max = nothing, cor = StatsBase.corspearman)
new(n_bins, bin_overlap, min, max, cor)
end
end

ContinuousBoyceIndex(; kw...) = _ContinuousBoyceIndex(; kw...) |> robust_measure |> fussy_measure

function (m::_ContinuousBoyceIndex)(ŷ::UnivariateFiniteArray, y::NonMissingCatArrOrSub; warn=true)
warn && @warn ConfusionMatrices.WARN_UNORDERED(levels(y))
positive_class = classes(first(ŷ))|> last
scores = pdf.(ŷ, positive_class)
ma = isnothing(m.max) ? maximum(scores) : m.max
mi = isnothing(m.min) ? minimum(scores) : m.min
binwidth = m.bin_overlap * (ma - mi)

return _cbi(scores, y, positive_class, m.n_bins, binwidth, ma, mi, m.cor)
end

function _cbi(scores, y, positive_class, nbins, binwidth, ma, mi, cor)
binstarts = range(mi, stop=ma-binwidth, length=nbins)
binends = range(mi + binwidth, stop=ma, length=nbins)

sorted_indices = sortperm(scores)
sorted_scores = view(scores, sorted_indices)
sorted_y = view(y, sorted_indices)

tot_positive = count(==(positive_class), y)
tot_negative = length(y) - tot_positive

n_positive = zeros(Int, nbins)
n_negative = zeros(Int, nbins)

@inbounds for i in 1:nbins
bin_index_first = searchsortedfirst(sorted_scores, binstarts[i])
bin_index_last = searchsortedlast(sorted_scores, binends[i])
@inbounds for j in bin_index_first:bin_index_last
if sorted_y[j] == positive_class
n_positive[i] += 1
end
end
n_negative[i] = bin_index_last - bin_index_first + 1 - n_positive[i]
end

n_total = n_positive .+ n_negative

# omit bins with no negative - we don't want to divide by zero
no_obs = n_negative .== 0
deleteat!(n_positive, no_obs)
deleteat!(n_negative, no_obs)
binstarts = binstarts[.!no_obs]

binmeans = (n_positive ./ tot_positive) ./ (n_negative ./ tot_negative)
r = cor(binmeans, binstarts)
isnan(r) && error()
return r
end

const ContinuousBoyceIndexType = API.FussyMeasure{<:API.RobustMeasure{<:_ContinuousBoyceIndex}}

@fix_show ContinuousBoyceIndex::ContinuousBoyceIndexType

StatisticalMeasures.@trait(
_ContinuousBoyceIndex,
consumes_multiple_observations=true,
observation_scitype = Finite{2},
kind_of_proxy=StatisticalMeasures.LearnAPI.Distribution(),
orientation=Score(),
external_aggregation_mode=Mean(),
human_name = "continuous boyce index",
)

register(ContinuousBoyceIndex, "continuous_boyce_index", "cbi")

const ContinuousBoyceIndexDoc = docstring(
"ContinuousBoyceIndex(; n_bins=101, bin_overlap=0.1, min=nothing, max=nothing, cor=StatsBase.corspearman)",
body=
"""
The Continuous Boyce Index is a measure for evaluating the performance of probabilistic predictions for binary classification,
especially for presence-background data in ecological modeling.
It compares the predicted probability scores for the positive class across bins, giving higher scores if the ratio of positive
and negative samples in each bin is strongly correlated to the value at that bin.

- `n_bins`: Number of bins to use for score partitioning.
- `bin_overlap`: Fractional overlap between bins.
- `min`, `max`: Optional minimum and maximum score values for binning.
- `cor`: Correlation function (default: Spearman correlation).

The predictions `ŷ` should be a vector of `UnivariateFinite` distributions from CategoricalDistributions.jl, and `y` a vector of ground truth labels.

Returns the correlation between the ratio of positive to negative samples in each bin and the bin centers.

See also [Boyce Index (Wikipedia)](https://en.wikipedia.org/wiki/Boyce_index).
""",
scitype="",
)

"$ContinuousBoyceIndexDoc"
ContinuousBoyceIndex
"$ContinuousBoyceIndexDoc"
const cbi = ContinuousBoyceIndex()
Loading