-
Notifications
You must be signed in to change notification settings - Fork 6
add continuous boyce index #52
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
9b55804
f7560b7
f2beeed
381a707
479eb8c
fe58b7b
1aff4f1
987bf06
f735f5d
30bccf9
2c9851b
9a6b932
b47634d
5723e5e
46fd4bc
7c4a9b9
36ae7b6
3785741
c17f0b7
58fdd66
d3122db
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -536,3 +536,118 @@ $DOC_DISTRIBUTIONS | |||||
| SphericalScore | ||||||
| "$SphericalScoreDoc" | ||||||
| const spherical_score = SphericalScore() | ||||||
|
|
||||||
|
|
||||||
| # --------------------------------------------------------------------- | ||||||
| # Continuous Boyce Index | ||||||
# Parameter container for the continuous Boyce index. Instances are callable on
# `(ŷ, y)`.
#
# Fields
# - `n_bins`: number of score bins used when evaluating the index (at least 2).
# - `bin_overlap`: width of each bin, expressed as a fraction of the score range
#   `max - min`; must lie strictly between 0 and 1.
# - `min`, `max`: lower/upper end of the score range. `nothing` means that the
#   smallest/largest observed score is used when the measure is called.
# - `cor`: function called as `cor(binmeans, binstarts)` to produce the final
#   correlation coefficient.
struct _ContinuousBoyceIndex
    n_bins::Integer
    bin_overlap::AbstractFloat
    min::Union{AbstractFloat, Nothing}
    max::Union{AbstractFloat, Nothing}
    cor::Function
    function _ContinuousBoyceIndex(;
        n_bins = 101,
        bin_overlap = 0.1,
        min = nothing,
        max = nothing,
        cor = StatsBase.corspearman,
    )
        # Fail early: these settings can only produce range errors or an
        # undefined (NaN) correlation once the measure is called.
        n_bins >= 2 ||
            throw(ArgumentError("`n_bins` must be at least 2, got $n_bins"))
        0 < bin_overlap < 1 || throw(
            ArgumentError("`bin_overlap` must be between 0 and 1, got $bin_overlap"),
        )
        if !isnothing(min) && !isnothing(max)
            min < max || throw(
                ArgumentError("`min` must be smaller than `max`, got $min and $max"),
            )
        end
        return new(n_bins, bin_overlap, min, max, cor)
    end
end
|
|
||||||
# Public constructor: build the raw `_ContinuousBoyceIndex` from the keyword
# arguments, then wrap it with `robust_measure` and, outermost, `fussy_measure`.
function ContinuousBoyceIndex(; kw...)
    return fussy_measure(robust_measure(_ContinuousBoyceIndex(; kw...)))
end
|
|
||||||
# Evaluate the continuous Boyce index of the probabilistic predictions `ŷ`
# against the ground truth `y`.
#
# - `ŷ`: array of `UnivariateFinite` predictions. Any `AbstractArray` of such
#   distributions is accepted, not only `UnivariateFiniteArray`.
# - `y`: categorical ground-truth observations (no missing values).
# - `warn`: when `true`, `warn_unordered` is called on the levels of `y`.
#
# The score of an observation is the predicted probability of the positive
# class, taken to be the last class of the first prediction. Score bounds left
# as `nothing` in `m` default to the observed extrema.
function (m::_ContinuousBoyceIndex)(
    ŷ::AbstractArray{<:UnivariateFinite},
    y::NonMissingCatArrOrSub;
    warn=true,
)
    warn && warn_unordered(levels(y))
    positive_class = last(classes(first(ŷ)))
    scores = pdf.(ŷ, positive_class)
    ma = isnothing(m.max) ? maximum(scores) : m.max
    mi = isnothing(m.min) ? minimum(scores) : m.min
    # bin width is a fixed fraction of the score range
    binwidth = m.bin_overlap * (ma - mi)

    return _cbi(scores, y, positive_class, m.n_bins, binwidth, ma, mi, m.cor)
end
|
|
||||||
# Core computation of the continuous Boyce index.
#
# Arguments
# - `scores`: predicted score of the positive class for each observation.
# - `y`: observed classes, aligned with `scores`.
# - `positive_class`: the element of `y` that counts as a positive.
# - `nbins`: number of bins.
# - `binwidth`: width of each bin, in score units.
# - `ma`, `mi`: upper and lower end of the score range covered by the bins.
# - `cor`: function called as `cor(binmeans, binstarts)`.
#
# Bin `i` is the closed interval `[binstarts[i], binends[i]]`. For each bin that
# contains at least one negative, the share of all positives falling in the bin
# is divided by the share of all negatives falling in the bin; the result is
# the correlation of these ratios with the bin starts.
#
# Throws an `ErrorException` if no bin can be used or the correlation is `NaN`.
function _cbi(scores, y, positive_class, nbins, binwidth, ma, mi, cor)
    undefined_msg =
        "Could not calculate a correlation coefficient because there are no bins " *
        "with at least one negative and one positive observation. Try decreasing " *
        "the number of bins or increasing the bin overlap."

    binstarts = range(mi, stop=ma - binwidth, length=nbins)
    binends = range(mi + binwidth, stop=ma, length=nbins)

    # Sort once so that every bin corresponds to a contiguous index range.
    sorted_indices = sortperm(scores)
    sorted_scores = view(scores, sorted_indices)
    sorted_y = view(y, sorted_indices)

    tot_positive = count(==(positive_class), y)
    tot_negative = length(y) - tot_positive

    n_positive = zeros(Int, nbins)
    n_negative = zeros(Int, nbins)

    for i in eachindex(n_positive)
        bin_index_first = searchsortedfirst(sorted_scores, binstarts[i])
        bin_index_last = searchsortedlast(sorted_scores, binends[i])
        # An empty bin yields an empty range, so both counts stay at zero.
        in_bin = bin_index_first:bin_index_last
        n_positive[i] = count(==(positive_class), view(sorted_y, in_bin))
        n_negative[i] = length(in_bin) - n_positive[i]
    end

    # Omit bins without negatives - we don't want to divide by zero.
    keep = n_negative .> 0
    kept_starts = binstarts[keep]

    # Ratio of the relative frequency of positives to that of negatives, per bin.
    binmeans = (n_positive[keep] ./ tot_positive) ./ (n_negative[keep] ./ tot_negative)

    # Nothing to correlate if every bin was dropped.
    isempty(binmeans) && error(undefined_msg)

    r = cor(binmeans, kept_starts)
    isnan(r) && error(undefined_msg)
    return r
end
|
|
||||||
# Type of the objects returned by `ContinuousBoyceIndex(...)`: a
# `_ContinuousBoyceIndex` wrapped in `API.RobustMeasure`, wrapped in turn in
# `API.FussyMeasure`.
const ContinuousBoyceIndexType =
    API.FussyMeasure{<:API.RobustMeasure{<:_ContinuousBoyceIndex}}

# NOTE(review): presumably makes wrapped instances display under the name
# `ContinuousBoyceIndex` - confirm against the definition of `@fix_show`.
@fix_show ContinuousBoyceIndex::ContinuousBoyceIndexType
|
|
||||||
| StatisticalMeasures.@trait( | ||||||
| _ContinuousBoyceIndex, | ||||||
| consumes_multiple_observations=true, | ||||||
| observation_scitype = Finite{2}, | ||||||
| kind_of_proxy=StatisticalMeasures.LearnAPI.Distribution(), | ||||||
| orientation=Score(), | ||||||
| external_aggregation_mode=Mean(), | ||||||
| human_name = "continuous boyce index", | ||||||
|
||||||
| human_name = "continuous boyce index", | |
| human_name = "continuous Boyce index", |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@ablaom what would be the right way to go here? I know the other functions don't have this interface, but here I think it would make a lot of sense to allow cbi(ŷ, y; n_bins=5)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, you have to make `n_bins` part of the struct. So you do `ContinuousBoyceIndex(n_bins=5)(yhat, y)`.
However, if you want, you can define a pure functional version `Functions.continuous_boyce_index` here and refactor so that your struct version calls that. The documentation can then point to the core implementation, like we do for `MatthewsCorrelation`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The type annotation for `yhat` is too strict. Either remove it altogether, or you could do `::AbstractArray{<:UnivariateFinite}`. For consider