Skip to content

Commit a053008

Browse files
kescoboalyst
authored and committed
Add BrayCurtis (#76)
add BrayCurtis dissimilarity (add tests, benchmarks, update README)
1 parent 4d36d59 commit a053008

File tree

6 files changed

+44
-1
lines changed

6 files changed

+44
-1
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ This package also provides optimized functions to compute column-wise and pairwi
3636
* Mean squared deviation
3737
* Root mean squared deviation
3838
* Normalized root mean squared deviation
39+
* Bray-Curtis dissimilarity
3940

4041
For ``Euclidean distance``, ``Squared Euclidean distance``, ``Cityblock distance``, ``Minkowski distance``, and ``Hamming distance``, a weighted version is also provided.
4142

@@ -139,6 +140,7 @@ Each distance corresponds to a distance type. The type name and the correspondin
139140
| Hamming | `hamming(k, l)` | `sum(k .!= l)` |
140141
| RogersTanimoto | `rogerstanimoto(a, b)` | `2(sum(a&!b) + sum(!a&b)) / (2(sum(a&!b) + sum(!a&b)) + sum(a&b) + sum(!a&!b))` |
141142
| Jaccard | `jaccard(x, y)` | `1 - sum(min(x, y)) / sum(max(x, y))` |
143+
| BrayCurtis | `braycurtis(x, y)` | `sum(abs(x - y)) / sum(abs(x + y))` |
142144
| CosineDist | `cosine_dist(x, y)` | `1 - dot(x, y) / (norm(x) * norm(y))` |
143145
| CorrDist | `corr_dist(x, y)` | `cosine_dist(x - mean(x), y - mean(y))` |
144146
| ChiSqDist | `chisq_dist(x, y)` | `sum((x - y).^2 / (x + y))` |

benchmark/benchmarks.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ function create_distances(w, Q)
2020
HellingerDist(),
2121

2222
Haversine(6371.),
23+
24+
BrayCurtis(),
2325

2426
WeightedSqEuclidean(w),
2527
WeightedEuclidean(w),

benchmark/print_table.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ order = [
3232
:SqMahalanobis,
3333
:Mahalanobis,
3434
:Haversine,
35+
:BrayCurtis,
3536
]
3637

3738
BenchmarkTools.DEFAULT_PARAMETERS.seconds = 2.0 # Long enough

src/Distances.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ export
2323
Chebyshev,
2424
Minkowski,
2525
Jaccard,
26+
BrayCurtis,
2627
RogersTanimoto,
2728

2829
Hamming,
@@ -57,6 +58,7 @@ export
5758
sqeuclidean,
5859
cityblock,
5960
jaccard,
61+
braycurtis,
6062
rogerstanimoto,
6163
chebyshev,
6264
minkowski,

src/metrics.jl

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ struct Hamming <: Metric end
2525

2626
struct CosineDist <: SemiMetric end
2727
struct CorrDist <: SemiMetric end
28+
# Bray-Curtis dissimilarity, declared as a SemiMetric. The README table
# describes it as `sum(abs(x - y)) / sum(abs(x + y))`.
struct BrayCurtis <: SemiMetric end
2829

2930
struct ChiSqDist <: SemiMetric end
3031
struct KLDivergence <: PreMetric end
@@ -98,7 +99,7 @@ struct RMSDeviation <: Metric end
9899
struct NormRMSDeviation <: Metric end
99100

100101

101-
const UnionMetrics = Union{Euclidean,SqEuclidean,Chebyshev,Cityblock,Minkowski,Hamming,Jaccard,RogersTanimoto,CosineDist,CorrDist,ChiSqDist,KLDivergence,RenyiDivergence,JSDivergence,SpanNormDist,GenKLDivergence}
102+
# Union of the distance types grouped under `UnionMetrics`; `BrayCurtis`
# is one of its members.
const UnionMetrics = Union{
    Euclidean, SqEuclidean, Chebyshev, Cityblock, Minkowski, Hamming,
    Jaccard, RogersTanimoto, CosineDist, CorrDist, ChiSqDist,
    KLDivergence, RenyiDivergence, BrayCurtis, JSDivergence,
    SpanNormDist, GenKLDivergence
}
102103

103104
"""
104105
Euclidean([thresh])
@@ -356,6 +357,27 @@ end
356357
end
357358
jaccard(a::AbstractArray, b::AbstractArray) = evaluate(Jaccard(), a, b)
358359

360+
# BrayCurtis
361+
362+
# Initial accumulator for Bray-Curtis: a pair holding the running sum of the
# |a - b| terms and the running sum of the |a + b| terms.
# Bool inputs start from Int zeros.
@inline eval_start(::BrayCurtis, a::AbstractArray{Bool}, b::AbstractArray{Bool}) = 0, 0
# Matching element types: both running sums start at zero(T).
@inline eval_start(::BrayCurtis, a::AbstractArray{T}, b::AbstractArray{T}) where {T} = zero(T), zero(T)
# Differing element types (e.g. Int vs Float64) match neither method above, so
# start both sums in the promoted element type; this keeps the accumulator a
# pair with a stable element type.
@inline function eval_start(::BrayCurtis, a::AbstractArray{T1}, b::AbstractArray{T2}) where {T1,T2}
    z = zero(promote_type(T1, T2))
    return z, z
end
364+
# Per-element Bray-Curtis contribution for one pair of entries: the tuple
# (|s1 - s2|, |s1 + s2|), i.e. one numerator term and one denominator term.
@inline eval_op(::BrayCurtis, s1, s2) = (abs(s1 - s2), abs(s1 + s2))
369+
# Combine two (numerator, denominator) pairs by adding them component-wise.
@inline function eval_reduce(::BrayCurtis, s1, s2)
    @inbounds begin
        numer = s1[1] + s2[1]
        denom = s1[2] + s2[2]
    end
    return numer, denom
end
374+
# Final step: divide the accumulated first component by the second.
# NOTE(review): a zero second component (e.g. two all-zero inputs) is not
# special-cased here; the plain division result is returned.
@inline function eval_end(::BrayCurtis, a)
    @inbounds begin
        numer = a[1]
        denom = a[2]
    end
    return numer / denom
end
378+
# Convenience wrapper: evaluate the `BrayCurtis` distance type on `a` and `b`.
function braycurtis(a::AbstractArray, b::AbstractArray)
    return evaluate(BrayCurtis(), a, b)
end
379+
380+
359381
# Tanimoto
360382

361383
@inline eval_start(::RogersTanimoto, a::AbstractArray, b::AbstractArray) = 0, 0, 0, 0

test/test_dists.jl

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,8 @@ end
125125
bt = [true, false, true]
126126
bf = [false, true, true]
127127
@test rogerstanimoto(bt, bf) == 4.0 / 5.0
128+
@test braycurtis(bt, bf) == 0.5
129+
128130
for T in (Float64, F64)
129131

130132
for (_x, _y) in (([4.0, 5.0, 6.0, 7.0], [3.0, 9.0, 8.0, 1.0]),
@@ -135,6 +137,7 @@ end
135137
@test jaccard(x, y) == 13.0 / 28
136138
@test cityblock(x, y) == 13.0
137139
@test chebyshev(x, y) == 6.0
140+
# Written with explicit spacing: `30./43.` is ambiguous between `30. / 43.`
# and `30 ./ 43.`, and newer Julia parsers reject it. The value is unchanged.
@test braycurtis(x, y) == 1.0 - (30.0 / 43.0)
138141
@test minkowski(x, y, 2) == sqrt(57.0)
139142
@test_throws DimensionMismatch cosine_dist(1.0:2, 1.0:3)
140143
# Floating-point result, so compare approximately with ≈ (isapprox).
@test cosine_dist(x, y) ≈ (1.0 - 112. / sqrt(19530.0))
@@ -177,6 +180,11 @@ end
177180
@inferred evaluate(Jaccard(), [1, 2, 3], [1, 2, 3])
178181
@inferred evaluate(Jaccard(), [true, false, true], [false, true, true])
179182

183+
# Test Bray-Curtis. Should be 1 if no elements are shared, 0 if all are the same
184+
@test braycurtis([1,0,3],[0,1,0]) == 1.0
185+
@test braycurtis(rand(10), zeros(10)) == 1.0
186+
@test braycurtis([1,0],[1,0]) == 0.0
187+
180188
# Test KL, Renyi and JS divergences
181189
r = rand(T, 12)
182190
p = copy(r)
@@ -232,12 +240,16 @@ end #testset
232240
@test isa(cityblock(a, b), T)
233241
@test chebyshev(a, b) == 0.0
234242
@test isa(chebyshev(a, b), T)
243+
@test braycurtis(a, b) == 0.0
244+
@test isa(braycurtis(a, b), T)
235245
@test minkowski(a, b, 2) == 0.0
236246
@test isa(minkowski(a, b, 2), T)
237247
@test hamming(a, b) == 0.0
238248
@test isa(hamming(a, b), Int)
239249
@test renyi_divergence(a, b, 1.0) == 0.0
240250
@test isa(renyi_divergence(a, b, 2.0), T)
251+
@test braycurtis(a, b) == 0.0
252+
@test isa(braycurtis(a, b), T)
241253

242254
w = T[]
243255
@test isa(whamming(a, b, w), T)
@@ -384,6 +396,7 @@ end
384396

385397
test_colwise(BhattacharyyaDist(), X, Y, T)
386398
test_colwise(HellingerDist(), X, Y, T)
399+
test_colwise(BrayCurtis(), X, Y, T)
387400

388401
w = rand(T, m)
389402

@@ -456,6 +469,7 @@ end
456469

457470
test_pairwise(BhattacharyyaDist(), X, Y, T)
458471
test_pairwise(HellingerDist(), X, Y, T)
472+
test_pairwise(BrayCurtis(), X, Y, T)
459473

460474
w = rand(m)
461475

0 commit comments

Comments
 (0)