
Commit 3877ea4

richardreeve authored and KristofferC committed
Add in Rényi divergences (#49)
* Add in Rényi divergences.
* Add in Renyi divergences to README.md.
* test_dists.jl: p and q are prob distributions, so sum to 1.
* test_dists.jl: Move tests that don't use x and y out of the x,y for loop.
* Fix Rényi entropies to handle generalised probability distributions.
* Test Rényi divergences.
* Lining up |s
* Rewrote eval_op() for RenyiDivergence in function form.
* Corrected test for zero, and return result where computed.
* Removing unicode symbols from arguments.
* No need to define Renyi divergence for numbers rather than vectors.
* Add renyi_divergence() colwise and pairwise tests.
* Add renyi_divergence() tests for empty vectors and NaNs.
* Rearrange tests to create variables near where they are used.
* Document p for RenyiDivergence
* Replace 0 and 1 with correct type.
* Correct type for eval_reduce(::RenyiDivergence) and eval_op(::RenyiDivergence), and turn eval_reduce and eval_end into functions, handling q = Inf correctly and testing the code.
1 parent df02f3d commit 3877ea4

File tree: 4 files changed, +132 −25 lines

README.md · src/Distances.jl · src/metrics.jl · test/test_dists.jl

README.md (2 additions, 0 deletions)

@@ -24,6 +24,7 @@ This package also provides optimized functions to compute column-wise and pairwise
 * Correlation distance
 * Chi-square distance
 * Kullback-Leibler divergence
+* Rényi divergence
 * Jensen-Shannon divergence
 * Mahalanobis distance
 * Squared Mahalanobis distance

@@ -138,6 +139,7 @@ Each distance corresponds to a distance type. The type name and the corresponding
 | CorrDist          | corr_dist(x, y)           | cosine_dist(x - mean(x), y - mean(y)) |
 | ChiSqDist         | chisq_dist(x, y)          | sum((x - y).^2 / (x + y)) |
 | KLDivergence      | kl_divergence(x, y)       | sum(p .* log(p ./ q)) |
+| RenyiDivergence   | renyi_divergence(x, y, k) | log(sum( x .* (x ./ y) .^ (k - 1))) / (k - 1) |
 | JSDivergence      | js_divergence(x, y)       | KL(x, m) / 2 + KL(y, m) / 2 with m = (x + y) / 2 |
 | SpanNormDist      | spannorm_dist(x, y)       | max(x - y) - min(x - y) |
 | BhattacharyyaDist | bhattacharyya(x, y)       | -log(sum(sqrt(x .* y)) / sqrt(sum(x) * sum(y))) |

src/Distances.jl (2 additions, 0 deletions)

@@ -33,6 +33,7 @@ export
     ChiSqDist,
     KLDivergence,
     JSDivergence,
+    RenyiDivergence,
     SpanNormDist,

     WeightedEuclidean,

@@ -61,6 +62,7 @@ export
     chisq_dist,
     kl_divergence,
     js_divergence,
+    renyi_divergence,
     spannorm_dist,

     weuclidean,
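The two new exports are equivalent entry points; renyi_divergence(a, b, q) is defined in src/metrics.jl below as a thin wrapper over the type. A minimal sketch (the vectors are made up for illustration):

    using Distances

    p = [0.3, 0.3, 0.4]
    q = [0.2, 0.5, 0.3]

    # The wrapper and the evaluate() form compute the same thing:
    renyi_divergence(p, q, 0.5) == evaluate(RenyiDivergence(0.5), p, q)   # true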

src/metrics.jl (70 additions, 1 deletion)

@@ -24,12 +24,34 @@ type CorrDist <: SemiMetric end

 type ChiSqDist <: SemiMetric end
 type KLDivergence <: PreMetric end
+
+immutable RenyiDivergence{T <: Real} <: PreMetric
+    p::T # order of power mean (order of divergence - 1)
+    is_normal::Bool
+    is_zero::Bool
+    is_one::Bool
+    is_inf::Bool
+    function RenyiDivergence(q)
+        # There are four different cases:
+        # simpler to separate them out now, not over and over in eval_op()
+        is_zero = q ≈ zero(T)
+        is_one = q ≈ one(T)
+        is_inf = isinf(q)
+
+        # Only positive Rényi divergences are defined
+        !is_zero && q < zero(T) && throw(ArgumentError("Order of Rényi divergence not legal, $(q) < 0."))
+
+        new(q - 1, !(is_zero || is_one || is_inf), is_zero, is_one, is_inf)
+    end
+end
+RenyiDivergence{T}(q::T) = RenyiDivergence{T}(q)
+
 type JSDivergence <: SemiMetric end

 type SpanNormDist <: SemiMetric end

-typealias UnionMetrics Union{Euclidean, SqEuclidean, Chebyshev, Cityblock, Minkowski, Hamming, Jaccard, RogersTanimoto, CosineDist, CorrDist, ChiSqDist, KLDivergence, JSDivergence, SpanNormDist}
+typealias UnionMetrics Union{Euclidean, SqEuclidean, Chebyshev, Cityblock, Minkowski, Hamming, Jaccard, RogersTanimoto, CosineDist, CorrDist, ChiSqDist, KLDivergence, RenyiDivergence, JSDivergence, SpanNormDist}

 ###########################################################
 #
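For reference, the four constructor flags (is_zero, is_one, is_inf, is_normal) mirror the closed forms of the order-q Rényi divergence, stated here for probability vectors p and r; the implementation below additionally divides by the accumulated mass of p to cover generalised, unnormalised distributions:

    \[
    D_q(p \,\|\, r) = \frac{1}{q-1} \log \sum_{i \colon p_i > 0} p_i \Bigl(\frac{p_i}{r_i}\Bigr)^{q-1},
    \qquad q \notin \{0, 1, \infty\},
    \]
    \[
    D_0(p \,\|\, r) = -\log \sum_{i \colon p_i > 0} r_i, \qquad
    D_1(p \,\|\, r) = \sum_{i \colon p_i > 0} p_i \log \frac{p_i}{r_i} \ \text{(Kullback--Leibler)}, \qquad
    D_\infty(p \,\|\, r) = \log \max_{i \colon p_i > 0} \frac{p_i}{r_i}.
    \]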
@@ -141,6 +163,53 @@ chisq_dist(a::AbstractArray, b::AbstractArray) = evaluate(ChiSqDist(), a, b)
 @inline eval_reduce(::KLDivergence, s1, s2) = s1 + s2
 kl_divergence(a::AbstractArray, b::AbstractArray) = evaluate(KLDivergence(), a, b)

+# RenyiDivergence
+function eval_start{T<:AbstractFloat}(::RenyiDivergence, a::AbstractArray{T}, b::AbstractArray{T})
+    zero(T), zero(T)
+end
+
+@inline function eval_op{T<:AbstractFloat}(dist::RenyiDivergence, ai::T, bi::T)
+    if ai == zero(T)
+        return zero(T), zero(T)
+    elseif dist.is_normal
+        return ai, ai .* ((ai ./ bi) .^ dist.p)
+    elseif dist.is_zero
+        return ai, bi
+    elseif dist.is_one
+        return ai, ai * log(ai / bi)
+    else # otherwise q = ∞
+        return ai, ai / bi
+    end
+end
+
+@inline function eval_reduce{T<:AbstractFloat}(dist::RenyiDivergence,
+                                               s1::Tuple{T, T},
+                                               s2::Tuple{T, T})
+    if dist.is_inf
+        if s1[1] == zero(T)
+            return s2
+        elseif s2[1] == zero(T)
+            return s1
+        else
+            return s1[2] > s2[2] ? s1 : s2
+        end
+    else
+        return s1[1] + s2[1], s1[2] + s2[2]
+    end
+end
+
+function eval_end(dist::RenyiDivergence, s)
+    if dist.is_zero || dist.is_normal
+        log(s[2] / s[1]) / dist.p
+    elseif dist.is_one
+        return s[2] / s[1]
+    else # q = ∞
+        log(s[2])
+    end
+end
+
+renyi_divergence(a::AbstractArray, b::AbstractArray, q::Real) = evaluate(RenyiDivergence(q), a, b)
+
 # JSDivergence
 @inline function eval_op{T}(::JSDivergence, ai::T, bi::T)
     u = (ai + bi) / 2
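The additions follow the package's three-stage reduction protocol: eval_start initialises the accumulator pair, eval_op maps each coordinate pair, eval_reduce combines partial results, and eval_end finishes. A self-contained sketch of the same scheme for a finite order q outside {0, 1} — my own restatement with a hypothetical name, not the package internals:

    # Accumulate the pair (Σ aᵢ, Σ aᵢ·(aᵢ/bᵢ)^(q-1)) over nonzero entries of a,
    # then combine at the end; dividing by s1 normalises generalised
    # (unnormalised) distributions, matching eval_end above.
    function renyi_sketch(a::AbstractVector{Float64}, b::AbstractVector{Float64}, q::Real)
        s1 = 0.0
        s2 = 0.0
        for (ai, bi) in zip(a, b)
            ai == 0.0 && continue        # zero-probability entries contribute nothing
            s1 += ai
            s2 += ai * (ai / bi)^(q - 1)
        end
        return log(s2 / s1) / (q - 1)
    end

    renyi_sketch([0.25, 0.25, 0.5], [0.5, 0.25, 0.25], 2.0)  # ≈ renyi_divergence(..., 2.0)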

test/test_dists.jl (58 additions, 24 deletions)

@@ -29,12 +29,6 @@ bf = [false, true, true]
 @test rogerstanimoto(bt, bt) == 0
 @test rogerstanimoto(bt, bf) == 4./5

-
-p = rand(12)
-p[p .< 0.3] = 0.
-q = rand(12)
-a = [1., 2., 1., 3., 2., 1.]
-b = [1., 3., 0., 2., 2., 0.]
 for (x, y) in (([4., 5., 6., 7.], [3., 9., 8., 1.]),
                ([4., 5., 6., 7.], [3. 8.; 9. 1.]))
     @test sqeuclidean(x, x) == 0.

@@ -66,19 +60,6 @@ for (x, y) in (([4., 5., 6., 7.], [3., 9., 8., 1.]),
     @test chisq_dist(x, x) == 0.
     @test chisq_dist(x, y) == sum((x - vec(y)).^2 ./ (x + vec(y)))

-    klv = 0.
-    for i = 1 : length(p)
-        if p[i] > 0
-            klv += p[i] * log(p[i] / q[i])
-        end
-    end
-    @test kl_divergence(p, q) ≈ klv
-
-    pm = (p + q) / 2
-    jsv = kl_divergence(p, pm) / 2 + kl_divergence(q, pm) / 2
-    @test js_divergence(p, p) ≈ 0.0
-    @test js_divergence(p, q) ≈ jsv
-
     @test spannorm_dist(x, x) == 0.
     @test spannorm_dist(x, y) == maximum(x - vec(y)) - minimum(x - vec(y))
@@ -101,17 +82,57 @@ for (x, y) in (([4., 5., 6., 7.], [3., 9., 8., 1.]),

     @test wminkowski(x, x, w, 2) == 0.
     @test wminkowski(x, y, w, 2) ≈ weuclidean(x, y, w)
+end

-    w = rand(size(a))
+# Test weighted Hamming distances with even weights
+a = [1., 2., 1., 3., 2., 1.]
+b = [1., 3., 0., 2., 2., 0.]
+w = rand(size(a))

-    @test whamming(a, a, w) == 0.
-    @test whamming(a, b, w) == sum((a .!= b) .* w)
-end
+@test whamming(a, a, w) == 0.
+@test whamming(a, b, w) == sum((a .!= b) .* w)

+# Minimal test of Jaccard - test return type stability.
 @inferred evaluate(Jaccard(), rand(3), rand(3))
 @inferred evaluate(Jaccard(), [1,2,3], [1,2,3])
 @inferred evaluate(Jaccard(), [true, false, true], [false, true, true])

+# Test KL, Renyi and JS divergences
+p = r = rand(12)
+p[p .< 0.3] = 0.0
+scale = sum(p) / sum(r)
+r /= sum(r)
+p /= sum(p)
+q = rand(12)
+q /= sum(q)
+
+klv = 0.
+for i = 1 : length(p)
+    if p[i] > 0
+        klv += p[i] * log(p[i] / q[i])
+    end
+end
+@test kl_divergence(p, q) ≈ klv
+
+@test renyi_divergence(p, p, 0) ≈ 0
+@test renyi_divergence(p, p, 1) ≈ 0
+@test renyi_divergence(p, p, rand()) ≈ 0
+@test renyi_divergence(p, p, 1.0 + rand()) ≈ 0
+@test renyi_divergence(p, p, Inf) ≈ 0
+@test renyi_divergence(p, r, 0) ≈ -log(scale)
+@test renyi_divergence(p, r, 1) ≈ -log(scale)
+@test renyi_divergence(p, r, rand()) ≈ -log(scale)
+@test renyi_divergence(p, r, Inf) ≈ -log(scale)
+@test isinf(renyi_divergence([0.0, 0.5, 0.5], [0.0, 1.0, 0.0], Inf))
+@test renyi_divergence([0.0, 1.0, 0.0], [0.0, 0.5, 0.5], Inf) ≈ log(2.0)
+@test renyi_divergence(p, q, 1) ≈ kl_divergence(p, q)
+
+pm = (p + q) / 2
+jsv = kl_divergence(p, pm) / 2 + kl_divergence(q, pm) / 2
+@test js_divergence(p, p) ≈ 0.0
+@test js_divergence(p, q) ≈ jsv
+
 end # testset
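A hand check of the two q = Inf cases above (my arithmetic, not part of the commit): the order-∞ divergence is the log of the largest ratio taken over the support of the first argument, so a zero in the second argument inside that support forces an infinite result.

    x = [0.0, 1.0, 0.0]; y = [0.0, 0.5, 0.5]
    sup = x .> 0
    maximum(x[sup] ./ y[sup])   # 2.0, so renyi_divergence(x, y, Inf) ≈ log(2.0)

    # Swapping the arguments puts a zero of y inside the support of x:
    # 0.5 / 0.0 = Inf, hence isinf(renyi_divergence([0.0, 0.5, 0.5], [0.0, 1.0, 0.0], Inf)).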
@@ -121,7 +142,8 @@ a = [NaN, 0]; b = [0, NaN]
 @test isnan(chebyshev(a, b)) == isnan(maximum(a-b))
 a = [NaN, 0]; b = [0, 1]
 @test isnan(chebyshev(a, b)) == isnan(maximum(a-b))
-
+@test !isnan(renyi_divergence([0.5, 0.0, 0.5], [0.5, NaN, 0.5], 2))
+@test isnan(renyi_divergence([0.5, 0.0, 0.5], [0.5, 0.5, NaN], 2))
 end #testset

@@ -141,6 +163,8 @@ b = Float64[]
 @test isa(minkowski(a, b, 2), Float64)
 @test hamming(a, b) == 0.0
 @test isa(hamming(a, b), Int)
+@test renyi_divergence(a, b, 1.0) == 0.0
+@test isa(renyi_divergence(a, b, 2.0), Float64)

 w = Float64[]
 @test isa(whamming(a, b, w), Float64)

@@ -261,6 +285,11 @@ P[P .< 0.3] = 0.

 @test_colwise ChiSqDist() X Y
 @test_colwise KLDivergence() P Q
+@test_colwise RenyiDivergence(0.0) P Q
+@test_colwise RenyiDivergence(1.0) P Q
+@test_colwise RenyiDivergence(Inf) P Q
+@test_colwise RenyiDivergence(0.5) P Q
+@test_colwise RenyiDivergence(2) P Q
 @test_colwise JSDivergence() P Q
 @test_colwise SpanNormDist() X Y

@@ -329,6 +358,11 @@ Q = rand(m, ny)

 @test_pairwise ChiSqDist() X Y
 @test_pairwise KLDivergence() P Q
+@test_pairwise RenyiDivergence(0.0) P Q
+@test_pairwise RenyiDivergence(1.0) P Q
+@test_pairwise RenyiDivergence(Inf) P Q
+@test_pairwise RenyiDivergence(0.5) P Q
+@test_pairwise RenyiDivergence(2) P Q
 @test_pairwise JSDivergence() P Q

 @test_pairwise BhattacharyyaDist() X Y
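Finally, a usage sketch of what the colwise/pairwise macros exercise (the shapes and normalisation here are my own illustration, in the Julia 0.5-era syntax the test file uses):

    using Distances

    P = rand(5, 8)
    Q = rand(5, 8)
    P = P ./ sum(P, 1)   # make each column a probability distribution
    Q = Q ./ sum(Q, 1)

    colwise(RenyiDivergence(2.0), P, Q)    # length-8 vector, one divergence per column pair
    pairwise(RenyiDivergence(2.0), P, Q)   # 8×8 matrix over all column pairs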
