Skip to content

Commit c4432ab

Browse files
authored
Merge pull request #714 from JuliaStats/dw/entropy
Fix type instability of `entropy` and generalize `crossentropy` and `kldivergence`
2 parents 0a17953 + e1d1d10 commit c4432ab

File tree

5 files changed

+84
-35
lines changed

5 files changed

+84
-35
lines changed

Project.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
name = "StatsBase"
22
uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
33
authors = ["JuliaStats"]
4-
version = "0.33.10"
4+
version = "0.33.11"
55

66
[deps]
77
DataAPI = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
88
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
99
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
10+
LogExpFunctions = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
1011
Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
1112
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
1213
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
@@ -18,6 +19,7 @@ StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0"
1819
[compat]
1920
DataAPI = "1"
2021
DataStructures = "0.10, 0.11, 0.12, 0.13, 0.14, 0.17, 0.18"
22+
LogExpFunctions = "0.3"
2123
Missings = "0.3, 0.4, 1.0"
2224
SortingAlgorithms = "0.3, 1.0"
2325
StatsAPI = "1"

src/StatsBase.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import DataAPI: describe
88
import DataStructures: heapify!, heappop!, percolate_down!
99
using SortingAlgorithms
1010
using Missings
11+
using LogExpFunctions: xlogx, xlogy
1112

1213
using Statistics
1314
using LinearAlgebra

src/scalarstats.jl

Lines changed: 45 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -532,7 +532,13 @@ Compute the entropy of a collection of probabilities `p`,
532532
optionally specifying a real number `b` such that the entropy is scaled by `1/log(b)`.
533533
Elements with probability 0 or 1 add 0 to the entropy.
534534
"""
535-
entropy(p) = -sum(pᵢ -> iszero(pᵢ) ? zero(pᵢ) : pᵢ * log(pᵢ), p)
535+
function entropy(p)
536+
if isempty(p)
537+
throw(ArgumentError("empty collections are not supported since they do not " *
538+
"represent proper probability distributions"))
539+
end
540+
return -sum(xlogx, p)
541+
end
536542

537543
entropy(p, b::Real) = entropy(p) / log(b)
538544

@@ -584,21 +590,26 @@ end
584590
Compute the cross entropy between `p` and `q`, optionally specifying a real
585591
number `b` such that the result is scaled by `1/log(b)`.
586592
"""
587-
function crossentropy(p::AbstractArray{T}, q::AbstractArray{T}) where T<:Real
593+
function crossentropy(p::AbstractArray{<:Real}, q::AbstractArray{<:Real})
588594
length(p) == length(q) || throw(DimensionMismatch("Inconsistent array length."))
589-
s = 0.
590-
z = zero(T)
591-
for i = 1:length(p)
592-
@inbounds pi = p[i]
593-
@inbounds qi = q[i]
594-
if pi > z
595-
s += pi * log(qi)
596-
end
595+
596+
# handle empty collections
597+
if isempty(p)
598+
Base.depwarn(
599+
"support for empty collections will be removed since they do not " *
600+
"represent proper probability distributions",
601+
:crossentropy,
602+
)
603+
# return zero for empty arrays
604+
return xlogy(zero(eltype(p)), zero(eltype(q)))
597605
end
598-
return -s
606+
607+
# use pairwise summation (https://github.com/JuliaLang/julia/pull/31020)
608+
broadcasted = Broadcast.broadcasted(xlogy, vec(p), vec(q))
609+
return - sum(Broadcast.instantiate(broadcasted))
599610
end
600611

601-
crossentropy(p::AbstractArray{T}, q::AbstractArray{T}, b::Real) where {T<:Real} =
612+
crossentropy(p::AbstractArray{<:Real}, q::AbstractArray{<:Real}, b::Real) =
602613
crossentropy(p,q) / log(b)
603614

604615

@@ -610,21 +621,32 @@ also called the relative entropy of `p` with respect to `q`,
610621
that is the sum `pᵢ * log(pᵢ / qᵢ)`. Optionally a real number `b`
611622
can be specified such that the divergence is scaled by `1/log(b)`.
612623
"""
613-
function kldivergence(p::AbstractArray{T}, q::AbstractArray{T}) where T<:Real
624+
function kldivergence(p::AbstractArray{<:Real}, q::AbstractArray{<:Real})
614625
length(p) == length(q) || throw(DimensionMismatch("Inconsistent array length."))
615-
s = 0.
616-
z = zero(T)
617-
for i = 1:length(p)
618-
@inbounds pi = p[i]
619-
@inbounds qi = q[i]
620-
if pi > z
621-
s += pi * log(pi / qi)
622-
end
626+
627+
# handle empty collections
628+
if isempty(p)
629+
Base.depwarn(
630+
"support for empty collections will be removed since they do not "*
631+
"represent proper probability distributions",
632+
:kldivergence,
633+
)
634+
# return zero for empty arrays
635+
pzero = zero(eltype(p))
636+
qzero = zero(eltype(q))
637+
return xlogy(pzero, zero(pzero / qzero))
623638
end
624-
return s
639+
640+
# use pairwise summation (https://github.com/JuliaLang/julia/pull/31020)
641+
broadcasted = Broadcast.broadcasted(vec(p), vec(q)) do pi, qi
642+
# handle pi = qi = 0, otherwise `NaN` is returned
643+
piqi = iszero(pi) && iszero(qi) ? zero(pi / qi) : pi / qi
644+
return xlogy(pi, piqi)
645+
end
646+
return sum(Broadcast.instantiate(broadcasted))
625647
end
626648

627-
kldivergence(p::AbstractArray{T}, q::AbstractArray{T}, b::Real) where {T<:Real} =
649+
kldivergence(p::AbstractArray{<:Real}, q::AbstractArray{<:Real}, b::Real) =
628650
kldivergence(p,q) / log(b)
629651

630652
#############################

test/REQUIRE

Lines changed: 0 additions & 2 deletions
This file was deleted.

test/scalarstats.jl

Lines changed: 35 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -154,12 +154,19 @@ it = (xᵢ for xᵢ in x)
154154

155155
##### entropy
156156

157-
@test entropy([0.5, 0.5]) ≈ 0.6931471805599453
158-
@test entropy([0.2, 0.3, 0.5]) ≈ 1.0296530140645737
157+
@test @inferred(entropy([0.5, 0.5])) ≈ 0.6931471805599453
158+
@test @inferred(entropy([1//2, 1//2])) ≈ 0.6931471805599453
159+
@test @inferred(entropy([0.5f0, 0.5f0])) isa Float32
160+
@test @inferred(entropy([0.2, 0.3, 0.5])) ≈ 1.0296530140645737
161+
@test iszero(@inferred(entropy([0, 1])))
162+
@test iszero(@inferred(entropy([0.0, 1.0])))
159163

160-
@test entropy([0.5, 0.5],2) ≈ 1.0
161-
@test entropy([0.2, 0.3, 0.5], 2) ≈ 1.4854752972273344
162-
@test entropy([1.0, 0.0]) ≈ 0.0
164+
@test @inferred(entropy([0.5, 0.5], 2)) ≈ 1.0
165+
@test @inferred(entropy([1//2, 1//2], 2)) ≈ 1.0
166+
@test @inferred(entropy([0.2, 0.3, 0.5], 2)) ≈ 1.4854752972273344
167+
168+
@test_throws ArgumentError @inferred(entropy(Float64[]))
169+
@test_throws ArgumentError @inferred(entropy(Int[]))
163170

164171
##### Renyi entropies
165172
# Generate a random probability distribution
@@ -200,12 +207,31 @@ scale = rand()
200207
@test renyientropy(udist * scale, order) ≈ renyientropy(udist, order) - log(scale)
201208

202209
##### Cross entropy
203-
@test crossentropy([0.2, 0.3, 0.5], [0.3, 0.4, 0.3]) ≈ 1.1176681825904018
204-
@test crossentropy([0.2, 0.3, 0.5], [0.3, 0.4, 0.3], 2) ≈ 1.6124543443825532
210+
@test @inferred(crossentropy([0.2, 0.3, 0.5], [0.3, 0.4, 0.3])) ≈ 1.1176681825904018
211+
@test @inferred(crossentropy([1//5, 3//10, 1//2], [0.3, 0.4, 0.3])) ≈ 1.1176681825904018
212+
@test @inferred(crossentropy([1//5, 3//10, 1//2], [0.3f0, 0.4f0, 0.3f0])) isa Float32
213+
@test @inferred(crossentropy([0.2, 0.3, 0.5], [0.3, 0.4, 0.3], 2)) ≈ 1.6124543443825532
214+
@test @inferred(crossentropy([1//5, 3//10, 1//2], [0.3, 0.4, 0.3], 2)) ≈ 1.6124543443825532
215+
@test @inferred(crossentropy([1//5, 3//10, 1//2], [0.3f0, 0.4f0, 0.3f0], 2f0)) isa Float32
216+
217+
# deprecated, should throw an `ArgumentError` at some point
218+
logpattern = (:warn, "support for empty collections will be removed since they do not represent proper probability distributions")
219+
@test iszero(@test_logs logpattern @inferred(crossentropy(Float64[], Float64[])))
220+
@test iszero(@test_logs logpattern @inferred(crossentropy(Int[], Int[])))
205221

206222
##### KL divergence
207-
@test kldivergence([0.2, 0.3, 0.5], [0.3, 0.4, 0.3]) ≈ 0.08801516852582819
208-
@test kldivergence([0.2, 0.3, 0.5], [0.3, 0.4, 0.3], 2) ≈ 0.12697904715521868
223+
@test @inferred(kldivergence([0.2, 0.3, 0.5], [0.3, 0.4, 0.3])) ≈ 0.08801516852582819
224+
@test @inferred(kldivergence([1//5, 3//10, 1//2], [0.3, 0.4, 0.3])) ≈ 0.08801516852582819
225+
@test @inferred(kldivergence([1//5, 3//10, 1//2], [0.3f0, 0.4f0, 0.3f0])) isa Float32
226+
@test @inferred(kldivergence([0.2, 0.3, 0.5], [0.3, 0.4, 0.3], 2)) ≈ 0.12697904715521868
227+
@test @inferred(kldivergence([1//5, 3//10, 1//2], [0.3, 0.4, 0.3], 2)) ≈ 0.12697904715521868
228+
@test @inferred(kldivergence([1//5, 3//10, 1//2], [0.3f0, 0.4f0, 0.3f0], 2f0)) isa Float32
229+
@test iszero(@inferred(kldivergence([0, 1], [0f0, 1f0])))
230+
231+
# deprecated, should throw an `ArgumentError` at some point
232+
logpattern = (:warn, "support for empty collections will be removed since they do not represent proper probability distributions")
233+
@test iszero(@test_logs logpattern @inferred(kldivergence(Float64[], Float64[])))
234+
@test iszero(@test_logs logpattern @inferred(kldivergence(Int[], Int[])))
209235

210236
##### summarystats
211237

0 commit comments

Comments
 (0)