Merge pull request #495 from chentoast/dirichlet

ztangent · web-flow · commit 4af5d8df7862 · 2023-01-20T13:17:14.000-05:00
feat: add dirichlet distribution
diff --git a/src/modeling_library/distributions/dirichlet.jl b/src/modeling_library/distributions/dirichlet.jl
@@ -0,0 +1,42 @@
+# Singleton type for the Dirichlet distribution; the type parameter is the sample type (a probability vector).
+struct Dirichlet <: Distribution{Vector{Float64}} end
+"""
+    dirichlet(alpha::AbstractVector{<:Real})
+
+Sample a `Vector{Float64}` on the probability simplex from a Dirichlet distribution with concentration parameters `alpha`.
+"""
+const dirichlet = Dirichlet()
+
+# Log-density of `x` under a Dirichlet with concentrations `alpha`; `-Inf` when `x` or `alpha` is outside the support.
+function logpdf(::Dirichlet, x::AbstractVector{T}, alpha::AbstractVector{U}) where {T <: Real, U <: Real}
+    # Concentrations must be strictly positive: alpha_i == 0 is not a valid Dirichlet parameter.
+    in_support = length(x) == length(alpha) && isapprox(sum(x), 1) && all(x .>= 0) && all(alpha .> 0)
+    in_support || return -Inf
+    # A term with alpha_i == 1 contributes exactly 0, even at x_i == 0 where 0 * log(0) would be NaN.
+    ll = sum(a_i == 1 ? zero(log(x_i)) : (a_i - 1) * log(x_i) for (a_i, x_i) in zip(alpha, x))
+    return ll - (sum(loggamma.(alpha)) - loggamma(sum(alpha)))
+end
+
+# Gradients of the log-density with respect to `x` and `alpha`, as `(deriv_x, deriv_alpha)`; both are zero outside the support.
+function logpdf_grad(::Dirichlet, x::AbstractVector{T}, alpha::AbstractVector{U}) where {T <: Real, U <: Real}
+    in_support = length(x) == length(alpha) && isapprox(sum(x), 1) && all(x .>= 0) && all(alpha .> 0)
+    in_support || return (zero(x), zero(alpha))
+    # d/dx_i is (alpha_i - 1) / x_i, taken as exactly 0 when alpha_i == 1 so that x_i == 0 cannot give 0 / 0 = NaN.
+    deriv_x = map((a_i, x_i) -> a_i == 1 ? zero(a_i / x_i) : (a_i - 1) / x_i, alpha, x)
+    deriv_alpha = log.(x) .- digamma.(alpha) .+ digamma(sum(alpha))
+    return (deriv_x, deriv_alpha)
+end
+
+# Draw one sample for concentrations `alpha` by delegating to the Distributions.jl implementation.
+random(::Dirichlet, alpha::AbstractVector{T}) where {T <: Real} =
+    rand(Distributions.Dirichlet(alpha))
+
+export dirichlet
+# Calling the distribution object draws a sample, e.g. `dirichlet(alpha)`.
+(dist::Dirichlet)(alpha) = random(dist, alpha)
+
+# Continuous distribution; gradient flags cover the sampled value and the single argument `alpha`.
+is_discrete(::Dirichlet) = false
+has_output_grad(::Dirichlet) = true
+has_argument_grads(::Dirichlet) = (true,)
+
diff --git a/src/modeling_library/distributions/distributions.jl b/src/modeling_library/distributions/distributions.jl
@@ -4,6 +4,7 @@ include("beta.jl")
 include("binom.jl")
 include("categorical.jl")
 include("cauchy.jl")
+include("dirichlet.jl")
 include("exponential.jl")
 include("gamma.jl")
 include("geometric.jl")
diff --git a/test/modeling_library/distributions.jl b/test/modeling_library/distributions.jl
@@ -1,4 +1,5 @@
 import DataStructures: OrderedDict
+import LinearAlgebra: diagm
 
 @testset "bernoulli" begin
 
@@ -233,6 +234,61 @@ end
     @test isapprox(actual[3][2, 2], finite_diff_mat_sym(f, args, 3, 2, 2, dx))
 end
 
+@testset "dirichlet" begin
+    # random: a draw has one entry per concentration parameter and sums to one
+    x = dirichlet([1., 1., 1., 1.])
+    @test length(x) == 4
+    @test isapprox(sum(x), 1.)
+
+    # logpdf is -Inf outside the support: x off the simplex, negative alpha, length mismatch
+    @test logpdf(dirichlet, [0., 0], [1., 1.]) == -Inf
+    @test logpdf(dirichlet, [1., 1.], [1., 1.]) == -Inf
+    @test logpdf(dirichlet, [2., -1], [1., 1.]) == -Inf
+    @test logpdf(dirichlet, [.5, .5], [-1., 1.]) == -Inf
+    @test logpdf(dirichlet, [.5, .5], [1., -1.]) == -Inf
+    @test logpdf(dirichlet, [.5, .5], [1., 1., 1.]) == -Inf
+    # a point on the boundary of the simplex is still inside the support
+    @test logpdf(dirichlet, [0., 1], [1., 1.]) != -Inf
+
+    # logpdf agrees with Distributions.jl, including for d > 2
+    reference_cases = (([.01, .99], [2., 2.]), ([.01, .99], [1., 4.]),
+                       ([.01, .99], [.01, .01]), ([.2, .2, .6], [2., 2., 4.]))
+    for (x_ref, alpha_ref) in reference_cases
+        @test isapprox(logpdf(dirichlet, x_ref, alpha_ref),
+                       Distributions.logpdf(Distributions.Dirichlet(alpha_ref), x_ref))
+    end
+
+    # Finite differences in x must stay on the simplex, so the x-gradient is checked in
+    # unconstrained logit space: perturb logits, map them through a softmax, and apply
+    # the chain rule with the softmax Jacobian (written in terms of the softmax output p).
+    function softmax(z)
+        exp.(z) / sum(exp.(z))
+    end
+
+    function softmax_jacobian(p)
+        diagm(p) .- (p .* p')
+    end
+
+    f = (x, alpha) -> logpdf(dirichlet, x, alpha)
+    f_normalized = (z, alpha) -> logpdf(dirichlet, softmax(z), alpha)
+
+    args = ([0., 0., 0., 0.], [1., 2., 3., 3.])
+    normalized_args = ([.25, .25, .25, .25], [1., 2., 3., 3.])  # softmax of the zero logits
+
+    actual = logpdf_grad(dirichlet, normalized_args...)
+
+    # gradients with respect to x, pulled back to logit space
+    actual_x_grad = actual[1]' * softmax_jacobian(normalized_args[1])
+    for i in eachindex(normalized_args[1])
+        @test isapprox(actual_x_grad[i], finite_diff_vec(f_normalized, args, 1, i, dx))
+    end
+
+    # gradients with respect to alpha
+    for i in eachindex(normalized_args[2])
+        @test isapprox(actual[2][i], finite_diff_vec(f, normalized_args, 2, i, dx))
+    end
+end
+
 @testset "uniform" begin
 
     # random
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -1,6 +1,7 @@
 using Gen
 using Test
 import Random
+import Distributions
 
 """
 Compute a numerical partial derivative of `f` with respect to the `i`th