Add option to return cost with regularization term (#75)

devmotion · web-flow · commit 15e296a044e5 · 2021-05-26T09:16:33.000+02:00
diff --git a/Project.toml b/Project.toml
@@ -1,18 +1,20 @@
 name = "OptimalTransport"
 uuid = "7e02d93a-ae51-4f58-b602-d97af76e3b33"
 authors = ["zsteve <stephenz@student.unimelb.edu.au>"]
-version = "0.3.1"
+version = "0.3.2"
 
 [deps]
 Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
 IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+LogExpFunctions = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
 MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 
 [compat]
 Distances = "0.9.0, 0.10"
 IterativeSolvers = "0.8.4, 0.9"
+LogExpFunctions = "0.2"
 MathOptInterface = "0.9"
 julia = "1"
 
diff --git a/src/OptimalTransport.jl b/src/OptimalTransport.jl
@@ -7,6 +7,7 @@ module OptimalTransport
 using Distances
 using LinearAlgebra
 using IterativeSolvers, SparseArrays
+using LogExpFunctions: LogExpFunctions
 using MathOptInterface
 
 export sinkhorn, sinkhorn2
@@ -171,19 +172,23 @@ function sinkhorn_gibbs(mu, nu, K; tol=1e-9, check_marginal_step=10, maxiter=100
 end
 
 """
-    sinkhorn(mu, nu, C, eps; tol=1e-9, check_marginal_step=10, maxiter=1000)
+    sinkhorn(μ, ν, C, ε; tol=1e-9, check_marginal_step=10, maxiter=1_000)
 
-Compute entropically regularised transport plan of histograms `mu` and `nu` with cost matrix `C` and entropic
-regularization parameter `eps`. 
-
-Return optimal transport coupling `γ` of the same dimensions as `C` which solves 
+Compute the optimal transport plan for the entropically regularized optimal transport
+problem with source and target marginals `μ` and `ν`, cost matrix `C` of size
+`(length(μ), length(ν))`, and entropic regularization parameter `ε`.
 
+The optimal transport plan `γ` is of the same size as `C` and solves
 ```math
-\\inf_{\\gamma \\in \\Pi(\\mu, \\nu)} \\langle \\gamma, C \\rangle - \\epsilon H(\\gamma)
+\\inf_{\\gamma \\in \\Pi(\\mu, \\nu)} \\langle \\gamma, C \\rangle
++ \\varepsilon \\Omega(\\gamma),
 ```
+where ``\\Omega(\\gamma) = \\sum_{i,j} \\gamma_{i,j} \\log \\gamma_{i,j}`` is the entropic
+regularization term.
 
-where ``H`` is the entropic regulariser, ``H(\\gamma) = -\\sum_{i, j} \\gamma_{ij} \\log(\\gamma_{ij})``.
-
+Every `check_marginal_step` iterations, the error of the marginal `μ` is checked for
+convergence with absolute tolerance `tol`. After `maxiter` iterations, the
+computation is stopped.
 """
 function sinkhorn(mu, nu, C, eps; kwargs...)
     # compute Gibbs kernel
@@ -196,24 +201,22 @@ function sinkhorn(mu, nu, C, eps; kwargs...)
 end
 
 """
-    sinkhorn2(mu, nu, C, eps; plan=nothing, kwargs...)
-
-Compute entropically regularised transport cost of histograms `mu` and `nu` with cost matrix `C` and entropic
-regularization parameter `eps`.
-
-Return optimal value of
-
-```math
-\\inf_{\\gamma \\in \\Pi(\\mu, \\nu)} \\langle \\gamma, C \\rangle - \\epsilon H(\\gamma)
-```
+    sinkhorn2(μ, ν, C, ε; regularization=false, plan=nothing, kwargs...)
 
-where ``H`` is the entropic regulariser, ``H(\\gamma) = -\\sum_{i, j} \\gamma_{ij} \\log(\\gamma_{ij})``.
+Solve the entropically regularized optimal transport problem with source and target
+marginals `μ` and `ν`, cost matrix `C` of size `(length(μ), length(ν))`, and entropic
+regularization parameter `ε`, and return the optimal cost.
 
 A pre-computed optimal transport `plan` may be provided.
 
+!!! note
+    Like the `sinkhorn2` function in the Python Optimal Transport package, this function
+    returns the optimal transport cost without the regularization term. The cost
+    with the regularization term can be computed by setting `regularization=true`.
+
 See also: [`sinkhorn`](@ref)
 """
-function sinkhorn2(μ, ν, C, ε; plan=nothing, kwargs...)
+function sinkhorn2(μ, ν, C, ε; regularization=false, plan=nothing, kwargs...)
     γ = if plan === nothing
         sinkhorn(μ, ν, C, ε; kwargs...)
     else
@@ -225,7 +228,14 @@ function sinkhorn2(μ, ν, C, ε; plan=nothing, kwargs...)
         )
         plan
     end
-    return dot(γ, C)
+
+    cost = if regularization
+        dot(γ, C) + ε * sum(LogExpFunctions.xlogx, γ)
+    else
+        dot(γ, C)
+    end
+
+    return cost
 end
 
 """
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -82,14 +82,24 @@ end
         γ_pot = POT.sinkhorn(μ, ν, C, eps; numItermax=5_000, stopThr=1e-9)
         @test norm(γ - γ_pot, Inf) < 1e-9
 
-        # compute optimal transport cost (Julia implementation + POT)
+        # compute optimal transport cost
         c = sinkhorn2(μ, ν, C, eps; maxiter=5_000)
+
+        # with regularization term
+        c_w_regularization = sinkhorn2(μ, ν, C, eps; maxiter=5_000, regularization=true)
+        @test c_w_regularization ≈ c + eps * sum(x -> iszero(x) ? x : x * log(x), γ)
+
+        # compare with POT
         c_pot = POT.sinkhorn2(μ, ν, C, eps; numItermax=5_000, stopThr=1e-9)[1]
-        @test c ≈ c_pot atol = 1e-9
+        @test c_pot ≈ c atol = 1e-9
 
-        # ensure that provided map is used
+        # ensure that the provided plan is used and the resulting cost is correct
         c2 = sinkhorn2(similar(μ), similar(ν), C, rand(); plan=γ)
         @test c2 ≈ c
+        c2_w_regularization = sinkhorn2(
+            similar(μ), similar(ν), C, eps; plan=γ, regularization=true
+        )
+        @test c2_w_regularization ≈ c_w_regularization
     end
 
     # different element type
@@ -109,12 +119,17 @@ end
         γ_pot = POT.sinkhorn(μ, ν, C, eps; numItermax=5_000, stopThr=1e-9)
         @test norm(γ - γ_pot, Inf) < Base.eps(Float32)
 
-        # compute optimal transport cost (Julia implementation + POT)
+        # compute optimal transport cost
         c = sinkhorn2(μ, ν, C, eps; maxiter=5_000)
         @test c isa Float32
 
+        # with regularization term
+        c_w_regularization = sinkhorn2(μ, ν, C, eps; maxiter=5_000, regularization=true)
+        @test c_w_regularization ≈ c + eps * sum(x -> iszero(x) ? x : x * log(x), γ)
+
+        # compare with POT
         c_pot = POT.sinkhorn2(μ, ν, C, eps; numItermax=5_000, stopThr=1e-9)[1]
-        @test c ≈ c_pot atol = Base.eps(Float32)
+        @test c_pot ≈ c atol = Base.eps(Float32)
     end
 end