
Commit e5b4a5e

Work directly with the logpdf, improve numerical stability, add tests
Fixes #10
1 parent d894637 commit e5b4a5e

7 files changed (+174 / -57 lines)

.travis.yml

Lines changed: 2 additions & 3 deletions
@@ -2,9 +2,8 @@ language: julia
 julia:
 - 0.7.0
 - 1.0.0
-- 1.0.1
-- 1.0.2
-- 1.0.3
+- 1.6.0
+- 1.9.0
 before_install:
 - pip install --user codecov
 after_success:

Changelog.md

Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
+# Changelog
+
+## v0.2.0
+
+- Added support for `min_slope` and `max_slope` in the initial-point search for `RejectionSampler`.
+- Added the keyword `logdensity` for specifying the log-density directly in `RejectionSampler` [#10](https://github.com/mauriciogtec/AdaptiveRejectionSampling.jl/issues/10).
+- Improved numerical stability in `exp_integral`; added a warning when instability is detected.
+- Added this changelog.
+- Added a more involved example to the README based on [#10](https://github.com/mauriciogtec/AdaptiveRejectionSampling.jl/issues/10).
+
+TODO: Add a logger to avoid repeated warnings for numerical instability.
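
As a quick illustration of the two new options above, here is a minimal sketch (not part of the commit) that assumes the constructor keywords introduced in `src/AdaptiveRejectionSampling.jl` below; the standard-normal log-density is only an example:

```julia
using AdaptiveRejectionSampling

# Log-density of a standard normal, up to an additive constant.
logf(x) = -0.5 * x^2

# Pass the log-density directly and bound the absolute slope at the
# initial points found by the greedy search (values are illustrative).
sampler = RejectionSampler(logf, (-Inf, Inf), 0.5;
                           logdensity=true, min_slope=1e-6, max_slope=3.0)
sim = run_sampler!(sampler, 10_000)
```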

Project.toml

Lines changed: 6 additions & 4 deletions
@@ -1,16 +1,18 @@
 name = "AdaptiveRejectionSampling"
 uuid = "c75e803d-635f-53bd-ab7d-544e482d8c75"
-version = "0.1.2"
+version = "0.2.0"

 [deps]
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
+StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c"

 [compat]
-ForwardDiff = "0.10.1"
-StatsBase = "0.26,0.27,0.28,0.29,0.30,0.31,0.32,0.33,0.34"
-julia = "1"
+ForwardDiff = ">= 0.10.1"
+StatsBase = ">= 0.26"
+julia = ">= 1"

 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

README.md

Lines changed: 73 additions & 7 deletions
@@ -6,15 +6,18 @@

 # AdaptiveRejectionSampling

-This package is useful for efficientlysampling from log-concave univariate density functions.
+This package is useful for efficient sampling from log-concave univariate density functions.


+## Examples
+
 ```julia
 using AdaptiveRejectionSampling
 using Plots
 ```

-## Sampling from a shifted normal distribution
+
+### Sampling from a shifted normal distribution


 ```julia
@@ -48,7 +51,7 @@ plot!(x, [target envelop], width = 2, label = ["Normal(μ, σ)" "Envelop"])
 ![](img/example1.png)


-## Let's try a Gamma
+### Let's try a Gamma


 ```julia
@@ -75,7 +78,7 @@ plot!(x, [target envelop], width = 2, label = ["Gamma(α, β)" "Envelop"])

 ![](img/example2.png)

-## Truncated distributions and unknown normalisation constant
+### Truncated distributions and unknown normalization constant

 We don't need to provide an exact density--it will sample up to proportionality--and we can use truncated distributions

@@ -103,7 +106,7 @@ plot!(x, [target envelop], width = 2, label = ["target density" "envelop"])

 ![](img/example3.png)

-## Elastic Net distribution
+### Elastic Net distribution

 The following example arises from elastic net regression and smoothing problems. In these cases, the integration constants are not available analytically.

@@ -132,7 +135,71 @@ plot!(x, [target envelop], width = 2, label = ["target density" "envelop"])

 ![](img/example4.png)

-To cite please use
+### Tips for numerical stability
+
+Here are some tips:
+
+- Make sure the log-density is numerically stable on the support and avoid values above 25 (larger values trigger the truncation warning in `exp_integral`);
+- Use log-densities instead of densities via the keyword `logdensity=true`;
+- Specify `min_slope` and `max_slope` to find better initial points. The defaults are 1e-6 and 1e6, respectively.
+`min_slope` is the minimum absolute slope of the log-density at the initial points of the envelop. In general,
+it is a good idea to leave `min_slope` at its default and try `max_slope=10.0` or a smaller value.
+- Try setting `δ` to a smaller value for the search grid. The default is 0.5.
+
+
+⚠️ *Warning* ⚠️: Using `logdensity=true` will be the default in v1.0.
+
+Here is an example:
+
+```julia
+import StatsFuns: logsumexp
+n = 50
+k = 10
+alpha = 0.5
+tau = 0.5
+theta = 1.0
+
+# a complicated log-density
+logf(v) = n * v - (n - k * alpha) * logsumexp([v, log(tau)]) - theta / alpha * (tau + exp(v))^alpha
+
+# plot logf and f side by side
+p1 = plot(logf, -20, 20, label = "logf")
+p2 = plot(x -> exp(logf(x)), -20, 20, label = "f")
+plot(p1, p2, layout = (1, 2))
+
+# run the sampler
+δ = 0.1
+support = (-Inf, Inf)
+search = (0.0, 10.0)
+sampler = RejectionSampler(logf, support, δ, max_segments=10, logdensity=true, search_range=search, max_slope=10.0)
+@time sim = run_sampler!(sampler, 10000)
+```
+
+```
+[ Info: initial points found at 1.08, 5.43 with grads 9.94522619043481, -9.98968199019509
+0.016296 seconds (371.21 k allocations: 6.850 MiB)
+```
+
+
+```julia
+x = range(0, 10, length=200)
+normconst = sum(exp.(logf.(x))) * (x[2] - x[1])
+envelop = [eval_envelop(sampler.envelop, xi) for xi in x] ./ normconst
+target = [exp(logf(xi)) for xi in x] ./ normconst
+
+# plot logf next to a histogram of the samples, with target and envelop overlaid
+p1 = plot(logf, -20, 20, label = "logf")
+p2 = histogram(sim, normalize=true, label="histogram")
+plot!(p2, x, [target envelop], width=2, label=["target density" "envelop"])
+
+plot(p1, p2, layout = (1, 2))
+```
+
+![](img/example5.png)
+
+## Citation
+
+

 ```bibtex
 @manual{tec2018ars,
@@ -142,4 +209,3 @@ To cite please use
   url = {https://github.com/mauriciogtec/AdaptiveRejectionSampling.jl}
 }
 ```
-

img/example5.png

28.4 KB

src/AdaptiveRejectionSampling.jl

Lines changed: 77 additions & 43 deletions
@@ -7,8 +7,8 @@ module AdaptiveRejectionSampling
 # ------------------------------
 using Random # Random stdlib
 # ------------------------------
-using ForwardDiff # For automatic differentiation, no user nor approximate derivatives
-using StatsBase # To include the basic sample from array function
+import ForwardDiff: derivative
+import StatsBase: sample, weights
 # ------------------------------
 export Line, Objective, Envelop, RejectionSampler # Structures/classes
 export run_sampler!, sample_envelop, eval_envelop, add_segment! # Methods
@@ -29,7 +29,7 @@ Finds the horizontal coordinate of the intersection between lines
 """
 function intersection(l1::Line, l2::Line)
     @assert l1.slope != l2.slope "slopes should be different"
-    - (l2.intercept - l1.intercept) / (l2.slope - l1.slope)
+    -(l2.intercept - l1.intercept) / (l2.slope - l1.slope)
 end

 """
@@ -38,9 +38,19 @@ Computes the integral
 ``LaTeX \\int_{x_1} ^ {x_2} \\exp\\{ax + b\\} dx. ``
 The resulting value is the weight assigned to the segment [x1, x2] in the envelop
 """
-function exp_integral(l::Line, x1::Float64, x2::Float64)
+@inline function exp_integral(l::Line, x1::Float64, x2::Float64)
     a, b = l.slope, l.intercept
-    exp(b) * (exp(a * x2) - exp(a * x1)) / a
+    v1, v2 = a*x1, a*x2
+    if v1 > 25.0 || v2 > 25.0 || a == 0.0 || b > 25.0
+        @warn "exp_integral: numerical instability, truncating, check for under/overflow, consider truncating logf"
+        v1 = min(v1, 25.0)
+        v2 = min(v2, 25.0)
+        b = min(b, 25.0)
+        if a == 0
+            a = (v2 - v1) * 1e-6
+        end
+    end
+    exp(b) * (exp(v2) - exp(v1)) / a
 end

 """
@@ -56,13 +66,13 @@ mutable struct Envelop
     weights::AbstractVector{Float64}
     size::Int

-    Envelop(lines::Vector{Line}, support::Tuple{Float64, Float64}) = begin
-        @assert issorted([l.slope for l in lines], rev = true) "line slopes must be decreasing"
-        intersections = [intersection(lines[i], lines[i + 1]) for i in 1:(length(lines) - 1)]
+    Envelop(lines::Vector{Line}, support::Tuple{Float64,Float64}) = begin
+        @assert issorted([l.slope for l in lines], rev=true) "line slopes must be decreasing"
+        intersections = [intersection(lines[i], lines[i+1]) for i in 1:(length(lines)-1)]
         cutpoints = [support[1]; intersections; support[2]]
         @assert issorted(cutpoints) "cutpoints must be ordered"
         @assert length(unique(cutpoints)) == length(cutpoints) "cutpoints can't have duplicates"
-        weights = [exp_integral(l, cutpoints[i], cutpoints[i + 1]) for (i, l) in enumerate(lines)]
+        weights = [exp_integral(l, cutpoints[i], cutpoints[i+1]) for (i, l) in enumerate(lines)]
         @assert Inf ∉ weights "Overflow in assigning weights"
         new(lines, cutpoints, weights, length(lines))
     end
@@ -84,19 +94,19 @@ function add_segment!(e::Envelop, l::Line)
         # Insert in second position, first one is the support bound
         insert!(e.cutpoints, pos + 1, new_cut)
     elseif pos == e.size + 1
-        new_cut = intersection(l, e.lines[pos - 1])
+        new_cut = intersection(l, e.lines[pos-1])
         insert!(e.cutpoints, pos, new_cut)
     else
-        new_cut1 = intersection(l, e.lines[pos - 1])
+        new_cut1 = intersection(l, e.lines[pos-1])
         new_cut2 = intersection(l, e.lines[pos])
         splice!(e.cutpoints, pos, [new_cut1, new_cut2])
-        @assert issorted(e.cutpoints) "incompatible line: resulting intersection points aren't sorted"
+        @assert issorted(e.cutpoints) "incompatible line: resulting intersection points aren't sorted"
     end
     # Insert the new line
     insert!(e.lines, pos, l)
     e.size += 1
     # Recompute weights (this could be done more efficiently in the future by updating the necessary ones only)
-    e.weights = [exp_integral(line, e.cutpoints[i], e.cutpoints[i + 1]) for (i, line) in enumerate(e.lines)]
+    e.weights = [exp_integral(line, e.cutpoints[i], e.cutpoints[i+1]) for (i, line) in enumerate(e.lines)]
 end

 """
@@ -129,7 +139,7 @@ function eval_envelop(e::Envelop, x::Float64)
     if pos == 1 || pos == length(e.cutpoints) + 1
         return 0.0
     else
-        a, b = e.lines[pos - 1].slope, e.lines[pos - 1].intercept
+        a, b = e.lines[pos-1].slope, e.lines[pos-1].intercept
         return exp(a * x + b)
     end
 end
@@ -147,27 +157,37 @@ struct Objective
     grad::Function
     Objective(logf::Function) = begin
         # Automatic differentiation
-        grad(x) = ForwardDiff.derivative(logf, x)
+        grad(x) = derivative(logf, x)
         new(logf, grad)
     end
     Objective(logf::Function, grad::Function) = new(logf, grad)
 end

 """
+    RejectionSampler(f::Function, support::Tuple{Float64, Float64}, init::Tuple{Float64, Float64})
     RejectionSampler(f::Function, support::Tuple{Float64, Float64}[ ,δ::Float64])
-RejectionSampler(f::Function, support::Tuple{Float64, Float64}, init::Tuple{Float64, Float64})
-An adaptive rejection sampler to obtain iid samples from a logconcave function `f`, supported in the
-domain `support` = (support[1], support[2]). To create the object, two initial points `init = init[1], init[2]`
-such that `loff'(init[1]) > 0` and `logf'(init[2]) < 0` are necessary. If they are not provided, the constructor
-will perform a greedy search based on `δ`.
-
-The argument `support` must be of the form `(-Inf, Inf), (-Inf, a), (b, Inf), (a,b)`, and it represent the
+An adaptive rejection sampler to obtain iid samples from a logconcave function supported in
+`support = (support[1], support[2])`. `f` can be either the probability density to be sampled or its
+logarithm; for the latter, use the keyword argument `logdensity=true`.
+The function can be unnormalized, i.e. the probability density can be specified up to a constant.
+The adaptive rejection sampling algorithm requires two initial points `init = (init[1], init[2])`
+such that (d/dx)logf(init[1]) > 0 and (d/dx)logf(init[2]) < 0. These points can be provided directly
+(typically, any point left and right of the mode will do). It is also possible to specify a search
+range and step δ for a greedy search of the initial points.
+The `support` must be of the form `(-Inf, Inf), (-Inf, a), (b, Inf), (a,b)`, and it represents the
 interval in which f has positive value, and zero elsewhere.

+The alternative constructor takes a `search_range`, a value δ for the distance between points in the search,
+and min/max slope values in absolute terms.
+
 ## Keyword arguments
 - `max_segments::Int = 10`: max size of envelop, the rejection-rate is usually slow with a small number of segments
 - `max_failed_factor::Float64 = 0.001`: level at which to throw an error if a single sample has a rejection rate
 exceeding this value
+- `logdensity::Bool = false`: indicator of whether `f` is the log of the probability density, up to a normalization constant.
+- `search_range::Tuple{Float64,Float64} = (-10.0, 10.0)`: range in which to search for initial points
+- `min_slope::Float64 = 1e-6`: minimum slope in absolute value of logf at the initial/found points
+- `max_slope::Float64 = Inf`: maximum slope in absolute value of logf at the initial/found points
 """
 struct RejectionSampler
     objective::Objective
@@ -176,42 +196,56 @@ struct RejectionSampler
     max_failed_rate::Float64
     # Constructor when initial points are provided
     RejectionSampler(
-        f::Function,
-        support::Tuple{Float64, Float64},
-        init::Tuple{Float64, Float64};
-        max_segments::Int = 25,
-        max_failed_rate::Float64 = 0.001
+        f::Function,
+        support::Tuple{Float64,Float64},
+        init::Tuple{Float64,Float64};
+        max_segments::Int=25,
+        logdensity::Bool=false,
+        max_failed_rate::Float64=0.001,
     ) = begin
         @assert support[1] < support[2] "invalid support, not an interval"
-        logf(x) = log(f(x))
-        objective = Objective(logf)
+        if logdensity
+            objective = Objective(f)
+        else
+            objective = Objective(x -> log(f(x)))
+        end
         x1, x2 = init
         @assert x1 < x2 "cutpoints must be ordered"
         a1, a2 = objective.grad(x1), objective.grad(x2)
-        @assert a1 >= 0 "logf must have positive slope at initial cutpoint 1"
-        @assert a2 <= 0 "logf must have negative slope at initial cutpoint 2"
+        @assert 0.0 < a1 "logf must have positive slope at initial cutpoint 1"
+        @assert a2 < 0.0 "logf must have negative slope at initial cutpoint 2"
         b1, b2 = objective.logf(x1) - a1 * x1, objective.logf(x2) - a2 * x2
         line1, line2 = Line(a1, b1), Line(a2, b2)
         envelop = Envelop([line1, line2], support)
         new(objective, envelop, max_segments, max_failed_rate)
     end
-
-    # Constructor for greedy search of starting points
+    ""
     RejectionSampler(
-        f::Function,
-        support::Tuple{Float64, Float64},
-        δ::Float64 = 0.5;
-        search_range::Tuple{Float64, Float64} = (-10.0,10.0),
-        kwargs...
+        f::Function,
+        support::Tuple{Float64,Float64},
+        δ::Float64=0.5;
+        search_range::Tuple{Float64,Float64}=(-10.0, 10.0),
+        min_slope::Float64=1e-6,
+        max_slope::Float64=10.0,
+        logdensity::Bool=false,
+        kwargs...
     ) = begin
-        logf(x) = log(f(x))
-        grad(x) = ForwardDiff.derivative(logf, x)
+        if logdensity
+            logf = f
+        else
+            logf = (x -> log(f(x)))
+        end
+        grad(x) = derivative(logf, x)
         grid_lims = max(search_range[1], support[1]), min(search_range[2], support[2])
         grid = grid_lims[1]:δ:grid_lims[2]
-        i1, i2 = findfirst(grad.(grid) .> 0.), findfirst(grad.(grid) .< 0.)
-        @assert (i1 != nothing) && (i2 != nothing) "couldn't find initial points, please provide them or change `search_range`"
+        grads = grad.(grid)
+        i1 = findfirst(min_slope .< grads .< max_slope)
+        i2 = findlast(min_slope .< -grads .< max_slope)
+        @assert (i1 !== nothing) && (i2 !== nothing) "couldn't find initial points, please provide them or change `search_range`"
+        @assert i1 < i2 "function is not logconcave, first index with grad>0 must be smaller than first index with grad<0"
        x1, x2 = grid[i1], grid[i2]
-        RejectionSampler(f, support, (x1, x2); kwargs...)
+        @info "initial points found at $(x1), $(x2) with grads $(grads[i1]), $(grads[i2])"
+        RejectionSampler(f, support, (x1, x2); logdensity=logdensity, kwargs...)
     end
 end

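To make the new initial-point search in the second constructor easier to follow, here is a standalone sketch of the same logic (illustrative only, not part of the package; the standard-normal log-density and its hand-coded derivative are assumptions for the example):

```julia
# Standalone illustration of the greedy initial-point search added above.
logf(x) = -0.5 * x^2                 # standard-normal log-density, up to a constant
grad(x) = -x                         # its derivative, hand-coded here

δ = 0.5                              # spacing of the search grid
search_range = (-10.0, 10.0)
min_slope, max_slope = 1e-6, 3.0     # bounds on |logf'| at the initial points

grid = search_range[1]:δ:search_range[2]
grads = grad.(grid)

# first grid point with a positive but not-too-steep slope,
# last grid point with a negative but not-too-steep slope
i1 = findfirst(min_slope .< grads .< max_slope)
i2 = findlast(min_slope .< -grads .< max_slope)

@assert i1 !== nothing && i2 !== nothing "no valid initial points in search_range"
@assert i1 < i2 "log-density does not look concave on the grid"

x1, x2 = grid[i1], grid[i2]          # here: (-2.5, 2.5), straddling the mode at 0
```

With these two points, `logf'(x1) > 0` and `logf'(x2) < 0`, which is exactly what the envelope construction requires.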