Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/Distributions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ export
TriangularDist,
Triweight,
Truncated,
Tweedie,
Uniform,
UnivariateGMM,
VonMises,
Expand Down Expand Up @@ -363,7 +364,7 @@ Supported distributions:
NoncentralF, NoncentralHypergeometric, NoncentralT, Normal, NormalCanon,
NormalInverseGaussian, Pareto, PGeneralizedGaussian, Poisson, PoissonBinomial,
QQPair, Rayleigh, Rician, Skellam, Soliton, StudentizedRange, SymTriangularDist, TDist, TriangularDist,
Triweight, Truncated, Uniform, UnivariateGMM,
Triweight, Truncated, Tweedie, Uniform, UnivariateGMM,
VonMises, VonMisesFisher, WalleniusNoncentralHypergeometric, Weibull,
Wishart, ZeroMeanIsoNormal, ZeroMeanIsoNormalCanon,
ZeroMeanDiagNormal, ZeroMeanDiagNormalCanon, ZeroMeanFullNormal,
Expand Down
244 changes: 244 additions & 0 deletions src/univariate/continuous/tweedie.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
"""
Tweedie(μ,σ,p)

The *Tweedie distribution* with mean `μ ≥ 0`, dispersion `σ ≥ 0` and power `1 ≥ p ≥ 2`.
When ``p = 1`` and ``\\sigma = 1`` it is equivalent to a quasi-Poisson distribution,
and when ``p = 2`` to the Gamma distribution. When ``1 > p > 2``, it is a compound
Poisson-Gamma distribution, with probability density function:

```math
f(x; \\mu, \\sigma, p) = \\frac{1}{x} W(x, \\sigma^2, p) exp \\left(
\\frac{1}{\\sigma^2}} [ x \\frac{\\mu^(1-p)}{1-p} - \\frac{\\mu^(2-p)}{2-p} ]
\\right), \\quad x > 0
```
where ``W`` is [Wright's generalized Bessel function](https://en.wikipedia.org/wiki/Bessel%E2%80%93Maitland_function).

Note that if ``1 > p > 2`` then the distribution is continuous with a point mass concentrated at zero.
If ``p = 1`` then the distribution is discrete.

Computation of [`pdf`](@ref) and [`logpdf`](@ref) is carried out using `Float64`.
Accuracy is generally higher than 1e-11, though for some parameter values it can
be as low as 1e-8.

```julia
Tweedie(μ, σ, p) # Tweedie distribution with location μ, scale σ and power p

params(d) # Get the parameters, i.e. (μ, σ, p)
location(d) # Get the location parameter, i.e. μ
scale(d) # Get the scale parameter, i.e. σ
shape(d) # Get the shape parameter, i.e. p

mean(d) # Get the mean, i.e. μ
var(d) # Get the variance, i.e. σ^2 * μ^p
```

External links

- [Tweedie distribution on Wikipedia](https://en.wikipedia.org/wiki/Tweedie_distribution)
- [Compound Poisson distribution on Wikipedia](https://en.wikipedia.org/wiki/Compound_Poisson_distribution)

References

- Dunn P. K., Smyth G. K. (2005). "Series evaluation of Tweedie exponential dispersion model densities"
*Statistics and Computing* 15: 267–280.
"""
struct Tweedie{T <: Real} <: ContinuousUnivariateDistribution
μ::T
σ::T
p::T

Tweedie{T}(µ::T, σ::T, p::T) where {T<:Real} = new{T}(µ, σ, p)
end

function Tweedie(μ::T, σ::T, p::T; check_args::Bool=true) where {T <: Real}
@check_args Tweedie (μ, μ >= zero(μ))
@check_args Tweedie (σ, σ >= zero(σ))
@check_args Tweedie (p, 1 <= p <= 2)
return Tweedie{T}(μ, σ, p)
end

#### Outer constructors
Tweedie(μ::Real, σ::Real, p::Real; check_args::Bool=true) =
Tweedie(promote(μ, σ, p)...; check_args=check_args)
Tweedie(μ::Integer, σ::Integer, p::Integer; check_args::Bool=true) =
Tweedie(float(μ), float(σ), float(p); check_args=check_args)

#### Conversions
convert(::Type{Tweedie{T}}, μ::S, σ::S, p::S) where {T <: Real, S <: Real} = Tweedie(T(μ), T(σ), T(p))
Base.convert(::Type{Tweedie{T}}, d::Tweedie) where {T<:Real} = Tweedie{T}(T(d.μ), T(d.σ), T(d.p))
Base.convert(::Type{Tweedie{T}}, d::Tweedie{T}) where {T<:Real} = d

@distr_support Tweedie 0 Inf

#### Parameters

params(d::Tweedie) = (d.μ, d.σ, d.p)
partype(d::Tweedie{T}) where {T} = T

location(d::Tweedie) = d.μ
scale(d::Tweedie) = d.σ
shape(d::Tweedie) = d.p

Base.eltype(::Type{Tweedie{T}}) where {T} = T

#### Statistics

mean(d::Tweedie) = d.μ
var(d::Tweedie) = d.σ^2 * d.μ^d.p

# Clark, David R. and Charles A. Thayer. 2004.
# “A Primer on the Exponential Family of Distributions.” CAS Discussion Paper Program, 117-148
# https://www.casact.org/sites/default/files/database/dpp_dpp04_04dpp117.pdf
skewness(d::Tweedie) = d.p * d.σ / sqrt(d.μ ^ (2 - d.p))
kurtosis(d::Tweedie) = d.p * (2 * d.p - 1) * d.σ^2 / d.μ ^ (2 - d.p)

function logpdf(d::Tweedie{T}, x::Real) where {T <: Real}
isnan(x) && return eltype(d)(NaN)
x >= 0 || return -Inf
# See: Dunn, Smyth (2005). "Series evaluation of Tweedie exponential dispersion model densities"
# Statistics and Computing 15: 267–280.
# pdf(y, μ, p, ϕ) = f(y, θ, ϕ) = c(y, ϕ) * exp(1/ϕ (y θ - κ(θ)))
# κ = cumulant function
# θ = function of expectation μ and power p
# α = (2-p)/(1-p)
# ϕ = σ^2
# y = x
# for 1<p<2:
# c(y, ϕ) = 1/y * wrightbessel(a, b, z)
# a = -α
# b = 0
# z = (p-1)^α/(2-p) / y^α / ϕ^(1-α)
μ, p, ϕ = d.μ, d.p, d.σ^2
if p == 1
return logpdf(Poisson(μ / ϕ), x / ϕ)
elseif p == 2
return logpdf(Gamma(1 / ϕ, μ * ϕ), x)
else
θ = μ ^ (1 - p) / (1 - p)
κ = μ ^ (2 - p) / (2 - p)
α = (2 - p) / (1 - p)

res = (x * θ - κ) / ϕ
if x > 0
z = ((p - 1) * ϕ / x) ^ α / ((2 - p) * ϕ)
# Use log to reduce risks of overflow when p is close to 1
wb = logwrightbessel(Float64(-α), 0.0, Float64(z))
# Overflow in `logwrightbessel` doesn't generally indicate that the PDF
# value would be larger than `typemax(Float64)`
wb == Inf && return NaN
res += wb - log(x)
end
return T(res)
end
end

function cdf(d::Tweedie, x::Real)
isnan(x) && return eltype(d)(NaN)
x == Inf && return one(eltype(d))
x >= 0 || return zero(eltype(d))
μ = d.μ
p = d.p
ϕ = d.σ^2
if p == 1
return cdf(Poisson(μ / ϕ), x / ϕ)
elseif p == 2
return cdf(Gamma(1 / ϕ, μ * ϕ), x)
else
# the mass at zero has to be handled separately as `quadgk` never evaluates at bounds
return pdf(d, 0) + quadgk(xi -> pdf(d, xi), 0, x, rtol=1e-12)[1]
end
end

function rand(rng::AbstractRNG, d::Tweedie)
μ, p, ϕ = d.μ, d.p, d.σ^2
# note that sources often use β = 1/θ for Gamma distribution
# e.g. https://en.wikipedia.org/wiki/Compound_Poisson_distribution
if p == 1
return ϕ * rand(rng, Poisson(μ / ϕ))
elseif p == 2
return rand(rng, Gamma(1 / ϕ, μ * ϕ))
else
λ = μ^(2 - p) / ((2 - p) * ϕ)
α = (2 - p) / (1 - p)
θ = ((p - 1) * ϕ) / μ^(1 - p)
N = rand(rng, Poisson(λ))
return N == 0 ? zero(θ) : rand(rng, Gamma(- N * α, θ))
end
end

# Implementation inspired by `qtweedie` in R package tweedie
# licensed under MIT with authorization from Peter Dunn
function quantile(d::Tweedie{T}, q::Real) where {T <: Real}
μ, ϕ, p = d.μ, d.σ^2, d.p

if q == 0
return zero(T)
elseif q == 1
return convert(T, Inf)
elseif q < 0 || q > 1
throw(DomainError(q, "q must be between 0 and 1"))
end

if p == 1
return ϕ * quantile(Poisson(μ / ϕ), q)
elseif p == 2
return quantile(Gamma(1 / ϕ, μ * ϕ), q)
else
# Handle point mass at zero
p_zero = pdf(d, 0)
if q <= p_zero
return zero(T)
end

# Starting values via interpolation between Poisson and Gamma quantiles
qp = ϕ * quantile(Poisson(μ / ϕ), q)
qg = quantile(Gamma(1 / ϕ, μ * ϕ), q)
startx = (qg - qp) * p + (2 * qp - qg)

qstart = cdf(d, startx)
rx = lx = startx
if qstart == q
return startx
elseif qstart > q
while true
lx = lx / 2
cdf(d, lx) < q && break
end
elseif qstart < q
while true
rx = 1.5 * (rx + 2)
cdf(d, rx) > q && break
end
end

# Cannot use `quantile_newton` as pdf is sometimes multimodal
return quantile_bisect(d, q, lx, rx)
end
end

function cquantile(d::Tweedie, q::Real)
0 <= q <= 1 || throw(DomainError(q, "q must be between 0 and 1"))
cq = 1.0 - q
# Allow for 1 eps tolerance as due to the mass at zero
# if `1 - q` is rounded up when storing in floating point,
# `cquantile(d, ccdf(d, 0))` can be very different from zero,
# which doesn't make mathematical sense
if d.p < 2 && cq <= nextfloat(pdf(d, 0))
return zero(eltype(d))
else
return quantile(d, cq)
end
end

function invlogccdf(d::Tweedie, lp::Real)
p = -expm1(lp)
# Allow for 1 eps tolerance as due to the mass at zero
# if `1 - q` is rounded up when storing in floating point,
# `invlogccdf(d, logccdf(d, 0))` can be very different from zero,
# which doesn't make mathematical sense
if d.p < 2 && p <= nextfloat(pdf(d, 0))
return zero(eltype(d))
else
return quantile(d, p)
end
end
1 change: 1 addition & 0 deletions src/univariates.jl
Original file line number Diff line number Diff line change
Expand Up @@ -733,6 +733,7 @@ const continuous_distributions = [
"tdist",
"triangular",
"triweight",
"tweedie",
"uniform",
"loguniform", # depends on Uniform
"vonmises",
Expand Down
44 changes: 44 additions & 0 deletions test/ref/continuous/tweedie.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Tweedie Distribution
# Using R's tweedie package for reference implementation

Tweedie <- R6Class("Tweedie",
inherit = ContinuousDistribution,
public = list(
names = c("mu", "sigma", "p"),
mu = NA,
sigma = NA,
p = NA,
initialize = function(mu, sigma, p) {
self$mu <- mu
self$sigma <- sigma
self$p <- p
},
supp = function() { c(0, Inf) },
properties = function() {
mu <- self$mu
sigma <- self$sigma
p <- self$p
list(location=mu,
scale=sigma,
shape=p,
mean=mu,
var=mu^p * sigma^2,
skewness=p * sigma / sqrt(mu ^ (2 - p)),
kurtosis=p * (2 * p - 1) * sigma^2 / mu ^ (2 - p))
},
pdf = function(x, log=FALSE) {
val <- tweedie::dtweedie(x, mu=self$mu, phi=self$sigma^2, power=self$p)
if (log) {
return(log(val))
} else {
return(val)
}
},
cdf = function(x) {
tweedie::ptweedie(x, mu=self$mu, phi=self$sigma^2, power=self$p)
},
quan = function(v) {
tweedie::qtweedie(v, mu=self$mu, phi=self$sigma^2, power=self$p)
}
)
)
6 changes: 6 additions & 0 deletions test/ref/continuous_test.lst
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,12 @@ TruncatedNormal(27, 3, 0, Inf)
TruncatedNormal(-5, 1, -Inf, -10)
TruncatedNormal(1.8, 1.2, -Inf, 0)

Tweedie(2.0, 1.5, 1)
Tweedie(2.0, 1.5, 1.01)
Tweedie(2.0, 1.5, 1.5)
Tweedie(2.0, 1.5, 1.99)
Tweedie(2.0, 1.5, 2.0)

Uniform()
Uniform(0.0, 2.0)
Uniform(3.0, 17.0)
Expand Down
Loading
Loading