fix show method, doc-string and implement arithmetic

ablaom · ablaom · commit 608e0d92d3fd · 2021-11-25T15:19:41.000+13:00
diff --git a/src/CategoricalDistributions.jl b/src/CategoricalDistributions.jl
@@ -9,6 +9,7 @@ using Random
 using UnicodePlots
 
 const Dist = Distributions
+const MAX_NUM_LEVELS_TO_SHOW_BARS = 12  # `show` skips the bar plot when `K` exceeds this
 
 import Distributions: pdf, logpdf, support, mode
 
diff --git a/src/methods.jl b/src/methods.jl
@@ -79,11 +79,22 @@ function Base.show(stream::IO, d::UnivariateFinite)
     print(stream, "UnivariateFinite{$(d.scitype)}($arg_str)")
 end
 
+# generic fallback for "text/plain" display: reuse the compact one-line `show`
+Base.show(io::IO, ::MIME"text/plain", d::UnivariateFinite) = show(io, d)
+
+# in common case of `Real` probabilities we can do a pretty bar plot:
 function Base.show(io::IO, mime::MIME"text/plain",
-                   d::UnivariateFinite{S}) where S
+                   d::UnivariateFinite{<:Finite{K},V,R,P}) where {K,V,R,P<:Real}
+    # Draw bars only when `K` does not exceed `MAX_NUM_LEVELS_TO_SHOW_BARS`
+    # and no stored probability is negative; otherwise fall back to the
+    # compact `show(io, d)`.
+    few_levels = K <= MAX_NUM_LEVELS_TO_SHOW_BARS
+    can_plot = few_levels && all(>=(0), values(d.prob_given_ref))
+    can_plot || return show(io, d)
     s = support(d)
     x = string.(CategoricalArrays.DataAPI.unwrap.(s))
     y = pdf.(d, s)
+    S = d.scitype
     plt = barplot(x, y, title="UnivariateFinite{$S}")
     show(io, mime, plt)
 end
@@ -371,3 +382,59 @@ function Dist.fit(d::Type{<:UnivariateFinite},
 end
 
 
+# ## ARITHMETIC
+
+const ERR_DIFFERENT_SAMPLE_SPACES = ArgumentError(  # thrown by binary `+` and `-`
+    "Adding or subtracting two `UnivariateFinite` objects whose "*
+    "sample spaces have different labellings is not allowed. ")
+
+import Base: +, *, /
+
+function +(d1::U, d2::U) where U <: UnivariateFinite
+    # both operands must carry identical classes, else the sum is refused:
+    classes(d1) != classes(d2) && throw(ERR_DIFFERENT_SAMPLE_SPACES)
+    summed = copy(d1.prob_given_ref)  # work on a copy; `d1` is left untouched
+    for ref in keys(summed)
+        # NOTE(review): a ref absent from `d2.prob_given_ref` presumably
+        # raises `KeyError` here -- confirm both always share the same keys.
+        summed[ref] = summed[ref] + d2.prob_given_ref[ref]
+    end
+    return UnivariateFinite(d1.scitype, d1.decoder, summed)
+end
+
+# Negation and subtraction. `Base.:-` is extended by qualified name because
+# `-` is not in the `import Base: +, *, /` list above; an unqualified
+# definition would not add these methods to `Base.:-`.
+function Base.:-(d::UnivariateFinite)
+    prob_given_ref = copy(d.prob_given_ref)  # leave `d` untouched
+    for ref in keys(prob_given_ref)
+        prob_given_ref[ref] = -prob_given_ref[ref]
+    end
+    return UnivariateFinite(d.scitype, d.decoder, prob_given_ref)
+end
+
+# Throws `ERR_DIFFERENT_SAMPLE_SPACES` unless `classes(d1) == classes(d2)`.
+function Base.:-(d1::U, d2::U) where U <: UnivariateFinite
+    classes(d1) == classes(d2) || throw(ERR_DIFFERENT_SAMPLE_SPACES)
+    prob_given_ref = copy(d1.prob_given_ref)  # leave `d1` untouched
+    for ref in keys(prob_given_ref)
+        prob_given_ref[ref] -= d2.prob_given_ref[ref]
+    end
+    return UnivariateFinite(d1.scitype, d1.decoder, prob_given_ref)
+end
+
+# TODO: remove type restriction on `x` in the following methods if
+# https://github.com/JuliaStats/Distributions.jl/issues/1438 is
+# resolved. Currently we'd have a method ambiguity.
+
+# Scale every stored probability of `d` by the real number `x`. The result
+# reuses `d`'s scitype and decoder; a copy is scaled, so `d` is not mutated.
+function *(d::UnivariateFinite, x::Real)
+    scaled = copy(d.prob_given_ref)
+    for ref in keys(scaled)
+        scaled[ref] = scaled[ref] * x
+    end
+    return UnivariateFinite(d.scitype, d.decoder, scaled)
+end
+*(x::Real, d::UnivariateFinite) = *(d, x)  # scalar on the left: same as `d*x`
+
+/(d::UnivariateFinite, x::Real) = *(d, inv(x))  # divide by multiplying with `inv(x)`
diff --git a/src/types.jl b/src/types.jl
@@ -14,12 +14,15 @@ choosing `probs` to be an array of one higher dimension than the array
 generated.
 
 Here the word "probabilities" is an abuse of terminology as there is
-no requirement that probabilities actually sum to one, only that they
-be non-negative. So `UnivariateFinite` objects actually implement
-arbitrary non-negative measures over finite sets of labelled points. A
+no requirement that the probabilities actually sum to one. Indeed
+there is no restriction on the probabilities at all. In particular,
+`UnivariateFinite` objects implement arbitrary non-negative, signed,
+or complex measures over finite sets of labelled points. A
 `UnivariateDistribution` will be a bona fide probability measure when
-constructed using the `augment=true` option (see below) or when
-`fit` to data.
+constructed using the `augment=true` option (see below) or when `fit`
+to data. And the probabilities of a `UnivariateFinite` object `d` must
+be non-negative, with a non-zero sum, for `rand(d)` to be defined and
+interpretable.
 
 Unless `pool` is specified, `support` should have type
  `AbstractVector{<:CategoricalValue}` and all elements are assumed to
@@ -144,12 +147,15 @@ const _UnivariateFinite_{S} =
 
 # Note that the keys of `prob_given_ref` need not exhaust all the
 # refs of all classes but will be ordered (LittleDicts preserve order)
+# (the docstring `DOC_CONSTRUCTOR` is attached after the definition, via `@doc`)
 struct UnivariateFinite{S,V,R,P} <: _UnivariateFinite_{S}
     scitype::Type{S}
     decoder::CategoricalDecoder{V,R}
     prob_given_ref::LittleDict{R,P,Vector{R}, Vector{P}}
 end
 
+@doc DOC_CONSTRUCTOR UnivariateFinite
+
 """
     UnivariateFiniteArray
 
diff --git a/test/methods.jl b/test/methods.jl
@@ -277,6 +277,31 @@ end
     # @test v ≈ v_close
 end
 
+@testset "arithmetic" begin
+    L = ["yes", "no"]
+    d1 = UnivariateFinite(L, rand(rng, 2), pool=missing)
+    d2 = UnivariateFinite(L, rand(rng, 2), pool=missing)
+
+    # addition and subtraction; the operators are applied directly rather
+    # than through `eval`, which runs in global scope and so cannot see the
+    # variables `d1`, `d2` and `L` defined inside this test set:
+    for op in [+, -]
+        s = op(d1, d2)
+        @test op(pdf(d1, L), pdf(d2, L)) ≈ pdf(s, L)
+    end
+
+    # negative:
+    d_neg = -d1
+    @test pdf(d_neg, L) == -pdf(d1, L)
+
+    # multiplication by scalar:
+    d3 = d1*42
+    @test pdf(d3, L) ≈ pdf(d1, L)*42
+
+    # division by scalar:
+    d3 = d1/42
+    @test pdf(d3, L) ≈ pdf(d1, L)/42
+end
 
 end # module