Skip to content

Commit 38c62a1

Browse files
oschulzandreasnoack
authored andcommitted
Add Base.norm, normalize and normalize! for Histogram (#243)
* Add Base.norm, normalize and normalize! for Histogram * Add Base.float for Histogram and support for aux_weights on normalize * Add tests for histogram norm and normalize * Clarify description of histogram hormalization modes * Add histogram normalization mode :none * Better exceptions and small coding style improvements in hist norm code Also removed some remnant code in hist norm tests. * Add proper docstrings for histogram norm, normalize and normalize! * Add tests for histogram normalize! and normalize with aux weights * Add new field isdensity to Histogram, refactor histogram code Enables consistent and idempotent behaviour of normalization, increases histogram filling performance. Changes: * New Histogram field isdensity * Support for isdensity in constructors, etc., includes refactoring of ctor code (removed duplicated code). * Refactoring of histogram fill code: * Removed duplicated code * New functions binindex and binvolume * Significant performance gain for push!/append! * Support for fit(Histogram{T}, ...) * Better structure of histograms tests with test sets * Remove _tuple_map from Histogram implementation Better to add this to Base. * Simplify implementation of _multi_getindex * Fix float(h::Histogram) * Handle h.isdensity * Don't copy more than necessary (as requested by A. Noack) * Change histogram norm to directly return integral value, not it's norm. * Extend == and show for Histogram to handle new isdensity field * Fix normalize! for Histogram Don't set h.isdensity to true for mode == :none * Improve numerical precision in implementation of Histogram norm/normalize * Use _edge_binvolume in implementation of Histogram norm/normlalize Also add optional result-type argument to binvolume * Change implementation of float(::Histogram), remove _float_deepcopy * Minor code pretty-up in push!(::Histogram)
1 parent 5a7fa11 commit 38c62a1

File tree

2 files changed

+431
-147
lines changed

2 files changed

+431
-147
lines changed

src/hist.jl

Lines changed: 245 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
import Base: show, ==, push!, append!
1+
using Base.Cartesian
2+
3+
import Base: show, ==, push!, append!, float, norm, normalize, normalize!
24

35
# Mechanism for temporary deprecation of default for "closed" (because default
46
# value has changed). After deprecation is lifed, remove "_check_closed_arg"
@@ -14,6 +16,17 @@ function _check_closed_arg(closed::Symbol, funcsym)
1416
end
1517

1618

19+
## Fast getindex function for multiple arrays, returns a tuple of array elements
20+
@inline Base.@propagate_inbounds @generated function _multi_getindex(i::Integer, c::AbstractArray...)
21+
N = length(c)
22+
result_expr = Expr(:tuple)
23+
for j in 1:N
24+
push!(result_expr.args, :(c[$j][i]))
25+
end
26+
result_expr
27+
end
28+
29+
1730
## nice-valued ranges for histograms
1831
function histrange{T}(v::AbstractArray{T}, n::Integer, closed::Symbol=:default_left)
1932
closed = _check_closed_arg(closed,:histrange)
@@ -116,22 +129,24 @@ type Histogram{T<:Real,N,E} <: AbstractHistogram{T,N,E}
116129
edges::E
117130
weights::Array{T,N}
118131
closed::Symbol
132+
isdensity::Bool
119133
function (::Type{Histogram{T,N,E}}){T,N,E}(edges::NTuple{N,AbstractArray},
120-
weights::Array{T,N}, closed::Symbol)
134+
weights::Array{T,N}, closed::Symbol, isdensity::Bool=false)
121135
closed == :right || closed == :left || error("closed must :left or :right")
136+
isdensity && !(T <: AbstractFloat) && error("Density histogram must have float-type weights")
122137
map(x -> length(x)-1,edges) == size(weights) || error("Histogram edge vectors must be 1 longer than corresponding weight dimensions")
123-
new{T,N,E}(edges,weights,closed)
138+
new{T,N,E}(edges,weights,closed,isdensity)
124139
end
125140
end
126141

127-
Histogram{T,N}(edges::NTuple{N,AbstractVector},weights::AbstractArray{T,N},closed::Symbol=:default_left) =
128-
Histogram{T,N,typeof(edges)}(edges,weights,_check_closed_arg(closed,:Histogram))
142+
Histogram{T,N}(edges::NTuple{N,AbstractVector},weights::AbstractArray{T,N},closed::Symbol=:default_left, isdensity::Bool=false) =
143+
Histogram{T,N,typeof(edges)}(edges,weights,_check_closed_arg(closed,:Histogram),isdensity)
129144

130-
Histogram{T,N}(edges::NTuple{N,AbstractVector},::Type{T},closed::Symbol=:default_left) =
131-
Histogram(edges,zeros(T,map(x -> length(x)-1,edges)...),_check_closed_arg(closed,:Histogram))
145+
Histogram{T,N}(edges::NTuple{N,AbstractVector},::Type{T},closed::Symbol=:default_left, isdensity::Bool=false) =
146+
Histogram(edges,zeros(T,map(x -> length(x)-1,edges)...),_check_closed_arg(closed,:Histogram),isdensity)
132147

133-
Histogram{N}(edges::NTuple{N,AbstractVector},closed::Symbol=:default_left) =
134-
Histogram(edges,Int,_check_closed_arg(closed,:Histogram))
148+
Histogram{N}(edges::NTuple{N,AbstractVector},closed::Symbol=:default_left, isdensity::Bool=false) =
149+
Histogram(edges,Int,_check_closed_arg(closed,:Histogram),isdensity)
135150

136151
function show(io::IO, h::AbstractHistogram)
137152
println(io, typeof(h))
@@ -140,100 +155,259 @@ function show(io::IO, h::AbstractHistogram)
140155
println(io," ",e)
141156
end
142157
println(io,"weights: ",h.weights)
143-
print(io,"closed: ",h.closed)
158+
println(io,"closed: ",h.closed)
159+
print(io,"isdensity: ",h.isdensity)
144160
end
145161

146-
(==)(h1::Histogram,h2::Histogram) = (==)(h1.edges,h2.edges) && (==)(h1.weights,h2.weights) && (==)(h1.closed,h2.closed)
162+
(==)(h1::Histogram,h2::Histogram) = (==)(h1.edges,h2.edges) && (==)(h1.weights,h2.weights) && (==)(h1.closed,h2.closed) && (==)(h1.isdensity,h2.isdensity)
147163

148-
# 1-dimensional
149-
Histogram{T}(edge::AbstractVector, weights::AbstractVector{T}, closed::Symbol=:default_left) =
150-
Histogram((edge,), weights, _check_closed_arg(closed,:Histogram))
151164

152-
Histogram{T}(edge::AbstractVector, ::Type{T}, closed::Symbol=:default_left) =
153-
Histogram(edge, zeros(T,length(edge)-1), _check_closed_arg(closed,:Histogram))
165+
binindex{T,E}(h::AbstractHistogram{T,1,E}, x::Real) = binindex(h, (x,))[1]
154166

155-
Histogram(edge::AbstractVector,closed::Symbol=:default_left) =
156-
Histogram(edge, Int, _check_closed_arg(closed,:Histogram))
167+
binindex{T,N,E}(h::Histogram{T,N,E}, xs::NTuple{N,Real}) =
168+
map((edge, x) -> _edge_binindex(edge, h.closed, x), h.edges, xs)
157169

158-
function push!{T,E}(h::Histogram{T,1,E}, x::Real,w::Real)
159-
i = if h.closed == :right
160-
searchsortedfirst(h.edges[1], x) - 1
170+
@inline function _edge_binindex(edge::AbstractVector, closed::Symbol, x::Real)
171+
if closed == :right
172+
searchsortedfirst(edge, x) - 1
161173
else
162-
searchsortedlast(h.edges[1], x)
163-
end
164-
if 1 <= i <= length(h.weights)
165-
@inbounds h.weights[i] += w
174+
searchsortedlast(edge, x)
166175
end
167-
h
168176
end
177+
178+
179+
binvolume{T,E}(h::AbstractHistogram{T,1,E}, binidx::Integer) = binvolume(h, (binidx,))
180+
binvolume{V,T,E}(::Type{V}, h::AbstractHistogram{T,1,E}, binidx::Integer) = binvolume(V, h, (binidx,))
181+
182+
binvolume{T,N,E}(h::Histogram{T,N,E}, binidx::NTuple{N,Integer}) =
183+
binvolume(promote_type(map(eltype, h.edges)...), h, binidx)
184+
185+
binvolume{V,T,N,E}(::Type{V}, h::Histogram{T,N,E}, binidx::NTuple{N,Integer}) =
186+
prod(map((edge, i) -> _edge_binvolume(V, edge, i), h.edges, binidx))
187+
188+
@inline _edge_binvolume{V}(::Type{V}, edge::AbstractVector, i::Integer) = V(edge[i+1]) - V(edge[i])
189+
@inline _edge_binvolume{V}(::Type{V}, edge::Range, i::Integer) = V(step(edge))
190+
@inline _edge_binvolume(edge::AbstractVector, i::Integer) = _edge_binvolume(eltype(edge), edge, i)
191+
192+
193+
# 1-dimensional
194+
195+
Histogram{T}(edge::AbstractVector, weights::AbstractVector{T}, closed::Symbol=:default_left, isdensity::Bool=false) =
196+
Histogram((edge,), weights, closed, isdensity)
197+
198+
Histogram{T}(edge::AbstractVector, ::Type{T}, closed::Symbol=:default_left, isdensity::Bool=false) =
199+
Histogram((edge,), T, closed, isdensity)
200+
201+
Histogram(edge::AbstractVector, closed::Symbol=:default_left, isdensity::Bool=false) =
202+
Histogram((edge,), closed, isdensity)
203+
204+
205+
push!{T,E}(h::AbstractHistogram{T,1,E}, x::Real, w::Real) = push!(h, (x,), w)
169206
push!{T,E}(h::AbstractHistogram{T,1,E}, x::Real) = push!(h,x,one(T))
207+
append!{T}(h::AbstractHistogram{T,1}, v::AbstractVector) = append!(h, (v,))
208+
append!{T}(h::AbstractHistogram{T,1}, v::AbstractVector, wv::Union{AbstractVector,WeightVec}) = append!(h, (v,), wv)
170209

171-
function append!{T}(h::AbstractHistogram{T,1}, v::AbstractVector)
172-
for x in v
173-
push!(h,x)
174-
end
175-
h
176-
end
177-
function append!{T}(h::AbstractHistogram{T,1}, v::AbstractVector,wv::WeightVec)
178-
for (x,w) in zip(v,wv.values)
179-
push!(h,x,w)
180-
end
181-
h
182-
end
183210

184-
fit(::Type{Histogram},v::AbstractVector, edg::AbstractVector; closed::Symbol=:default_left) =
185-
append!(Histogram(edg,_check_closed_arg(closed,:fit)), v)
186-
fit(::Type{Histogram},v::AbstractVector; closed::Symbol=:default_left, nbins=sturges(length(v))) = begin
187-
closed = _check_closed_arg(closed,:fit)
188-
fit(Histogram, v, histrange(v,nbins,closed); closed=closed)
189-
end
211+
fit{T}(::Type{Histogram{T}},v::AbstractVector, edg::AbstractVector; closed::Symbol=:default_left) =
212+
fit(Histogram{T},(v,), (edg,), closed=closed)
213+
fit{T}(::Type{Histogram{T}},v::AbstractVector; closed::Symbol=:default_left, nbins=sturges(length(v))) =
214+
fit(Histogram{T},(v,); closed=closed, nbins=nbins)
215+
fit{T}(::Type{Histogram{T}},v::AbstractVector, wv::WeightVec, edg::AbstractVector; closed::Symbol=:default_left) =
216+
fit(Histogram{T},(v,), wv, (edg,), closed=closed)
217+
fit{T}(::Type{Histogram{T}},v::AbstractVector, wv::WeightVec; closed::Symbol=:default_left, nbins=sturges(length(v))) =
218+
fit(Histogram{T}, (v,), wv; closed=closed, nbins=nbins)
219+
220+
fit{W}(::Type{Histogram}, v::AbstractVector, wv::WeightVec{W}, args...; kwargs...) = fit(Histogram{W}, v, wv, args...; kwargs...)
190221

191-
fit{W}(::Type{Histogram},v::AbstractVector, wv::WeightVec{W}, edg::AbstractVector; closed::Symbol=:default_left) =
192-
append!(Histogram(edg,W,_check_closed_arg(closed,:fit)), v, wv)
193-
fit(::Type{Histogram},v::AbstractVector, wv::WeightVec; closed::Symbol=:default_left, nbins=sturges(length(v))) = begin
194-
closed = _check_closed_arg(closed,:fit)
195-
fit(Histogram, v, wv, histrange(v,nbins,closed); closed=closed)
196-
end
197222

198223
# N-dimensional
224+
199225
function push!{T,N}(h::Histogram{T,N},xs::NTuple{N,Real},w::Real)
200-
is = if h.closed == :right
201-
map((edge, x) -> searchsortedfirst(edge,x) - 1, h.edges, xs)
202-
else
203-
map(searchsortedlast, h.edges, xs)
226+
h.isdensity && error("Density histogram must have float-type weights")
227+
idx = binindex(h, xs)
228+
if checkbounds(Bool, h.weights, idx...)
229+
@inbounds h.weights[idx...] += w
204230
end
205-
try
206-
h.weights[is...] += w
207-
catch e
208-
isa(e,BoundsError) || rethrow(e)
231+
h
232+
end
233+
234+
function push!{T<:AbstractFloat,N}(h::Histogram{T,N},xs::NTuple{N,Real},w::Real)
235+
idx = binindex(h, xs)
236+
if checkbounds(Bool, h.weights, idx...)
237+
@inbounds h.weights[idx...] += h.isdensity ? w / binvolume(h, idx) : w
209238
end
210239
h
211240
end
241+
212242
push!{T,N}(h::AbstractHistogram{T,N},xs::NTuple{N,Real}) = push!(h,xs,one(T))
213243

244+
214245
function append!{T,N}(h::AbstractHistogram{T,N}, vs::NTuple{N,AbstractVector})
215-
for xs in zip(vs...)
216-
push!(h,xs)
246+
@inbounds for i in eachindex(vs...)
247+
xs = _multi_getindex(i, vs...)
248+
push!(h, xs, one(T))
217249
end
218250
h
219251
end
220-
function append!{T,N}(h::AbstractHistogram{T,N}, vs::NTuple{N,AbstractVector},wv::WeightVec)
221-
for (xs,w) in zip(zip(vs...),wv.values)
222-
push!(h,xs,w)
252+
function append!{T,N}(h::AbstractHistogram{T,N}, vs::NTuple{N,AbstractVector}, wv::AbstractVector)
253+
@inbounds for i in eachindex(wv, vs...)
254+
xs = _multi_getindex(i, vs...)
255+
push!(h, xs, wv[i])
223256
end
224257
h
225258
end
259+
append!{T,N}(h::AbstractHistogram{T,N}, vs::NTuple{N,AbstractVector}, wv::WeightVec) = append!(h, vs, values(wv))
226260

227-
fit{N}(::Type{Histogram}, vs::NTuple{N,AbstractVector}, edges::NTuple{N,AbstractVector}; closed::Symbol=:default_left) =
228-
append!(Histogram(edges,_check_closed_arg(closed,:fit)), vs)
229-
fit{N}(::Type{Histogram}, vs::NTuple{N,AbstractVector}; closed::Symbol=:default_left, nbins=sturges(length(vs[1]))) = begin
261+
262+
fit{T,N}(::Type{Histogram{T}}, vs::NTuple{N,AbstractVector}, edges::NTuple{N,AbstractVector}; closed::Symbol=:default_left) =
263+
append!(Histogram(edges, T, _check_closed_arg(closed,:fit), false), vs)
264+
265+
fit{T,N}(::Type{Histogram{T}}, vs::NTuple{N,AbstractVector}; closed::Symbol=:default_left, isdensity::Bool=false, nbins=sturges(length(vs[1]))) = begin
230266
closed = _check_closed_arg(closed,:fit)
231-
fit(Histogram, vs, histrange(vs,nbins,closed); closed=closed)
267+
fit(Histogram{T}, vs, histrange(vs,nbins,closed); closed=closed)
232268
end
233269

234-
fit{N,W}(::Type{Histogram}, vs::NTuple{N,AbstractVector}, wv::WeightVec{W}, edges::NTuple{N,AbstractVector}; closed::Symbol=:default_left) =
235-
append!(Histogram(edges,W,_check_closed_arg(closed,:fit)), vs, wv)
236-
fit{N}(::Type{Histogram},vs::NTuple{N,AbstractVector}, wv::WeightVec; closed::Symbol=:default_left, nbins=sturges(length(vs[1]))) = begin
270+
fit{T,N,W}(::Type{Histogram{T}}, vs::NTuple{N,AbstractVector}, wv::WeightVec{W}, edges::NTuple{N,AbstractVector}; closed::Symbol=:default_left) =
271+
append!(Histogram(edges, T, _check_closed_arg(closed,:fit), false), vs, wv)
272+
273+
fit{T,N}(::Type{Histogram{T}}, vs::NTuple{N,AbstractVector}, wv::WeightVec; closed::Symbol=:default_left, isdensity::Bool=false, nbins=sturges(length(vs[1]))) = begin
237274
closed = _check_closed_arg(closed,:fit)
238-
fit(Histogram, vs, wv, histrange(vs,nbins,closed); closed=closed)
275+
fit(Histogram{T}, vs, wv, histrange(vs,nbins,closed); closed=closed)
276+
end
277+
278+
fit(::Type{Histogram}, args...; kwargs...) = fit(Histogram{Int}, args...; kwargs...)
279+
fit{N,W}(::Type{Histogram}, vs::NTuple{N,AbstractVector}, wv::WeightVec{W}, args...; kwargs...) = fit(Histogram{W}, vs, wv, args...; kwargs...)
280+
281+
282+
# Get a suitable high-precision type for the norm of a histogram.
283+
@generated function norm_type{T, N, E}(h::Histogram{T, N, E})
284+
args = [:( eltype(edges[$d]) ) for d = 1:N]
285+
quote
286+
edges = h.edges
287+
norm_type(promote_type(T, $(args...)))
288+
end
289+
end
290+
291+
norm_type{T<:Integer}(::Type{T}) = promote_type(T, Int64)
292+
norm_type{T<:AbstractFloat}(::Type{T}) = promote_type(T, Float64)
293+
294+
295+
"""
296+
norm(h::Histogram)
297+
298+
Calculate the norm of histogram `h` as the absolute value of its integral.
299+
"""
300+
@generated function norm{T, N, E}(h::Histogram{T, N, E})
301+
quote
302+
edges = h.edges
303+
weights = h.weights
304+
SumT = norm_type(h)
305+
v_0 = 1
306+
s_0 = zero(SumT)
307+
@inbounds @nloops(
308+
$N, i, weights,
309+
d -> begin
310+
v_{$N-d+1} = v_{$N-d} * _edge_binvolume(SumT, edges[d], i_d)
311+
s_{$N-d+1} = zero(SumT)
312+
end,
313+
d -> begin
314+
s_{$N-d} += s_{$N-d+1}
315+
end,
316+
begin
317+
$(Symbol("s_$(N)")) += (@nref $N weights i) * $(Symbol("v_$N"))
318+
end
319+
)
320+
s_0
321+
end
322+
end
323+
324+
325+
float{T<:AbstractFloat, N, E}(h::Histogram{T, N, E}) = h
326+
327+
float{T, N, E}(h::Histogram{T, N, E}) = Histogram(h.edges, float(h.weights), h.closed, h.isdensity)
328+
329+
330+
331+
"""
332+
normalize!{T<:AbstractFloat, N, E}(h::Histogram{T, N, E}, aux_weights::Array{T,N}...; mode::Symbol = :pdf)
333+
334+
Normalize the histogram `h` and optionally scale one or more auxiliary weight
335+
arrays appropriately. See description of `normalize` for details. Returns `h`.
336+
"""
337+
@generated function normalize!{T<:AbstractFloat, N, E}(h::Histogram{T, N, E}, aux_weights::Array{T,N}...; mode::Symbol = :pdf)
338+
quote
339+
edges = h.edges
340+
weights = h.weights
341+
342+
for A in aux_weights
343+
(size(A) != size(weights)) && throw(DimensionMismatch("aux_weights must have same size as histogram weights"))
344+
end
345+
346+
if mode == :none
347+
# nothing to do
348+
elseif mode == :pdf || mode == :density
349+
if h.isdensity
350+
if mode == :pdf
351+
# histogram already represents a density, just divide weights by norm
352+
s = 1/norm(h)
353+
weights .*= s
354+
for A in aux_weights
355+
A .*= s
356+
end
357+
else
358+
# histogram already represents a density, nothing to do
359+
end
360+
else
361+
# Divide weights by bin volume, for :pdf also divide by sum of weights
362+
SumT = norm_type(h)
363+
vs_0 = (mode == :pdf) ? sum(SumT(x) for x in weights) : one(SumT)
364+
@inbounds @nloops $N i weights d->(vs_{$N-d+1} = vs_{$N-d} * _edge_binvolume(SumT, edges[d], i_d)) begin
365+
(@nref $N weights i) /= $(Symbol("vs_$N"))
366+
for A in aux_weights
367+
(@nref $N A i) /= $(Symbol("vs_$N"))
368+
end
369+
end
370+
end
371+
h.isdensity = true
372+
else mode != :pdf && mode != :density
373+
throw(ArgumentError("Normalization mode must be :pdf, :density or :none"))
374+
end
375+
h
376+
end
377+
end
378+
379+
380+
"""
381+
normalize{T, N, E}(h::Histogram{T, N, E}; mode::Symbol = :pdf)
382+
383+
Normalize the histogram `h`.
384+
385+
Valid values for `mode` are:
386+
387+
* `:pdf`: Normalize by sum of weights and bin sizes. Resulting histogram
388+
has norm 1 and represents a PDF.
389+
* `:density`: Normalize by bin sizes only. Resulting histogram represents
390+
count density of input and does not have norm 1. Will not modify the
391+
histogram if it already represents a density (`h.isdensity == 1`).
392+
* `:none`: Leaves histogram unchanged. Useful to simplify code that has to
393+
conditionally apply different modes of normalization.
394+
"""
395+
normalize{T, N, E}(h::Histogram{T, N, E}; mode::Symbol = :pdf) =
396+
normalize!(deepcopy(float(h)), mode = mode)
397+
398+
399+
"""
400+
normalize{T, N, E}(h::Histogram{T, N, E}, aux_weights::Array{T,N}...; mode::Symbol = :pdf)
401+
402+
Normalize the histogram `h` and rescales one or more auxiliary weight arrays
403+
at the same time (`aux_weights` may, e.g., contain estimated statistical
404+
uncertainties). The values of the auxiliary arrays are scaled by the same
405+
factor as the corresponding histogram weight values. Returns a tuple of the
406+
normalized histogram and scaled auxiliary weights.
407+
"""
408+
function normalize{T, N, E}(h::Histogram{T, N, E}, aux_weights::Array{T,N}...; mode::Symbol = :pdf)
409+
h_fltcp = deepcopy(float(h))
410+
aux_weights_fltcp = map(x -> deepcopy(float(x)), aux_weights)
411+
normalize!(h_fltcp, aux_weights_fltcp..., mode = mode)
412+
(h_fltcp, aux_weights_fltcp...)
239413
end

0 commit comments

Comments
 (0)