Skip to content

Commit 76b1574

Browse files
committed
add lscv bandwidth selector
1 parent a42b129 commit 76b1574

File tree

3 files changed

+36
-2
lines changed

3 files changed

+36
-2
lines changed

REQUIRE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
julia 0.3-
22
StatsBase
33
Distributions 0.4.6
4+
Optim

src/KernelDensity.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ module KernelDensity
22

33
using StatsBase
44
using Distributions
5+
using Optim
56

67
import Base: conv
78
import StatsBase: RealVector, RealMatrix

src/univariate.jl

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ function tabulate(data::RealVector, midpoints::Range)
8686
end
8787
end
8888

89-
# returns an un-convolved KDE
89+
# returns an un-convolved KDE
9090
UnivariateKDE(midpoints, grid)
9191
end
9292

@@ -126,7 +126,7 @@ end
126126

127127
function kde(data::RealVector, dist::UnivariateDistribution;
128128
boundary::(Real,Real)=kde_boundary(data,std(dist)), npoints::Int=2048)
129-
129+
130130
midpoints = kde_range(boundary,npoints)
131131
kde(data,midpoints,dist)
132132
end
@@ -144,3 +144,35 @@ function kde(data::RealVector; bandwidth=default_bandwidth(data), kernel=Normal,
144144
dist = kernel_dist(kernel,bandwidth)
145145
kde(data,dist;boundary=boundary,npoints=npoints)
146146
end
147+
148+
#change the M to some larger value to get better precision of lscv
149+
function bandwidth_lscv(data::RealVector; kernel::DataType=Normal, M=1024)
150+
n=length(data)
151+
h0=default_bandwidth(data)
152+
hlb = h0/sqrt(n)
153+
hub = sqrt(n)*h0
154+
xlb, xub = extrema(data)
155+
midpoints = kde_range((xlb-4*h0, xub+4*h0), M)
156+
157+
k = tabulate(data, midpoints)
158+
# the ft here is M/ba*sqrt(2pi) * u(s), it is M times the Yl in Silverman's book
159+
Yl2 = abs2(rfft(k.density)/M)
160+
161+
ba = step(k.x)*M # the range b -a
162+
c = -twoπ/ba
163+
164+
return Optim.optimize(h -> lscv(h, Yl2, kernel, c, ba, n,M), hlb, hub).minimum
165+
end
166+
167+
#Silverman's book use the special case of gaussian kernel. Here the method is generalized to any symmetric kernel
168+
function lscv(bandwidth::Real, Yl2::RealVector, kernel::DataType, c::Real, ba::Real, n::Int,M::Int)
169+
dist = kernel_dist(kernel,bandwidth)
170+
zeta_star = zeros(length(Yl2))
171+
for j = 1:length(Yl2)
172+
ksl = real(cf(dist,j*c))
173+
zeta_star[j] = Yl2[j] * (ksl * ksl - 2 * ksl)
174+
end
175+
#Correct the error in silverman's book
176+
#∫ (cf^2 -2cf)u(s)²ds <- ∑(cf^2 - 2cf)*Yl2*ba²/2pi * c
177+
sum(zeta_star) * abs(c)*ba*ba/(2*pi) + pdf(dist, 0.0)/n
178+
end

0 commit comments

Comments
 (0)