Skip to content

Commit 97d7dc3

Browse files
authored
Add some precompiles (#201)
On the current release, the first call looks like this:

```julia
julia> @time imfilter(img, KernelFactors.gaussian((3,3)));
  2.135609 seconds (3.46 M allocations: 211.109 MiB, 12.01% gc time, 94.98% compilation time)

julia> @time mapwindow(extrema, img, (3,3))
  0.756083 seconds (1.17 M allocations: 68.258 MiB, 6.57% gc time, 99.89% compilation time)
```

On this branch, it looks like this:

```julia
julia> @time imfilter(img, KernelFactors.gaussian((3,3)));
  0.620004 seconds (362.78 k allocations: 40.455 MiB, 1.47% gc time, 96.09% compilation time)

julia> @time mapwindow(extrema, img, (3,3))
  0.385470 seconds (334.52 k allocations: 19.585 MiB, 9.43% gc time, 99.82% compilation time)
```

Quite a nice reduction in latency.
1 parent ceb8be4 commit 97d7dc3

File tree

7 files changed

+134
-17
lines changed

7 files changed

+134
-17
lines changed

precompile/script.jl

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# This is to be used with `@snoopi_deep` from SnoopCompile
2+
3+
using SnoopCompile
4+
using ImageCore
5+
using Statistics
6+
7+
images2d = [rand(Float32, 100, 100),
8+
rand(Float64, 100, 100),
9+
rand(Gray{N0f8}, 100, 100),
10+
rand(Gray{N0f16}, 100, 100),
11+
rand(RGB{N0f8}, 100, 100),
12+
]
13+
14+
images3d = [rand(Gray{N0f16}, 100, 100, 10),
15+
]
16+
17+
tinf = @snoopi_deep begin
18+
using ImageFiltering
19+
20+
Kernel.gaussian((3,3))
21+
Kernel.gaussian((3.0,3.0))
22+
Kernel.DoG(3)
23+
Kernel.DoG(3.0)
24+
Kernel.Laplacian()
25+
Kernel.LoG(3)
26+
Kernel.LoG(3.0)
27+
Kernel.sobel()
28+
29+
KernelFactors.gaussian((3,3))
30+
KernelFactors.gaussian((3.0,3.0))
31+
KernelFactors.IIRGaussian((3,3))
32+
KernelFactors.IIRGaussian(3.0)
33+
KernelFactors.sobel()
34+
35+
for img in images2d
36+
for kern in (Kernel.gaussian((3,3)),
37+
KernelFactors.gaussian((3,3)),
38+
KernelFactors.IIRGaussian((3.0, 3.0)))
39+
imfilter(img, kern)
40+
end
41+
if eltype(img) <: Union{Number,Gray}
42+
mapwindow(extrema, img, (3,3))
43+
mapwindow(median!, img, (3,3))
44+
end
45+
end
46+
47+
for img in images3d
48+
for kern in (Kernel.gaussian((3,3,3)),
49+
KernelFactors.gaussian((3,3,3)),
50+
KernelFactors.IIRGaussian((3.0, 3.0, 3.0)))
51+
imfilter(img, kern)
52+
end
53+
end
54+
end

src/ImageFiltering.jl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,4 +102,8 @@ function __init__()
102102
end
103103
end
104104

105+
if Base.VERSION >= v"1.4.2"
106+
include("precompile.jl")
107+
end
108+
105109
end # module

src/border.jl

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -660,7 +660,7 @@ The command `padarray(A,Fill(0,(1,1,1)))` yields
660660
padarray(img::AbstractArray, border::AbstractBorder) = padarray(eltype(img), img, border)
661661
function padarray(::Type{T}, img::AbstractArray, border) where {T}
662662
ba = BorderArray(img, border)
663-
out = similar(ba, T)
663+
out = similar(ba, T, axes(ba))
664664
copy!(out, ba)
665665
end
666666

@@ -897,23 +897,24 @@ end
897897
898898
Generate an index-vector to be used for padding. `inds` specifies the image axes along a particular axis; `lo` and `hi` are the amount to pad on the lower and upper, respectively, sides of this axis. `border` specifying the style of padding.
899899
"""
900-
function padindex(border::Pad, lo::Integer, inds::AbstractUnitRange, hi::Integer)
900+
function padindex(border::Pad, lo::Int, inds::UnitRange{Int}, hi::Int)
901901
if border.style == :replicate
902-
indsnew = vcat(fill(first(inds), lo), UnitRange(inds), fill(last(inds), hi))
902+
indsnew = Int[fill(first(inds), lo); inds; fill(last(inds), hi)]
903903
OffsetArray(indsnew, first(inds)-lo:last(inds)+hi)
904904
elseif border.style == :circular
905905
return modrange(extend(lo, inds, hi), inds)
906906
elseif border.style == :symmetric
907-
I = OffsetArray([inds; reverse(inds)], (0:2*length(inds)-1) .+ first(inds))
907+
I = OffsetArray(Int[inds; reverse(inds)], (0:2*length(inds)-1) .+ first(inds))
908908
r = modrange(extend(lo, inds, hi), axes(I, 1))
909909
return I[r]
910910
elseif border.style == :reflect
911-
I = OffsetArray([inds; last(inds)-1:-1:first(inds)+1], (0:2*length(inds)-3) .+ first(inds))
911+
I = OffsetArray(Int[inds; last(inds)-1:-1:first(inds)+1], (0:2*length(inds)-3) .+ first(inds))
912912
return I[modrange(extend(lo, inds, hi), axes(I, 1))]
913913
else
914914
error("border style $(border.style) unrecognized")
915915
end
916916
end
917+
padindex(border::Pad, lo::Integer, inds::AbstractUnitRange, hi::Integer) = padindex(border, Int(lo)::Int, UnitRange{Int}(inds)::UnitRange{Int}, Int(hi)::Int)
917918
function padindex(border::Pad, inner::AbstractUnitRange, outer::AbstractUnitRange)
918919
lo = max(0, first(inner)-first(outer))
919920
hi = max(0, last(outer)-last(inner))

src/kernel.jl

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -484,4 +484,14 @@ end
484484

485485
reflectind(r::AbstractUnitRange) = -last(r):-first(r)
486486

487+
if Base.VERSION >= v"1.4.2" && ccall(:jl_generating_output, Cint, ()) == 1
488+
precompile(Laplacian, ())
489+
precompile(sobel, ())
490+
for T in (Int, Float64, Float32)
491+
precompile(gaussian, (Tuple{T,T},))
492+
precompile(DoG, (T,))
493+
precompile(LoG, (T,))
494+
end
495+
end
496+
487497
end

src/kernelfactors.jl

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -408,12 +408,13 @@ Construct a 1d gaussian kernel `g` with standard deviation `σ`, optionally
408408
providing the kernel length `l`. The default is to extend by two `σ`
409409
in each direction from the center. `l` must be odd.
410410
"""
411-
function gaussian(σ::Real, l = 4*ceil(Int,σ)+1)
411+
function gaussian(σ::Real, l::Int = 4*ceil(Int,σ)+1)
412412
isodd(l) || throw(ArgumentError("length must be odd"))
413413
w = l>>1
414414
g = σ == 0 ? [exp(0/(2*oftype(σ, 1)^2))] : [exp(-x^2/(2*σ^2)) for x=-w:w]
415415
centered(g/sum(g))
416416
end
417+
gaussian(σ::Real, l::Integer) = gaussian(σ, Int(l)::Int)
417418

418419
"""
419420
gaussian((σ1, σ2, ...), [l]) -> (g1, g2, ...)
@@ -424,8 +425,8 @@ provide the kernel length `l`, which must be a tuple of the same
424425
length.
425426
"""
426427
gaussian(σs::NTuple{N,Real}, ls::NTuple{N,Integer}) where {N} =
427-
kernelfactors( map((σ,l)->gaussian(σ,l), σs, ls) )
428-
gaussian(σs::NTuple{N,Real}) where {N} = kernelfactors(map(σ->gaussian(σ), σs))
428+
kernelfactors( map(gaussian, σs, ls) )
429+
gaussian(σs::NTuple{N,Real}) where {N} = kernelfactors(map(gaussian, σs))
429430

430431
gaussian(σs::AbstractVector, ls::AbstractVector) = gaussian((σs...,), (ls...,))
431432
gaussian(σs::AbstractVector) = gaussian((σs...,))
@@ -573,4 +574,14 @@ end
573574

574575
kdim(keep::Bool, k) = keep ? centered(k) : OffsetArray([oneunit(eltype(k))], 0:0)
575576

577+
if Base.VERSION >= v"1.4.2" && ccall(:jl_generating_output, Cint, ()) == 1
578+
precompile(sobel, ())
579+
for T in (Int, Float64, Float32)
580+
precompile(gaussian, (Tuple{T,T},))
581+
precompile(gaussian, (T,))
582+
precompile(gaussian, (T, Int))
583+
precompile(IIRGaussian, (Tuple{T,T},))
584+
end
585+
end
586+
576587
end

src/mapwindow.jl

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,8 @@ and then `mapwindow(f, img, (m,n))` should filter at the 75th quantile.
6161
6262
See also: [`imfilter`](@ref).
6363
"""
64-
function mapwindow(f, img, window; border="replicate",
65-
indices=default_imginds(img, window, border), callmode=:copy!)
64+
function mapwindow(f::F, img, window; border="replicate",
65+
indices=default_imginds(img, window, border), callmode=:copy!) where F
6666
if callmode != :copy!
6767
error("Only callmode=:copy! is currently supported")
6868
end
@@ -88,7 +88,7 @@ function default_imginds(img, window, border::Inner)
8888
map(self_offset, indind)
8989
end
9090

91-
function _mapwindow(f, img, window, border, imginds)
91+
function _mapwindow(f::F, img, window, border, imginds) where F
9292
out = allocate_output(f, img, window, border, imginds)
9393
mapwindow_kernel!(f, out, img, window, border, imginds)
9494
end
@@ -223,20 +223,20 @@ function _indices_of_interiour_indices(fullimginds, imginds, kerinds)
223223
map(_indices_of_interiour_range, fullimginds, imginds, kerinds)
224224
end
225225

226-
function allocate_output(f, img, window, border, imginds)
226+
function allocate_output(f::F, img, window, border, imginds) where F
227227
T = compute_output_eltype(f, img, window, border, imginds)
228228
outinds = compute_output_indices(imginds)
229229
similar(img, T, outinds)
230230
end
231231

232-
function allocate_buffer(f, img, window)
232+
function allocate_buffer(f::F, img, window) where F
233233
T = eltype(img)
234234
buf = Array{T}(undef,map(length, window))
235235
bufrs = default_shape(f)(buf)
236236
buf, bufrs
237237
end
238238

239-
function compute_output_eltype(f, img, window, border, imginds)
239+
function compute_output_eltype(f::F, img, window, border, imginds) where F
240240
buf, bufrs = allocate_buffer(f, img, window)
241241
make_buffer_values_realistic!(buf, img, window, border, imginds)
242242
typeof(f(bufrs))
@@ -383,8 +383,8 @@ function _extrema_filter!(A::AbstractArray, w1, w...)
383383
if w1 > 1
384384
a = first(A)
385385
if w1 <= 20
386-
cache = ntuple(i->a, w1>>1)
387-
_extrema_filter1!(A, w1, cache)
386+
cache = ntuple(i->a, w1>>1) # this line is not inferrable, and contributes to latency via...
387+
_extrema_filter1!(A, w1, cache) # ...extensive specialization of _extrema_filter1! (on typeof(A) and value of w1)
388388
else
389389
n = w1>>1
390390
cache = CircularDeque{typeof(a)}(n)
@@ -395,7 +395,7 @@ function _extrema_filter!(A::AbstractArray, w1, w...)
395395
end
396396
end
397397
if ndims(A) > 1
398-
_extrema_filter!(permutedims(A, [2:ndims(A);1]), w...)
398+
_extrema_filter!(permutedims(A, [2:ndims(A);1:1]), w...)
399399
else
400400
return A
401401
end

src/precompile.jl

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
if ccall(:jl_generating_output, Cint, ()) == 1
2+
let
3+
images2d = [rand(Float32, 100, 100),
4+
rand(Float64, 100, 100),
5+
rand(Gray{N0f8}, 100, 100),
6+
rand(Gray{N0f16}, 100, 100),
7+
rand(RGB{N0f8}, 100, 100),
8+
]
9+
images3d = [rand(Gray{N0f16}, 100, 100, 10),
10+
]
11+
12+
for img in images2d
13+
for kern in (Kernel.gaussian((3,3)),
14+
KernelFactors.gaussian((3,3)),
15+
KernelFactors.IIRGaussian((3.0, 3.0)))
16+
for r in (CPU1(FIR()), CPUThreads(FIRTiled((5,5))), CPU1(FFT()))
17+
@assert precompile(imfilter, (typeof(r), typeof(img), typeof(kern)))
18+
end
19+
@assert precompile(imfilter, (typeof(img), typeof(kern)))
20+
end
21+
if eltype(img) <: Union{Number,Gray}
22+
@assert precompile(mapwindow, (typeof(extrema), typeof(img), typeof((3,3))))
23+
@assert precompile(mapwindow, (typeof(median!), typeof(img), typeof((3,3))))
24+
end
25+
end
26+
for img in images3d
27+
for kern in (Kernel.gaussian((3,3,3)),
28+
KernelFactors.gaussian((3,3,3)),
29+
KernelFactors.IIRGaussian((3.0, 3.0, 3.0)))
30+
for r in (CPU1(FIR()), CPUThreads(FIRTiled((5,5))), CPU1(FFT()))
31+
@assert precompile(imfilter, (typeof(r), typeof(img), typeof(kern)))
32+
end
33+
@assert precompile(imfilter, (typeof(img), typeof(kern)))
34+
end
35+
end
36+
end
37+
end

0 commit comments

Comments
 (0)