11using ImageFiltering, FFTW, LinearAlgebra, Profile, Random
2- # using ProfileView
32using ComputationalResources
43
# Thread counts for FFTW and BLAS are read from the environment so that benchmark runs
# can be repeated under different configurations without editing this script.
FFTW.set_num_threads(parse(Int, get(ENV, "FFTW_NUM_THREADS", "1")))

# BLAS defaults to half of Julia's threads. The `max(1, …)` guard matters when Julia is
# started with a single thread, where `Threads.nthreads() ÷ 2` evaluates to 0.
BLAS.set_num_threads(
    parse(Int, get(ENV, "BLAS_NUM_THREADS", string(max(1, Threads.nthreads() ÷ 2)))),
)
76
8- function benchmark (mats)
7+ function benchmark_new (mats)
98 kernel = ImageFiltering. factorkernel(Kernel. LoG(1 ))
109 Threads. @threads for mat in mats
1110 frame_filtered = deepcopy(mat[:, :, 1 ])
@@ -17,6 +16,18 @@ function benchmark(mats)
1716 return
1817 end
1918end
"""
    benchmark_old(mats)

Filter every frame `mat[:, :, i]` of each 3-D array in `mats` with a factored
`Kernel.LoG(1)` kernel, using a plain `CPU1(Algorithm.FFT())` resource.

The arrays in `mats` are distributed over threads with `Threads.@threads`. The filtered
frames are written into a per-array scratch buffer and discarded, so the function is only
useful for timing. Returns `nothing`.
"""
function benchmark_old(mats)
    kernel = ImageFiltering.factorkernel(Kernel.LoG(1))
    Threads.@threads for mat in mats
        # Scratch output buffer with the shape of one frame. It is only ever used as the
        # destination of `imfilter!`, so it does not need to start as a copy of frame 1
        # (and allocating it this way also works when `mat` has no frames at all).
        frame_filtered = similar(mat, axes(mat, 1), axes(mat, 2))
        r_noncached = CPU1(Algorithm.FFT())
        for i in axes(mat, 3)
            frame = @view mat[:, :, i]
            imfilter!(r_noncached, frame_filtered, frame, kernel)
        end
        # Deliberately no `return` here: a `return` inside the body of a
        # `Threads.@threads` loop exits the task-local function the macro generates, so
        # any further arrays assigned to the same task would be skipped.
    end
    return nothing
end
2031
2132function test(mats)
2233 kernel = ImageFiltering. factorkernel(Kernel. LoG(1 ))
@@ -26,54 +37,31 @@ function test(mats)
2637 f2 = deepcopy(mat[:, :, 1 ])
2738 r_noncached = CPU1(Algorithm. FFT())
2839 for i in axes(mat, 3 )
29- frame = @view mat[:, :, i]
30- @info " imfilter! noncached"
31- imfilter!(r_noncached, f2, frame, kernel)
32- @info " imfilter! cached"
33- imfilter!(r_cached, f1, frame, kernel)
40+ imfilter!(r_noncached, f2, deepcopy(mat[:, :, i]), kernel)
41+ imfilter!(r_cached, f1, deepcopy(mat[:, :, i]), kernel)
3442 @show f1[1 : 4 ] f2[1 : 4 ]
3543 f1 ≈ f2 || error(" f1 !≈ f2" )
3644 end
3745 return
3846 end
3947end
4048
"""
    run(; nmats=10, seed=1)

Generate `nmats` random `Float64` stacks and time the two filtering paths on them.

Each stack has 80–100 rows, 80–100 columns and 2000–3000 frames, drawn after seeding the
global RNG with `seed` so that repeated runs see the same data.

`benchmark_new` and `benchmark_old` are each called once untimed (to absorb compilation)
and then three times under `@time`. Finally `test(mats)` is called and its result is
returned.
"""
function run(; nmats=10, seed=1)
    Random.seed!(seed)
    mats = [
        rand(Float64, rand(80:100), rand(80:100), rand(2000:3000)) for _ in 1:nmats
    ]

    # First call is untimed so the `@time` figures below exclude compilation.
    benchmark_new(mats)
    for _ in 1:3
        @time "warm run of benchmark_new(mats)" benchmark_new(mats)
    end

    benchmark_old(mats)
    for _ in 1:3
        @time "warm run of benchmark_old(mats)" benchmark_old(mats)
    end

    return test(mats)
end
6266
63- profile()
64-
65- using ImageFiltering
66- using ImageFiltering. RFFT
67-
68- function mwe()
69- a = rand(Float64, 10 , 10 )
70- out1 = rfft(a)
71-
72- buf = RFFT. RCpair{Float64}(undef, size(a))
73- rfft_plan = RFFT. plan_rfft!(buf)
74- copy!(buf, a)
75- out2 = complex(rfft_plan(buf))
76-
77- return out1 ≈ out2
78- end
79- mwe()
67+ run()
0 commit comments