Skip to content

Commit 24fa6a1

Browse files
committed
Add multiple timers
1 parent 6899fb8 commit 24fa6a1

File tree

4 files changed

+47
-14
lines changed

4 files changed

+47
-14
lines changed

fft_model.jl

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ mutable struct FFTNLPModel{T,VT,FFT,R,C} <: AbstractNLPModel{T,VT}
2424
buffer_complex1::C
2525
buffer_complex2::C
2626
rdft::Bool
27+
fft_timer::Ref{Float64}
28+
mapping_timer::Ref{Float64}
2729
end
2830

2931
function FFTNLPModel{T,VT}(parameters::FFTParameters; rdft::Bool=false) where {T,VT}
@@ -80,7 +82,9 @@ function FFTNLPModel{T,VT}(parameters::FFTParameters; rdft::Bool=false) where {T
8082
buffer_complex1 = Complex{T}.(A)
8183
buffer_complex2 = copy(buffer_complex1)
8284
end
83-
return FFTNLPModel(meta, parameters, N, Counters(), op, buffer_real, buffer_complex1, buffer_complex2, rdft)
85+
fft_timer = Ref{Float64}(0.0)
86+
mapping_timer = Ref{Float64}(0.0)
87+
return FFTNLPModel(meta, parameters, N, Counters(), op, buffer_real, buffer_complex1, buffer_complex2, rdft, fft_timer, mapping_timer)
8488
end
8589

8690
include("kkt.jl")
@@ -186,7 +190,7 @@ function NLPModels.obj(nlp::FFTNLPModel, x::AbstractVector)
186190
index_missing = nlp.parameters.paramf[5]
187191
# Mt = nlp.parameters.paramf[6]
188192

189-
fft_val = M_perp_beta(nlp.buffer_real, nlp.buffer_complex1, nlp.buffer_complex2, nlp.op, DFTdim, DFTsize, x, index_missing; nlp.rdft)
193+
fft_val = M_perp_beta(nlp.buffer_real, nlp.buffer_complex1, nlp.buffer_complex2, nlp.op, DFTdim, DFTsize, x, index_missing, nlp.fft_timer, nlp.mapping_timer; nlp.rdft)
190194
N = nlp.N
191195
beta = view(x, 1:N)
192196
c = view(x, N+1:2*N)
@@ -207,7 +211,7 @@ function NLPModels.grad!(nlp::FFTNLPModel, x::AbstractVector, g::AbstractVector)
207211
g_b = view(g, 1:n)
208212
g_c = view(g, n+1:2*n)
209213
beta = view(x, 1:n)
210-
res = M_perpt_M_perp_vec(nlp.buffer_real, nlp.buffer_complex1, nlp.buffer_complex2, nlp.op, DFTdim, DFTsize, beta, index_missing; nlp.rdft)
214+
res = M_perpt_M_perp_vec(nlp.buffer_real, nlp.buffer_complex1, nlp.buffer_complex2, nlp.op, DFTdim, DFTsize, beta, index_missing, nlp.fft_timer, nlp.mapping_timer; nlp.rdft)
211215
g_b .= res .- M_perptz
212216
fill!(g_c, lambda)
213217
return g
@@ -232,7 +236,7 @@ function NLPModels.hprod!(
232236
n = prod(DFTsize)
233237
hv_b = view(hv, 1:n)
234238
hv_c = view(hv, n+1:2*n)
235-
hv_b .= M_perpt_M_perp_vec(nlp.buffer_real, nlp.buffer_complex1, nlp.buffer_complex2, nlp.op, DFTdim, DFTsize, v[1:n], index_missing; nlp.rdft)
239+
hv_b .= M_perpt_M_perp_vec(nlp.buffer_real, nlp.buffer_complex1, nlp.buffer_complex2, nlp.op, DFTdim, DFTsize, v[1:n], index_missing, nlp.fft_timer, nlp.mapping_timer; nlp.rdft)
236240
fill!(hv_c, 0.0)
237241
return hv
238242
end
@@ -276,7 +280,7 @@ function NLPModels.hess_coord!(
276280
for i in 1:
277281
fill!(v, 0.0)
278282
v[i] = 1.0
279-
H[:, i] .= M_perpt_M_perp_vec(nlp.buffer_real, nlp.buffer_complex1, nlp.buffer_complex2, nlp.op, DFTdim, DFTsize, v, index_missing; nlp.rdft)
283+
H[:, i] .= M_perpt_M_perp_vec(nlp.buffer_real, nlp.buffer_complex1, nlp.buffer_complex2, nlp.op, DFTdim, DFTsize, v, index_missing, nlp.fft_timer, nlp.mapping_timer; nlp.rdft)
280284
end
281285

282286
cnt = 1

fft_utils.jl

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,39 +25,55 @@ include("mapping_gpu.jl")
2525
# >size1 = 4;
2626
# >M_perptz = M_perp_tz(z_zero, dim, size1)
2727

28-
"""
    M_perp_tz(buffer_real, buffer_complex1, buffer_complex2, op, dim, _size, z_zero,
              fft_timer, mapping_timer; rdft=false)

Apply `op` to `z_zero` with `mul!`, divide the result by `sqrt(prod(_size))`, and
map it into `vec(buffer_real)` through `DFT_to_beta!`.

# Arguments
- `buffer_real`: work array; its `vec` receives the output of `DFT_to_beta!`.
- `buffer_complex1`: destination of `mul!`.
- `buffer_complex2`: staging buffer for `z_zero`, only written when `rdft == false`.
- `op`: operator handed to `mul!` (the original notes call it `op_rfft` when `rdft`
  is true and `op_fft` otherwise).
- `dim`, `_size`: forwarded to `DFT_to_beta!`; `prod(_size)` sets the scaling.
- `z_zero`: input array.
- `fft_timer`, `mapping_timer`: `Ref`-like accumulators, incremented by the seconds
  spent in the transform step and in the mapping step respectively.

# Keywords
- `rdft::Bool=false`: selects the input path for `mul!` and is forwarded to
  `DFT_to_beta!`.

# Returns
`vec(buffer_real)` after `DFT_to_beta!` has filled it.
"""
function M_perp_tz(buffer_real, buffer_complex1, buffer_complex2, op, dim, _size, z_zero, fft_timer, mapping_timer; rdft::Bool=false)
    npoints = prod(_size)

    # --- transform step ------------------------------------------------------
    fft_start = time_ns()
    # When `rdft` is false, z_zero must first be stored in a complex buffer for mul!.
    fft_input = rdft ? z_zero : (buffer_complex2 .= z_zero)
    spectrum = mul!(buffer_complex1, op, fft_input)
    spectrum ./= sqrt(npoints)
    # time_ns() counts nanoseconds; the timer accumulates seconds.
    fft_timer[] += (time_ns() - fft_start) / 1e9

    # --- mapping step --------------------------------------------------------
    mapping_start = time_ns()
    coefficients = vec(buffer_real)
    DFT_to_beta!(coefficients, dim, _size, spectrum; rdft=rdft)
    mapping_timer[] += (time_ns() - mapping_start) / 1e9

    return coefficients
end
4149

42-
"""
    M_perp_beta(buffer_real, buffer_complex1, buffer_complex2, op, dim, _size, beta,
                idx_missing, fft_timer, mapping_timer; rdft=false)

Map `beta` into `buffer_complex2` with `beta_to_DFT!`, apply `ldiv!` with `op`,
multiply the result by `sqrt(prod(_size))`, and set the entries of `buffer_real`
selected by `idx_missing` to zero.

# Arguments
- `buffer_real`: output array; overwritten and returned.
- `buffer_complex1`: destination of `ldiv!` when `rdft == false`.
- `buffer_complex2`: receives the output of `beta_to_DFT!`.
- `op`: operator handed to `ldiv!` (the original notes call it `op_rfft` when `rdft`
  is true and `op_fft` otherwise).
- `dim`, `_size`: forwarded to `beta_to_DFT!`; `prod(_size)` sets the scaling.
- `beta`: input coefficients.
- `idx_missing`: indices of `buffer_real` that are zeroed before returning.
- `fft_timer`, `mapping_timer`: `Ref`-like accumulators, incremented by the seconds
  spent in the transform step and in the mapping step respectively.

# Keywords
- `rdft::Bool=false`: selects the `ldiv!` path and is forwarded to `beta_to_DFT!`.

# Returns
`buffer_real`.
"""
function M_perp_beta(buffer_real, buffer_complex1, buffer_complex2, op, dim, _size, beta, idx_missing, fft_timer, mapping_timer; rdft::Bool=false)
    npoints = prod(_size)

    # --- mapping step --------------------------------------------------------
    mapping_start = time_ns()
    spectrum = buffer_complex2
    beta_to_DFT!(spectrum, dim, _size, beta; rdft=rdft)
    # time_ns() counts nanoseconds; the timer accumulates seconds.
    mapping_timer[] += (time_ns() - mapping_start) / 1e9

    # --- transform step ------------------------------------------------------
    fft_start = time_ns()
    if !rdft
        # op_fft: the result lands in a complex buffer, only its real part is kept.
        spatial = ldiv!(buffer_complex1, op, spectrum)
        buffer_real .= real.(spatial) .* sqrt(npoints)
    else
        # op_rfft: the result is written straight into the real buffer.
        ldiv!(buffer_real, op, spectrum)
        buffer_real .*= sqrt(npoints)
    end
    fft_timer[] += (time_ns() - fft_start) / 1e9

    # Zero the entries flagged as missing (outside both timed regions).
    buffer_real[idx_missing] .= 0
    return buffer_real
end
5773

58-
"""
    M_perpt_M_perp_vec(buffer_real, buffer_complex1, buffer_complex2, op, dim, _size,
                       vec, idx_missing, fft_timer, mapping_timer; rdft=false)

Compose the two operators of this file: first `M_perp_beta` applied to `vec`, then
`M_perp_tz` applied to that result. All buffers, both timers and the `rdft` flag are
forwarded unchanged to both calls, so the timers accumulate the cost of both stages.

Returns whatever `M_perp_tz` returns.

Note: the positional argument `vec` shadows `Base.vec` inside this function; the
body does not call `Base.vec`, so this is harmless here.
"""
function M_perpt_M_perp_vec(buffer_real, buffer_complex1, buffer_complex2, op, dim, _size, vec, idx_missing, fft_timer, mapping_timer; rdft::Bool=false)
    # Stage 1: M_perp_beta on the input vector.
    forward = M_perp_beta(
        buffer_real, buffer_complex1, buffer_complex2, op, dim, _size,
        vec, idx_missing, fft_timer, mapping_timer;
        rdft=rdft,
    )
    # Stage 2: M_perp_tz on the stage-1 output, reusing the same work buffers.
    return M_perp_tz(
        buffer_real, buffer_complex1, buffer_complex2, op, dim, _size,
        forward, fft_timer, mapping_timer;
        rdft=rdft,
    )
end
6379

fft_vishwas.jl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,11 @@ N = length(results.solution) ÷ 2
129129
beta_MadNLP = results.solution[1:N]
130130
println("Timer: $(timer)")
131131

132+
# solver.kkt.krylov_iterations
133+
# solver.kkt.krylov_timer
134+
# nlp.fft_timer[]
135+
# nlp.mapping_timer[]
136+
132137
using DelimitedFiles
133138
open("sol_vishwas.txt", "w") do io
134139
writedlm(io, Vector(beta_MadNLP))

kkt.jl

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,18 @@ struct CondensedFFTKKT{T, VT, FFT, R, C} <: AbstractMatrix{T}
2424
buffer_complex1::C # Buffer for fft and ifft
2525
buffer_complex2::C # Buffer for fft and ifft
2626
rdft::Bool
27+
fft_timer::Ref{Float64}
28+
mapping_timer::Ref{Float64}
2729
end
2830

2931
"""
    CondensedFFTKKT{T, VT}(nlp::FFTNLPModel{T, VT})

Build a `CondensedFFTKKT` operator from `nlp`.

Three fresh, uninitialised vectors of type `VT` and length `nlp.N` are allocated
(`buf1`, `Λ1`, `Λ2`). Everything else is taken from `nlp` by reference, not copied:
the parameters, the operator `op`, the real/complex work buffers, the `rdft` flag,
and the `fft_timer` / `mapping_timer` accumulators. Time recorded through the
returned operator therefore accumulates in the same timers as `nlp`.
"""
function CondensedFFTKKT{T, VT}(nlp::FFTNLPModel{T, VT}) where {T, VT}
    # Dimension of the β block; used to size the vectors below.
    nβ = nlp.N
    # `undef` storage: contents are arbitrary until filled by later code.
    buf1 = VT(undef, nβ)
    Λ1 = VT(undef, nβ)
    Λ2 = VT(undef, nβ)
    return CondensedFFTKKT{T, VT, typeof(nlp.op), typeof(nlp.buffer_real), typeof(nlp.buffer_complex1)}(
        nβ, nlp.parameters, buf1, Λ1, Λ2, nlp.op, nlp.buffer_real,
        nlp.buffer_complex1, nlp.buffer_complex2, nlp.rdft, nlp.fft_timer, nlp.mapping_timer,
    )
end
3640

3741
Base.size(K::CondensedFFTKKT) = (2*K.nβ, 2*K.nβ)
@@ -54,7 +58,7 @@ function LinearAlgebra.mul!(y::AbstractVector, K::CondensedFFTKKT, x::AbstractVe
5458
xz = view(x, nβ+1:2*nβ)
5559

5660
# Evaluate Mᵀ M xβ
57-
Mβ .= M_perpt_M_perp_vec(K.buffer_real, K.buffer_complex1, K.buffer_complex2, K.op, DFTdim, DFTsize, xβ, index_missing; K.rdft)
61+
Mβ .= M_perpt_M_perp_vec(K.buffer_real, K.buffer_complex1, K.buffer_complex2, K.op, DFTdim, DFTsize, xβ, index_missing, K.fft_timer, K.mapping_timer; K.rdft)
5862

5963
yβ .= beta .*.+ alpha .* (Mβ .+ K.Λ1 .*.+ K.Λ2 .* xz)
6064
yz .= beta .* yz .+ alpha .* (K.Λ2 .*.+ K.Λ1 .* xz)
@@ -124,6 +128,8 @@ struct FFTKKTSystem{T, VI, VT, MT, LS} <: MadNLP.AbstractReducedKKTSystem{T, VT,
124128
z1::VT # dimension nβ
125129
z2::VT # dimension 2 * nβ
126130
linear_solver::LS
131+
krylov_iterations::Vector{Int}
132+
krylov_timer::Vector{Float64}
127133
end
128134

129135
function MadNLP.create_kkt_system(
@@ -167,7 +173,7 @@ function MadNLP.create_kkt_system(
167173
reg, pr_diag, du_diag, l_diag, u_diag, l_lower, u_lower,
168174
ind_cons.ind_lb, ind_cons.ind_ub,
169175
z1, z2,
170-
linear_solver,
176+
linear_solver, Int[], Float64[],
171177
)
172178
end
173179

@@ -259,7 +265,7 @@ function MadNLP.mul!(y::VT, kkt::FFTKKTSystem, x::VT, alpha::Number, beta::Numbe
259265
xy2 = view(_x, 5*+1:6*nβ)
260266

261267
# Evaluate (MᵀM) * xβ
262-
Mβ .= M_perpt_M_perp_vec(kkt.K.buffer_real, kkt.K.buffer_complex1, kkt.K.buffer_complex2, kkt.K.op, DFTdim, DFTsize, xβ, index_missing; kkt.K.rdft)
268+
Mβ .= M_perpt_M_perp_vec(kkt.K.buffer_real, kkt.K.buffer_complex1, kkt.K.buffer_complex2, kkt.K.op, DFTdim, DFTsize, xβ, index_missing, kkt.K.fft_timer, kkt.K.mapping_timer; kkt.K.rdft)
263269
yβ .= beta .*.+ alpha .* (Mβ .- xy1 .+ xy2)
264270
yz .= beta .* yz .- alpha .* (xy1 .+ xy2)
265271
ys1 .= beta .* ys1 .- alpha .* xy1
@@ -358,6 +364,8 @@ function MadNLP.solve!(kkt::FFTKKTSystem, w::MadNLP.AbstractKKTVector)
358364
# Solve with CG
359365
Krylov.solve!(kkt.linear_solver, kkt.K, b, M=kkt.P, atol=1e-12, rtol=0.0, verbose=0)
360366
x = Krylov.solution(kkt.linear_solver)
367+
push!(kkt.krylov_iterations, kkt.linear_solver |> niterations)
368+
push!(kkt.krylov_timer, kkt.linear_solver.stats.timer)
361369

362370
# Unpack solution
363371
w1 .= x[1:nβ] # / x

0 commit comments

Comments
 (0)