Skip to content

Commit 6f1708e

Browse files
author
mohamed82008
committed
Sprinkle @inbounds
1 parent 14e66a3 commit 6f1708e

File tree

1 file changed

+59
-56
lines changed

1 file changed

+59
-56
lines changed

src/lobpcg.jl

Lines changed: 59 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ function (precond!::RPreconditioner)(X)
169169
bs = size(X, 2)
170170
A_ldiv_B!(view(precond!.buffer, :, 1:bs), precond!.M, X)
171171
# Just returning buffer would be cheaper but struct at call site must be mutable
172-
X .= view(precond!.buffer, :, 1:bs)
172+
@inbounds X .= view(precond!.buffer, :, 1:bs)
173173
nothing
174174
end
175175

@@ -205,7 +205,7 @@ RBR!(BlockGram, RBlocks, n) = Ac_mul_B!(view(BlockGram.RAR, 1:n, 1:n), view(RBlo
205205
PBP!(BlockGram, PBlocks, n) = Ac_mul_B!(view(BlockGram.PAP, 1:n, 1:n), view(PBlocks.block, :, 1:n), view(PBlocks.B_block, :, 1:n))
206206

207207
function I!(G, xr)
208-
for j in xr, i in xr
208+
@inbounds for j in xr, i in xr
209209
G[i, j] = ifelse(i==j, 1, 0)
210210
end
211211
return
@@ -215,21 +215,23 @@ function (g::BlockGram)(gram, lambda, n1::Int, n2::Int, n3::Int)
215215
xr = 1:n1
216216
rr = n1+1:n1+n2
217217
pr = n1+n2+1:n1+n2+n3
218-
if n1 > 0
219-
#gram[xr, xr] .= view(g.XAX, 1:n1, 1:n1)
220-
gram[xr, xr] .= Diagonal(view(lambda, 1:n1))
221-
end
222-
if n2 > 0
223-
gram[rr, rr] .= view(g.RAR, 1:n2, 1:n2)
224-
gram[xr, rr] .= view(g.XAR, 1:n1, 1:n2)
225-
conj!(transpose!(view(gram, rr, xr), view(g.XAR, 1:n1, 1:n2)))
226-
end
227-
if n3 > 0
228-
gram[pr, pr] .= view(g.PAP, 1:n3, 1:n3)
229-
gram[rr, pr] .= view(g.RAP, 1:n2, 1:n3)
230-
gram[xr, pr] .= view(g.XAP, 1:n1, 1:n3)
231-
conj!(transpose!(view(gram, pr, rr), view(g.RAP, 1:n2, 1:n3)))
232-
conj!(transpose!(view(gram, pr, xr), view(g.XAP, 1:n1, 1:n3)))
218+
@inbounds begin
219+
if n1 > 0
220+
#gram[xr, xr] .= view(g.XAX, 1:n1, 1:n1)
221+
gram[xr, xr] .= Diagonal(view(lambda, 1:n1))
222+
end
223+
if n2 > 0
224+
gram[rr, rr] .= view(g.RAR, 1:n2, 1:n2)
225+
gram[xr, rr] .= view(g.XAR, 1:n1, 1:n2)
226+
conj!(transpose!(view(gram, rr, xr), view(g.XAR, 1:n1, 1:n2)))
227+
end
228+
if n3 > 0
229+
gram[pr, pr] .= view(g.PAP, 1:n3, 1:n3)
230+
gram[rr, pr] .= view(g.RAP, 1:n2, 1:n3)
231+
gram[xr, pr] .= view(g.XAP, 1:n1, 1:n3)
232+
conj!(transpose!(view(gram, pr, rr), view(g.RAP, 1:n2, 1:n3)))
233+
conj!(transpose!(view(gram, pr, xr), view(g.XAP, 1:n1, 1:n3)))
234+
end
233235
end
234236
return
235237
end
@@ -241,28 +243,28 @@ function (g::BlockGram)(gram, n1::Int, n2::Int, n3::Int, normalized::Bool=true)
241243
if normalized
242244
I!(gram, xr)
243245
else
244-
gram[xr, xr] .= view(g.XAX, 1:n1, 1:n1)
246+
@inbounds gram[xr, xr] .= view(g.XAX, 1:n1, 1:n1)
245247
end
246248
end
247249
if n2 > 0
248250
if normalized
249251
I!(gram, rr)
250252
else
251-
gram[rr, rr] .= view(g.RAR, 1:n2, 1:n2)
253+
@inbounds gram[rr, rr] .= view(g.RAR, 1:n2, 1:n2)
252254
end
253-
gram[xr, rr] .= view(g.XAR, 1:n1, 1:n2)
254-
conj!(transpose!(view(gram, rr, xr), view(g.XAR, 1:n1, 1:n2)))
255+
@inbounds gram[xr, rr] .= view(g.XAR, 1:n1, 1:n2)
256+
@inbounds conj!(transpose!(view(gram, rr, xr), view(g.XAR, 1:n1, 1:n2)))
255257
end
256258
if n3 > 0
257259
if normalized
258260
I!(gram, pr)
259261
else
260-
gram[pr, pr] .= view(g.PAP, 1:n3, 1:n3)
262+
@inbounds gram[pr, pr] .= view(g.PAP, 1:n3, 1:n3)
261263
end
262-
gram[rr, pr] .= view(g.RAP, 1:n2, 1:n3)
263-
gram[xr, pr] .= view(g.XAP, 1:n1, 1:n3)
264-
conj!(transpose!(view(gram, pr, rr), view(g.RAP, 1:n2, 1:n3)))
265-
conj!(transpose!(view(gram, pr, xr), view(g.XAP, 1:n1, 1:n3)))
264+
@inbounds gram[rr, pr] .= view(g.RAP, 1:n2, 1:n3)
265+
@inbounds gram[xr, pr] .= view(g.XAP, 1:n1, 1:n3)
266+
@inbounds conj!(transpose!(view(gram, pr, rr), view(g.RAP, 1:n2, 1:n3)))
267+
@inbounds conj!(transpose!(view(gram, pr, xr), view(g.XAP, 1:n1, 1:n3)))
266268
end
267269
return
268270
end
@@ -274,10 +276,10 @@ end
274276

275277
function A_rdiv_B!(A, B::UpperTriangular)
276278
s = size(A, 2)
277-
A[:,1] .= view(A, :, 1) ./ B[1,1]
278-
for i in 2:s
279+
@inbounds A[:,1] .= view(A, :, 1) ./ B[1,1]
280+
@inbounds for i in 2:s
279281
for j in 1:i-1
280-
A[:,i] .= view(A, :, i) - view(A, :, j) .* B[j,i]
282+
A[:,i] .= view(A, :, i) .- view(A, :, j) .* B[j,i]
281283
end
282284
A[:,i] .= view(A, :, i) ./ B[i,i]
283285
end
@@ -286,9 +288,8 @@ end
286288

287289
realdiag!(M) = nothing
288290
function realdiag!(M::AbstractMatrix{TC}) where TC <: Complex
289-
T = real(eltype(M))
290-
for i in 1:size(M, 1)
291-
M[i,i] = Complex(real(M[i,i]), zero(T))
291+
@inbounds for i in 1:size(M, 1)
292+
M[i,i] = real(M[i,i])
292293
end
293294
return M
294295
end
@@ -434,9 +435,9 @@ end
434435
function residuals!(iterator)
435436
sizeX = size(iterator.XBlocks.block, 2)
436437
A_mul_B!(iterator.RBlocks.block, iterator.XBlocks.B_block, Diagonal(view(iterator.ritz_values, 1:sizeX)))
437-
iterator.RBlocks.block .= iterator.XBlocks.A_block .- iterator.RBlocks.block
438+
@inbounds iterator.RBlocks.block .= iterator.XBlocks.A_block .- iterator.RBlocks.block
438439
# Finds residual norms
439-
for j in 1:size(iterator.RBlocks.block, 2)
440+
@inbounds for j in 1:size(iterator.RBlocks.block, 2)
440441
iterator.residuals[j] = 0
441442
for i in 1:size(iterator.RBlocks.block, 1)
442443
x = iterator.RBlocks.block[i,j]
@@ -450,13 +451,13 @@ end
450451
function update_mask!(iterator, residualTolerance)
451452
sizeX = size(iterator.XBlocks.block, 2)
452453
# Update active vectors mask
453-
iterator.activeMask .= view(iterator.residuals, 1:sizeX) .> residualTolerance
454+
@inbounds iterator.activeMask .= view(iterator.residuals, 1:sizeX) .> residualTolerance
454455
iterator.currentBlockSize[] = sum(iterator.activeMask)
455456
return
456457
end
457458

458459
function update_active!(mask, bs::Int, blockPairs...)
459-
for (activeblock, block) in blockPairs
460+
@inbounds for (activeblock, block) in blockPairs
460461
activeblock[:, 1:bs] .= view(block, :, mask)
461462
end
462463
return
@@ -522,8 +523,8 @@ function sub_problem!(iterator, sizeX, bs1, bs2)
522523
end
523524
# Selects extremal eigenvalues and corresponding vectors
524525
selectperm!(view(iterator.λperm, 1:subdim), eigf.values, 1:subdim, rev=iterator.largest)
525-
iterator.ritz_values[1:sizeX] .= view(eigf.values, view(iterator.λperm, 1:sizeX))
526-
iterator.V[1:subdim, 1:sizeX] .= view(eigf.vectors, :, view(iterator.λperm, 1:sizeX))
526+
@inbounds iterator.ritz_values[1:sizeX] .= view(eigf.values, view(iterator.λperm, 1:sizeX))
527+
@inbounds iterator.V[1:subdim, 1:sizeX] .= view(eigf.vectors, :, view(iterator.λperm, 1:sizeX))
527528

528529
return
529530
end
@@ -554,10 +555,10 @@ function update_X_P!(iterator::LOBPCGIterator{Generalized}, bs1, bs2) where Gene
554555
if Generalized
555556
A_mul_B!(iterator.tempXBlocks.B_block, pb_blockview, p_eigview)
556557
end
557-
iterator.PBlocks.block .= iterator.PBlocks.block .+ iterator.tempXBlocks.block
558-
iterator.PBlocks.A_block .= iterator.PBlocks.A_block .+ iterator.tempXBlocks.A_block
558+
@inbounds iterator.PBlocks.block .= iterator.PBlocks.block .+ iterator.tempXBlocks.block
559+
@inbounds iterator.PBlocks.A_block .= iterator.PBlocks.A_block .+ iterator.tempXBlocks.A_block
559560
if Generalized
560-
iterator.PBlocks.B_block .= iterator.PBlocks.B_block .+ iterator.tempXBlocks.B_block
561+
@inbounds iterator.PBlocks.B_block .= iterator.PBlocks.B_block .+ iterator.tempXBlocks.B_block
561562
end
562563
end
563564
block = iterator.XBlocks.block
@@ -571,19 +572,21 @@ function update_X_P!(iterator::LOBPCGIterator{Generalized}, bs1, bs2) where Gene
571572
tempblock = iterator.tempXBlocks.B_block
572573
A_mul_B!(tempblock, block, x_eigview)
573574
end
574-
if bs1 > 0
575-
iterator.XBlocks.block .= iterator.tempXBlocks.block .+ iterator.PBlocks.block
576-
iterator.XBlocks.A_block .= iterator.tempXBlocks.A_block .+ iterator.PBlocks.A_block
577-
if Generalized
578-
iterator.XBlocks.B_block .= iterator.tempXBlocks.B_block .+ iterator.PBlocks.B_block
579-
end
580-
else
581-
iterator.XBlocks.block .= iterator.tempXBlocks.block
582-
iterator.XBlocks.A_block .= iterator.tempXBlocks.A_block
583-
if Generalized
584-
iterator.XBlocks.B_block .= iterator.tempXBlocks.B_block
575+
@inbounds begin
576+
if bs1 > 0
577+
iterator.XBlocks.block .= iterator.tempXBlocks.block .+ iterator.PBlocks.block
578+
iterator.XBlocks.A_block .= iterator.tempXBlocks.A_block .+ iterator.PBlocks.A_block
579+
if Generalized
580+
iterator.XBlocks.B_block .= iterator.tempXBlocks.B_block .+ iterator.PBlocks.B_block
581+
end
582+
else
583+
iterator.XBlocks.block .= iterator.tempXBlocks.block
584+
iterator.XBlocks.A_block .= iterator.tempXBlocks.A_block
585+
if Generalized
586+
iterator.XBlocks.B_block .= iterator.tempXBlocks.B_block
587+
end
585588
end
586-
end
589+
end
587590
return
588591
end
589592

@@ -732,8 +735,8 @@ function lobpcg!(iterator::LOBPCGIterator; log=false, tol=nothing, maxiter=200,
732735
T = eltype(iterator.XBlocks.block)
733736
X = iterator.XBlocks.block
734737
if !not_zeros
735-
for j in 1:size(X,2)
736-
if all(x -> x==0, view(X, :, j))
738+
for j in 1:size(X,2)
739+
if all(x -> x==0, view(X, :, j))
737740
@inbounds X[:,j] .= rand.()
738741
end
739742
end
@@ -750,7 +753,7 @@ function lobpcg!(iterator::LOBPCGIterator; log=false, tol=nothing, maxiter=200,
750753
iterator.currentBlockSize[] == 0 && break
751754
iterator.iteration[] += 1
752755
end
753-
iterator.λ .= view(iterator.ritz_values, 1:sizeX)
756+
@inbounds iterator.λ .= view(iterator.ritz_values, 1:sizeX)
754757

755758
results = LOBPCGResults(iterator.λ, X, iterator.iteration[], iterator.residuals[1:sizeX], residualTolerance, all((x)->(norm(x)<=residualTolerance), view(iterator.residuals, 1:sizeX)), maxiter, iterator.trace)
756759

0 commit comments

Comments
 (0)