Skip to content

Commit cabd85e

Browse files
authored
Add kdiv! -- kscalcopy! -- kdivcopy! (#980)
* Add kdiv! -- kscalcopy! -- kdivcopy! * Fix errors in tricg.jl and trimr.jl
1 parent f4d9549 commit cabd85e

31 files changed

+332
-276
lines changed

docs/src/custom_workspaces.md

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,17 @@ function Krylov.kscal!(n::Integer, s::T, x::HaloVector{T}) where T <: FloatOrCom
188188
return x
189189
end
190190
191+
function Krylov.kdiv!(n::Integer, x::HaloVector{T}, s::T) where T <: FloatOrComplex
192+
mx, nx = size(x.data)
193+
_x = x.data
194+
for i = 1:mx-1
195+
for j = 1:nx-1
196+
_x[i,j] = _x[i,j] / s
197+
end
198+
end
199+
return x
200+
end
201+
191202
function Krylov.kaxpy!(n::Integer, s::T, x::HaloVector{T}, y::HaloVector{T}) where T <: FloatOrComplex
192203
mx, nx = size(x.data)
193204
_x = x.data
@@ -224,6 +235,30 @@ function Krylov.kcopy!(n::Integer, y::HaloVector{T}, x::HaloVector{T}) where T <
224235
return y
225236
end
226237
238+
function Krylov.kscalcopy!(n::Integer, y::HaloVector{T}, s::T, x::HaloVector{T}) where T <: FloatOrComplex
239+
mx, nx = size(x.data)
240+
_x = x.data
241+
_y = y.data
242+
for i = 1:mx-1
243+
for j = 1:nx-1
244+
_y[i,j] = s * _x[i,j]
245+
end
246+
end
247+
return y
248+
end
249+
250+
function Krylov.kdivcopy!(n::Integer, y::HaloVector{T}, x::HaloVector{T}, s::T) where T <: FloatOrComplex
251+
mx, nx = size(x.data)
252+
_x = x.data
253+
_y = y.data
254+
for i = 1:mx-1
255+
for j = 1:nx-1
256+
_y[i,j] = _x[i,j] / s
257+
end
258+
end
259+
return y
260+
end
261+
227262
function Krylov.kfill!(x::HaloVector{T}, val::T) where T <: FloatOrComplex
228263
mx, nx = size(x.data)
229264
_x = x.data
@@ -251,7 +286,13 @@ function Krylov.kref!(n::Integer, x::HaloVector{T}, y::HaloVector{T}, c::T, s::T
251286
end
252287
```
253288

254-
Note that `Krylov.kref!` is only required for `minres_qlp`.
289+
By default, `kdiv!(n, y, x, s)` calls `kscal!(n, y, t, x)` with `t = 1/s`, so a separate implementation isn't required.
290+
However, this approach may introduce numerical issues when `s` is very small.
291+
We do this because computing $y \leftarrow t \times x$ can often leverage SIMD or fused multiply-add (FMA) instructions on certain architectures, capabilities that a direct element-wise division $y \leftarrow x/s$ typically lacks.
292+
Thus, the implementation of `kdiv!` provides flexibility, allowing users to choose a trade-off between speed and numerical precision by overloading the function if needed.
293+
The operations provided by `kdivcopy!` and `kscalcopy!` could be implemented directly by using `kcopy!`, `kscal!`, and `kdiv!` but require two separate memory passes, which can be suboptimal for performance.
294+
To address this limitation, `kdivcopy!` and `kscalcopy!` fuse the copy and scaling/division operations into a single memory pass.
295+
Note that `Krylov.kref!` is only required for the function `minres_qlp`.
255296

256297
### 2D Poisson equation solver with Krylov methods
257298

src/bilq.jl

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -191,19 +191,19 @@ kwargs_bilq = (:c, :transfer_to_bicg, :M, :N, :ldiv, :atol, :rtol, :itmax, :time
191191
(verbose > 0) && @printf(iostream, "%5s %8s %7s %5s\n", "k", "αₖ", "‖rₖ‖", "timer")
192192
kdisplay(iter, verbose) && @printf(iostream, "%5d %8.1e %7.1e %.2fs\n", iter, cᴴb, bNorm, start_time |> ktimer)
193193

194-
βₖ = (abs(cᴴb)) # β₁γ₁ = cᴴ(b - Ax₀)
195-
γₖ = cᴴb / βₖ # β₁γ₁ = cᴴ(b - Ax₀)
196-
kfill!(vₖ₋₁, zero(FC)) # v₀ = 0
197-
kfill!(uₖ₋₁, zero(FC)) # u₀ = 0
198-
vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁
199-
uₖ .= c ./ conj(γₖ) # u₁ = c / γ̄₁
200-
cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ
201-
sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ
202-
kfill!(d̅, zero(FC)) # Last column of D̅ₖ = Vₖ(Qₖ)ᴴ
203-
ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁
204-
ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ
205-
δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations
206-
norm_vₖ = bNorm / βₖ # ‖vₖ‖ is used for residual norm estimates
194+
βₖ = (abs(cᴴb)) # β₁γ₁ = cᴴ(b - Ax₀)
195+
γₖ = cᴴb / βₖ # β₁γ₁ = cᴴ(b - Ax₀)
196+
kfill!(vₖ₋₁, zero(FC)) # v₀ = 0
197+
kfill!(uₖ₋₁, zero(FC)) # u₀ = 0
198+
kdivcopy!(n, vₖ, r₀, βₖ) # v₁ = (b - Ax₀) / β₁
199+
kdivcopy!(n, uₖ, c, conj(γₖ)) # u₁ = c / γ̄₁
200+
cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ
201+
sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ
202+
kfill!(d̅, zero(FC)) # Last column of D̅ₖ = Vₖ(Qₖ)ᴴ
203+
ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁
204+
ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ
205+
δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations
206+
norm_vₖ = bNorm / βₖ # ‖vₖ‖ is used for residual norm estimates
207207

208208
# Stopping criterion.
209209
solved_lq = bNorm ε
@@ -321,8 +321,8 @@ kwargs_bilq = (:c, :transfer_to_bicg, :M, :N, :ldiv, :atol, :rtol, :itmax, :time
321321
kcopy!(n, uₖ₋₁, uₖ) # uₖ₋₁ ← uₖ
322322

323323
if pᴴq 0
324-
vₖ .= q ./ βₖ₊₁ # βₖ₊₁vₖ₊₁ = q
325-
uₖ .= p ./ conj(γₖ₊₁) # γ̄ₖ₊₁uₖ₊₁ = p
324+
kdivcopy!(n, vₖ, q, βₖ₊₁) # vₖ₊₁ = q / βₖ₊₁
325+
kdivcopy!(n, uₖ, p, conj(γₖ₊₁)) # uₖ₊₁ = p / γ̄ₖ₊₁
326326
end
327327

328328
# Compute ⟨vₖ,vₖ₊₁⟩ and ‖vₖ₊₁‖

src/bilqr.jl

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -175,24 +175,24 @@ kwargs_bilqr = (:transfer_to_bicg, :atol, :rtol, :itmax, :timemax, :verbose, :hi
175175
end
176176

177177
# Set up workspace.
178-
βₖ = (abs(cᴴb)) # β₁γ₁ = (c - Aᴴy₀)ᴴ(b - Ax₀)
179-
γₖ = cᴴb / βₖ # β₁γ₁ = (c - Aᴴy₀)ᴴ(b - Ax₀)
180-
kfill!(vₖ₋₁, zero(FC)) # v₀ = 0
181-
kfill!(uₖ₋₁, zero(FC)) # u₀ = 0
182-
vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁
183-
uₖ .= s₀ ./ conj(γₖ) # u₁ = (c - Aᴴy₀) / γ̄₁
184-
cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ
185-
sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ
186-
kfill!(d̅, zero(FC)) # Last column of D̅ₖ = Vₖ(Qₖ)ᴴ
187-
ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁
188-
ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ
189-
δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations
190-
ψbarₖ₋₁ = ψₖ₋₁ = zero(FC) # ψₖ₋₁ and ψbarₖ are the last components of h̅ₖ = Qₖγ̄₁e₁
191-
norm_vₖ = bNorm / βₖ # ‖vₖ‖ is used for residual norm estimates
192-
ϵₖ₋₃ = λₖ₋₂ = zero(FC) # Components of Lₖ₋₁
193-
kfill!(wₖ₋₃, zero(FC)) # Column k-3 of Wₖ = Uₖ(Lₖ)⁻ᴴ
194-
kfill!(wₖ₋₂, zero(FC)) # Column k-2 of Wₖ = Uₖ(Lₖ)⁻ᴴ
195-
τₖ = zero(T) # τₖ is used for the dual residual norm estimate
178+
βₖ = (abs(cᴴb)) # β₁γ₁ = (c - Aᴴy₀)ᴴ(b - Ax₀)
179+
γₖ = cᴴb / βₖ # β₁γ₁ = (c - Aᴴy₀)ᴴ(b - Ax₀)
180+
kfill!(vₖ₋₁, zero(FC)) # v₀ = 0
181+
kfill!(uₖ₋₁, zero(FC)) # u₀ = 0
182+
kdivcopy!(n, vₖ, r₀, βₖ) # v₁ = (b - Ax₀) / β₁
183+
kdivcopy!(n, uₖ, s₀, conj(γₖ)) # u₁ = (c - Aᴴy₀) / γ̄₁
184+
cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ
185+
sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ
186+
kfill!(d̅, zero(FC)) # Last column of D̅ₖ = Vₖ(Qₖ)ᴴ
187+
ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁
188+
ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ
189+
δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations
190+
ψbarₖ₋₁ = ψₖ₋₁ = zero(FC) # ψₖ₋₁ and ψbarₖ are the last components of h̅ₖ = Qₖγ̄₁e₁
191+
norm_vₖ = bNorm / βₖ # ‖vₖ‖ is used for residual norm estimates
192+
ϵₖ₋₃ = λₖ₋₂ = zero(FC) # Components of Lₖ₋₁
193+
kfill!(wₖ₋₃, zero(FC)) # Column k-3 of Wₖ = Uₖ(Lₖ)⁻ᴴ
194+
kfill!(wₖ₋₂, zero(FC)) # Column k-2 of Wₖ = Uₖ(Lₖ)⁻ᴴ
195+
τₖ = zero(T) # τₖ is used for the dual residual norm estimate
196196

197197
# Stopping criterion.
198198
solved_lq = bNorm == 0
@@ -355,23 +355,22 @@ kwargs_bilqr = (:transfer_to_bicg, :atol, :rtol, :itmax, :timemax, :verbose, :hi
355355
# w₁ = u₁ / δ̄₁
356356
if iter == 2
357357
wₖ₋₁ = wₖ₋₂
358-
kaxpy!(n, one(FC), uₖ₋₁, wₖ₋₁)
359-
wₖ₋₁ .= uₖ₋₁ ./ conj(δₖ₋₁)
358+
kdivcopy!(n, wₖ₋₁, uₖ₋₁, conj(δₖ₋₁))
360359
end
361360
# w₂ = (u₂ - λ̄₁w₁) / δ̄₂
362361
if iter == 3
363362
wₖ₋₁ = wₖ₋₃
364363
kaxpy!(n, one(FC), uₖ₋₁, wₖ₋₁)
365364
kaxpy!(n, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁)
366-
wₖ₋₁ .= wₖ₋₁ ./ conj(δₖ₋₁)
365+
kdiv!(n, wₖ₋₁, conj(δₖ₋₁))
367366
end
368367
# wₖ₋₁ = (uₖ₋₁ - λ̄ₖ₋₂wₖ₋₂ - ϵ̄ₖ₋₃wₖ₋₃) / δ̄ₖ₋₁
369368
if iter 4
370369
kscal!(n, -conj(ϵₖ₋₃), wₖ₋₃)
371370
wₖ₋₁ = wₖ₋₃
372371
kaxpy!(n, one(FC), uₖ₋₁, wₖ₋₁)
373372
kaxpy!(n, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁)
374-
wₖ₋₁ .= wₖ₋₁ ./ conj(δₖ₋₁)
373+
kdiv!(n, wₖ₋₁, conj(δₖ₋₁))
375374
end
376375

377376
if iter 3
@@ -405,9 +404,10 @@ kwargs_bilqr = (:transfer_to_bicg, :atol, :rtol, :itmax, :timemax, :verbose, :hi
405404
kcopy!(n, vₖ₋₁, vₖ) # vₖ₋₁ ← vₖ
406405
kcopy!(n, uₖ₋₁, uₖ) # uₖ₋₁ ← uₖ
407406

407+
408408
if pᴴq zero(FC)
409-
vₖ .= q ./ βₖ₊₁ # βₖ₊₁vₖ₊₁ = q
410-
uₖ .= p ./ conj(γₖ₊₁) # γ̄ₖ₊₁uₖ₊₁ = p
409+
kdivcopy!(n, vₖ, q, βₖ₊₁) # vₖ₊₁ = q / βₖ₊₁
410+
kdivcopy!(n, uₖ, p, conj(γₖ₊₁)) # uₖ₊₁ = p / γ̄ₖ₊₁
411411
end
412412

413413
# Update ϵₖ₋₃, λₖ₋₂, δbarₖ₋₁, cₖ₋₁, sₖ₋₁, γₖ and βₖ.

src/block_krylov_utils.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ function gs!(Q::AbstractMatrix{FC}, R::AbstractMatrix{FC}, v::AbstractVector{FC}
2626
kfill!(R, zero(FC))
2727
for j = 1:k
2828
qⱼ = view(Q,:,j)
29-
aⱼ .= qⱼ
29+
kcopy!(n, aⱼ, qⱼ)
3030
for i = 1:j-1
3131
qᵢ = view(Q,:,i)
3232
R[i,j] = kdot(n, qᵢ, aⱼ) # rᵢⱼ = ⟨qᵢ , aⱼ⟩

src/cg_lanczos.jl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -157,9 +157,9 @@ kwargs_cg_lanczos = (:M, :ldiv, :check_curvature, :atol, :rtol, :itmax, :timemax
157157

158158
# Initialize Lanczos process.
159159
# β₁Mv₁ = b
160-
kscal!(n, one(FC) / β, v) # v₁ ← v₁ / β₁
161-
MisI || kscal!(n, one(FC) / β, Mv) # Mv₁ ← Mv₁ / β₁
162-
kcopy!(n, Mv_prev, Mv) # Mv_prev ← Mv
160+
kdiv!(n, v, β) # v₁ ← v₁ / β₁
161+
MisI || kdiv!(n, Mv, β) # Mv₁ ← Mv₁ / β₁
162+
kcopy!(n, Mv_prev, Mv) # Mv_prev ← Mv
163163

164164
iter = 0
165165
itmax == 0 && (itmax = 2 * n)
@@ -191,7 +191,7 @@ kwargs_cg_lanczos = (:M, :ldiv, :check_curvature, :atol, :rtol, :itmax, :timemax
191191

192192
# Check curvature. Exit fast if requested.
193193
# It is possible to show that σₖ² (δₖ - ωₖ₋₁ / γₖ₋₁) = pₖᴴ A pₖ.
194-
γ = one(T) / - ω / γ) # γₖ = 1 / (δₖ - ωₖ₋₁ / γₖ₋₁)
194+
γ = inv- ω / γ) # γₖ = 1 / (δₖ - ωₖ₋₁ / γₖ₋₁)
195195
indefinite |= 0)
196196
(check_curvature & indefinite) && continue
197197

@@ -203,8 +203,8 @@ kwargs_cg_lanczos = (:M, :ldiv, :check_curvature, :atol, :rtol, :itmax, :timemax
203203
kcopy!(n, Mv, Mv_next) # Mvₖ ← Mvₖ₊₁
204204
MisI || mulorldiv!(v, M, Mv, ldiv) # vₖ₊₁ = M⁻¹ * Mvₖ₊₁
205205
β = knorm_elliptic(n, v, Mv) # βₖ₊₁ = vₖ₊₁ᴴ M vₖ₊₁
206-
kscal!(n, one(FC) / β, v) # vₖ₊₁ ← vₖ₊₁ / βₖ₊₁
207-
MisI || kscal!(n, one(FC) / β, Mv) # Mvₖ₊₁ ← Mvₖ₊₁ / βₖ₊₁
206+
kdiv!(n, v, β) # vₖ₊₁ ← vₖ₊₁ / βₖ₊₁
207+
MisI || kdiv!(n, Mv, β) # Mvₖ₊₁ ← Mvₖ₊₁ / βₖ₊₁
208208
Anorm2 += β_prev^2 + β^2 + δ^2 # Use ‖Tₖ₊₁‖₂ as increasing approximation of ‖A‖₂.
209209
β_prev = β
210210

src/cg_lanczos_shift.jl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -160,9 +160,9 @@ kwargs_cg_lanczos_shift = (:M, :ldiv, :check_curvature, :atol, :rtol, :itmax, :t
160160

161161
# Initialize Lanczos process.
162162
# β₁Mv₁ = b
163-
kscal!(n, one(FC) / β, v) # v₁ ← v₁ / β₁
164-
MisI || kscal!(n, one(FC) / β, Mv) # Mv₁ ← Mv₁ / β₁
165-
kcopy!(n, Mv_prev, Mv) # Mv_prev ← Mv
163+
kdiv!(n, v, β) # v₁ ← v₁ / β₁
164+
MisI || kdiv!(n, Mv, β) # Mv₁ ← Mv₁ / β₁
165+
kcopy!(n, Mv_prev, Mv) # Mv_prev ← Mv
166166

167167
# Initialize some constants used in recursions below.
168168
ρ = one(T)
@@ -206,15 +206,15 @@ kwargs_cg_lanczos_shift = (:M, :ldiv, :check_curvature, :atol, :rtol, :itmax, :t
206206
kcopy!(n, Mv, Mv_next) # Mvₖ ← Mvₖ₊₁
207207
MisI || mulorldiv!(v, M, Mv, ldiv) # vₖ₊₁ = M⁻¹ * Mvₖ₊₁
208208
β = knorm_elliptic(n, v, Mv) # βₖ₊₁ = vₖ₊₁ᴴ M vₖ₊₁
209-
kscal!(n, one(FC) / β, v) # vₖ₊₁ ← vₖ₊₁ / βₖ₊₁
210-
MisI || kscal!(n, one(FC) / β, Mv) # Mvₖ₊₁ ← Mvₖ₊₁ / βₖ₊₁
209+
kdiv!(n, v, β) # vₖ₊₁ ← vₖ₊₁ / βₖ₊₁
210+
MisI || kdiv!(n, Mv, β) # Mvₖ₊₁ ← Mvₖ₊₁ / βₖ₊₁
211211

212212
# Check curvature: vₖᴴ(A + sᵢI)vₖ = vₖᴴAvₖ + sᵢ‖vₖ‖² = δₖ + ρₖ * sᵢ with ρₖ = ‖vₖ‖².
213213
# It is possible to show that σₖ² (δₖ + ρₖ * sᵢ - ωₖ₋₁ / γₖ₋₁) = pₖᴴ (A + sᵢ I) pₖ.
214214
MisI ||= kdotr(n, v, v))
215215
for i = 1 : nshifts
216216
δhat[i] = δ + ρ * shifts[i]
217-
γ[i] = 1 / (δhat[i] - ω[i] / γ[i])
217+
γ[i] = inv(δhat[i] - ω[i] / γ[i])
218218
end
219219
for i = 1 : nshifts
220220
indefinite[i] |= γ[i] 0

src/cgls_lanczos_shift.jl

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -161,8 +161,8 @@ kwargs_cgls_lanczos_shift = (:M, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose
161161

162162
# Initialize Lanczos process.
163163
# β₁v₁ = b
164-
kscal!(n, one(FC) / β, v) # v₁ ← v₁ / β₁
165-
kscal!(m, one(FC) / β, u)
164+
kdiv!(n, v, β) # v₁ ← v₁ / β₁
165+
kdiv!(m, u, β)
166166

167167
# Initialize some constants used in recursions below.
168168
ρ = one(T)
@@ -196,21 +196,21 @@ kwargs_cgls_lanczos_shift = (:M, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose
196196
while ! (solved || tired || user_requested_exit || overtimed)
197197

198198
# Form next Lanczos vector.
199-
mul!(u_next, A, v) # u_nextₖ ← Avₖ
200-
δ = kdotr(m, u_next, u_next) # δₖ = vₖᵀAᴴAvₖ
201-
kaxpy!(m, -δ, u, u_next) # uₖ₊₁ = u_nextₖ - δₖuₖ - βₖuₖ₋₁
199+
mul!(u_next, A, v) # u_nextₖ ← Avₖ
200+
δ = kdotr(m, u_next, u_next) # δₖ = vₖᵀAᴴAvₖ
201+
kaxpy!(m, -δ, u, u_next) # uₖ₊₁ = u_nextₖ - δₖuₖ - βₖuₖ₋₁
202202
kaxpy!(m, -β, u_prev, u_next)
203-
mul!(v, Aᴴ, u_next) # vₖ₊₁ = Aᴴuₖ₊₁
204-
β = knorm_elliptic(n, v, v) # βₖ₊₁ = vₖ₊₁ᵀ M vₖ₊₁
205-
kscal!(n, one(FC) / β, v) # vₖ₊₁ vₖ₊₁ / βₖ₊₁
206-
kscal!(m, one(FC) / β, u_next) # uₖ₊₁ = uₖ₊₁ / βₖ₊₁
207-
kcopy!(m, u_prev, u) # u_prev ← u
208-
kcopy!(m, u, u_next) # u ← u_next
203+
mul!(v, Aᴴ, u_next) # vₖ₊₁ = Aᴴuₖ₊₁
204+
β = knorm_elliptic(n, v, v) # βₖ₊₁ = vₖ₊₁ᵀ M vₖ₊₁
205+
kdiv!(n, v, β) # vₖ₊₁ = vₖ₊₁ / βₖ₊₁
206+
kdiv!(m, u_next, β) # uₖ₊₁ = uₖ₊₁ / βₖ₊₁
207+
kcopy!(m, u_prev, u) # u_prev ← u
208+
kcopy!(m, u, u_next) # u ← u_next
209209

210210
MisI ||= kdotr(n, v, v))
211211
for i = 1 : nshifts
212212
δhat[i] = δ + ρ * shifts[i]
213-
γ[i] = 1 / (δhat[i] - ω[i] / γ[i])
213+
γ[i] = inv(δhat[i] - ω[i] / γ[i])
214214
end
215215

216216
# Compute next CG iterate for each shifted system that has not yet converged.

src/craig.jl

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -223,8 +223,8 @@ kwargs_craig = (:M, :N, :ldiv, :transfer_to_lsqr, :sqd, :λ, :btol, :conlim, :at
223223

224224
# Initialize Golub-Kahan process.
225225
# β₁Mu₁ = b.
226-
kscal!(m, one(FC) / β₁, u)
227-
MisI || kscal!(m, one(FC) / β₁, Mu)
226+
kdiv!(m, u, β₁)
227+
MisI || kdiv!(m, Mu, β₁)
228228

229229
kfill!(Nv, zero(FC))
230230
kfill!(w, zero(FC)) # Used to update y.
@@ -275,8 +275,8 @@ kwargs_craig = (:M, :N, :ldiv, :transfer_to_lsqr, :sqd, :λ, :btol, :conlim, :at
275275
inconsistent = true
276276
continue
277277
end
278-
kscal!(n, one(FC) / α, v)
279-
NisI || kscal!(n, one(FC) / α, Nv)
278+
kdiv!(n, v, α)
279+
NisI || kdiv!(n, Nv, α)
280280

281281
Anorm² += α * α + λ * λ
282282

@@ -315,8 +315,8 @@ kwargs_craig = (:M, :N, :ldiv, :transfer_to_lsqr, :sqd, :λ, :btol, :conlim, :at
315315
MisI || mulorldiv!(u, M, Mu, ldiv)
316316
β = knorm_elliptic(m, u, Mu)
317317
if β 0
318-
kscal!(m, one(FC) / β, u)
319-
MisI || kscal!(m, one(FC) / β, Mu)
318+
kdiv!(m, u, β)
319+
MisI || kdiv!(m, Mu, β)
320320
end
321321

322322
# Finish updates from the first Givens rotation.
@@ -358,8 +358,8 @@ kwargs_craig = (:M, :N, :ldiv, :transfer_to_lsqr, :sqd, :λ, :btol, :conlim, :at
358358
solved_resid_lim = rNorm btol + atol * Anorm * xNorm / β₁
359359
solved = solved_mach | solved_lim | solved_resid_tol | solved_resid_lim
360360

361-
ill_cond_mach = one(T) + one(T) / Acond one(T)
362-
ill_cond_lim = 1 / Acond ctol
361+
ill_cond_mach = one(T) + inv(Acond) one(T)
362+
ill_cond_lim = inv(Acond) ctol
363363
ill_cond = ill_cond_mach | ill_cond_lim
364364

365365
user_requested_exit = callback(solver) :: Bool

0 commit comments

Comments
 (0)