Skip to content

Commit 3ab6e5e

Browse files
MaxenceGollierdpo
andauthored
Add Golub Riley iteration method (JuliaSmoothOptimizers#123)
Co-authored-by: Dominique <dominique.orban@gmail.com>
1 parent 7e3cdb1 commit 3ab6e5e

File tree

4 files changed

+201
-4
lines changed

4 files changed

+201
-4
lines changed

src/QRMumps.jl

Lines changed: 66 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ include("wrapper/qr_mumps_api.jl")
3636

3737
include("utils.jl")
3838

39-
export qrm_spmat, qrm_spfct,
39+
export qrm_spmat, qrm_shifted_spmat, qrm_spfct,
4040
qrm_init, qrm_finalize,
4141
qrm_spmat_init!, qrm_spmat_init, qrm_spmat_destroy!,
4242
qrm_spfct_init!, qrm_spfct_init, qrm_spfct_destroy!,
@@ -55,7 +55,10 @@ export qrm_spmat, qrm_spfct,
5555
qrm_min_norm_semi_normal!, qrm_min_norm_semi_normal,
5656
qrm_residual_norm!, qrm_residual_norm,
5757
qrm_residual_orth!, qrm_residual_orth,
58-
qrm_refine!, qrm_refine, qrm_set, qrm_get,
58+
qrm_refine!, qrm_refine,
59+
qrm_shift_spmat, qrm_update_shift_spmat!,
60+
qrm_golub_riley, qrm_golub_riley!,
61+
qrm_set, qrm_get,
5962
qrm_user_permutation!
6063

6164
@doc raw"""
@@ -522,6 +525,67 @@ function qrm_refine! end
522525
"""
523526
function qrm_refine end
524527

528+
@doc raw"""
529+
qrm_shifted_spmat = qrm_shift_spmat(spmat, α = 0)
530+
531+
Given a `spmat` structure representing some sparse matrix `A`, return the block matrix `(A √α)` as a `qrm_shifted_spmat` type. See `qrm_shifted_spmat` for more information.
532+
This can be especially useful when `A` is rank deficient, as choosing `α > 0` acts as a regularization.
533+
534+
### Input Arguments
535+
* `spmat`: the input matrix
536+
* `α ≥ 0`: the regularization parameter (note the square root in the block matrix representation).
537+
"""
538+
function qrm_shift_spmat end
539+
540+
@doc raw"""
541+
qrm_update_shift_spmat!(shifted_spmat, α)
542+
543+
Given a `shifted` block matrix of the form `(A √α)`, update the parameter `α` in the matrix.
544+
### Input Arguments
545+
* `shifted_spmat`: the input matrix of the qrm_shifted_spmat type. See `qrm_shifted_spmat` for more information.
546+
* `α ≥ 0`: the regularization parameter (note the square root in the block matrix representation).
547+
"""
548+
function qrm_update_shift_spmat! end
549+
550+
@doc raw"""
551+
x = qrm_golub_riley(spmat, b)
552+
553+
### Input Arguments :
554+
* `spmat`: the input matrix of the ill-conditionned system `Ax = b`.
555+
* `b`: the RHS of the ill-conditionned system.
556+
"""
557+
function qrm_golub_riley end
558+
559+
@doc raw"""
560+
qrm_golub_riley!(shifted_spmat, spfct, x, b, Δx, y; α = ϵm, max_iter = 50, tol = ϵm, transp = 'n')
561+
562+
This method implements the Golub-Riley iteration.
563+
Given a (possibly ill-conditionned or rank deficient) system `Ax = b` where `A` can have any shape `m×n`, compute `x = A†b = Aᵀ(AAᵀ)†b` where `A†` is the Moore-Penrose pseudoinverse of `A`.
564+
565+
### Input Arguments :
566+
567+
* `shifted_spmat`: a `qrm_shifted_spmat` type representing the matrix `(A √α)` where `α` is a regularization parameter for the Golub-Riley iteration. See `qrm_shift_spmat` for more information.
568+
* `spfct`: a sparse factorization object of type `qrm_spfct`.
569+
* `x`: the approximate solution vector x := A†b = Aᵀy, the size of this vector is n+m, its values are overwritten by this function and it can be uninitialized when the function is called.
570+
* `b`: the RHS vector of the linear system, the size of this vector is m.
571+
* `Δx`: an auxiliary vector used to compute the solution, the size of this vector is n+m and can be uninitialized when the function is called.
572+
* `y`: the vector used to compute y := (AAᵀ)†b, the size of this vector is n and can be uninitialized when the function is called.
573+
* `Δy`: an auxiliary vector used to compute the solution, the size of this vector is n and can be uninitialized when the function is called.
574+
575+
The definition of the `shifted_spmat` may look odd at first sight. We use such a block matrix as an argument of `qrm_golub_riley!` because the QR factorization of this matrix has the property that
576+
`RᵀR = AᵀA + αI` which is the system we need to repeatedly solve from for the Golub-Riley iteration.
577+
578+
The Golub-Riley method works as follows:
579+
580+
* Given A and b, choose a fixed regularization parameter `α > 0` and let x := 0.
581+
* At each iteration, compute Δx := Aᵀ(AAᵀ + αI)⁻¹ (b - Ax), and update x := x + Δx.
582+
583+
This method has the advantage of only costing one QR-factorization of the `qrm_shifted_spmat` matrix.
584+
For more details, see
585+
A. Dax and L. Eldén, Approximating minimum norm solutions of rank-deficient least squares problems, Numerical Linear Algebra with Applications, 5, pp. 79-99, 1998, DOI 10.1002/(SICI)1099-1506(199803/04)5:2<79::AID-NLA126>3.0.CO;2-4
586+
"""
587+
function qrm_golub_riley! end
588+
525589
@doc raw"""
526590
qrm_set(str, val)
527591
qrm_set(spfct, str, val)

src/utils.jl

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,3 +191,107 @@ function qrm_least_squares_semi_normal!(spmat :: qrm_spmat{T}, spfct :: qrm_spfc
191191

192192
qrm_refine!(spmat, spfct, x, z, Δx, y)
193193
end
194+
195+
function qrm_shift_spmat(spmat :: qrm_spmat{T}, α :: T = T(0)) where T # Given a m×n matrix A, return the matrix Aₐ = [A √aI].
196+
@assert real(α) 0
197+
@assert imag(α) == 0
198+
199+
m = spmat.mat.m
200+
n = spmat.mat.n
201+
irn = copy(spmat.irn)
202+
jcn = copy(spmat.jcn)
203+
val = copy(spmat.val)
204+
append!(irn, eltype(irn)(1):eltype(irn)(m))
205+
append!(jcn, eltype(irn)(1 + n):eltype(irn)(m + n))
206+
append!(val, sqrt(α)*ones(T,m))
207+
shifted_spmat = qrm_spmat_init(m, n + m, irn, jcn, val)
208+
209+
return qrm_shifted_spmat(shifted_spmat, α)
210+
end
211+
212+
function qrm_update_shift_spmat!(shifted_spmat :: qrm_shifted_spmat{T}, α :: T) where T
213+
shifted_spmat.α = α
214+
shifted_spmat.spmat.val[shifted_spmat.spmat.mat.nz - shifted_spmat.spmat.mat.m + 1:end] .= sqrt(α)
215+
end
216+
217+
function qrm_golub_riley(spmat :: qrm_spmat{T}, b :: AbstractVector{T}; α :: T = T(eps(real(T))), max_iter :: Int = 50, tol :: Real = eps(real(T)), transp :: Char = 'n') where T
218+
shifted_spmat = qrm_shift_spmat(spmat, α)
219+
spfct = qrm_spfct_init(shifted_spmat.spmat)
220+
n = shifted_spmat.spmat.mat.n
221+
m = shifted_spmat.spmat.mat.m
222+
223+
x = similar(b, n)
224+
Δx = similar(b, n)
225+
y = similar(b)
226+
Δy = similar(b)
227+
228+
qrm_golub_riley!(
229+
shifted_spmat,
230+
spfct,
231+
x,
232+
b,
233+
Δx,
234+
y,
235+
Δy,
236+
α = α,
237+
max_iter = max_iter,
238+
tol = tol,
239+
transp = transp
240+
)
241+
return x[1:n-m]
242+
end
243+
244+
function qrm_golub_riley!(
245+
shifted_spmat :: qrm_shifted_spmat{T},
246+
spfct :: qrm_spfct{T},
247+
x :: AbstractVector{T},
248+
b :: AbstractVector{T},
249+
Δx ::AbstractVector{T},
250+
y :: AbstractVector{T},
251+
Δy :: AbstractVector{T};
252+
α :: T = T(eps(real(T))),
253+
max_iter :: Int = 50,
254+
tol :: Real = eps(real(T)),
255+
transp :: Char = 'n'
256+
) where T
257+
258+
spmat = shifted_spmat.spmat
259+
t = T <: Real ? 't' : 'c'
260+
ntransp = transp == 't' || transp == 'c' ? 'n' : t
261+
262+
@assert real(α) 0
263+
@assert imag(α) == 0
264+
@assert length(b) == spmat.mat.m
265+
@assert length(x) == spmat.mat.n
266+
@assert length(y) == spmat.mat.m
267+
@assert length(Δy) == spmat.mat.m
268+
@assert length(Δx) == spmat.mat.n
269+
270+
qrm_update_shift_spmat!(shifted_spmat, α)
271+
qrm_spfct_init!(spfct, spmat)
272+
273+
qrm_set(spfct, "qrm_keeph", 0)
274+
qrm_analyse!(spmat, spfct, transp = transp)
275+
qrm_factorize!(spmat, spfct, transp = transp)
276+
277+
k = 0
278+
solved = false
279+
x .= T(0)
280+
y .= T(0)
281+
while k < max_iter && !solved
282+
283+
qrm_spmat_mv!(spmat, T(1), x, T(0), Δy, transp = ntransp)
284+
@. Δy = b - Δy
285+
286+
qrm_solve!(spfct, Δy, Δx, transp = t)
287+
qrm_solve!(spfct, Δx, Δy, transp = 'n')
288+
289+
qrm_spmat_mv!(spmat, T(1), Δy, T(0), Δx, transp = transp)
290+
291+
@. x = x + Δx
292+
@. y = y + Δy
293+
294+
solved = norm(Δx) tol*norm(x)
295+
k = k + 1
296+
end
297+
end

src/wrapper/qr_mumps_common.jl

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,16 @@ mutable struct qrm_spmat{T} <: AbstractSparseMatrix{T, Cint}
3535
end
3636
end
3737

38+
@doc raw"""
39+
This data type represents a "shifted" matrix. When one wants to solve a "regularized" problem of the form `(AᵀA + αI)x = b`,
40+
one can use a QR factorization of the block matrix `QR = (A √α)ᵀ` and note that `RᵀR = AᵀA + αI`. This type of problem is especially useful when `A` is poorly conditioned or rank deficient.
41+
It only contains a `qrm_spmat` matrix representing the above block matrix and the regularization parameter `α`.
42+
"""
43+
mutable struct qrm_shifted_spmat{T} <: AbstractSparseMatrix{T, Cint}
44+
spmat :: qrm_spmat{T}
45+
α :: T
46+
end
47+
3848
function Base.cconvert(::Type{Ref{c_spmat{T}}}, spmat :: qrm_spmat{T}) where T
3949
return spmat.mat
4050
end
@@ -45,12 +55,18 @@ function Base.unsafe_convert(::Type{Ref{c_spmat{T}}}, spmat :: c_spmat{T}) where
4555
end
4656

4757
Base.size(spmat :: qrm_spmat) = (spmat.mat.m, spmat.mat.n)
58+
Base.size(shifted_spmat :: qrm_shifted_spmat) = (shifted_spmat.spmat.mat.m, shifted_spmat.spmat.mat.n)
4859
SparseArrays.nnz(spmat :: qrm_spmat) = spmat.mat.nz
60+
SparseArrays.nnz(shifted_spmat :: qrm_shifted_spmat) = shifted_spmat.spmat.mat.nz
4961

5062
function Base.show(io :: IO, ::MIME"text/plain", spmat :: qrm_spmat)
5163
println(io, "Sparse matrix -- qrm_spmat of size ", size(spmat), " with ", nnz(spmat), " nonzeros.")
5264
end
5365

66+
function Base.show(io :: IO, ::MIME"text/plain", shifted_spmat :: qrm_shifted_spmat)
67+
println(io, "Sparse matrix -- qrm_spmat of size ", size(shifted_spmat.spmat), " with ", nnz(shifted_spmat.spmat), " nonzeros.")
68+
end
69+
5470
mutable struct c_spfct{T}
5571
m :: Cint
5672
n :: Cint

test/test_qrm.jl

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -325,7 +325,7 @@ end
325325
X = qrm_min_norm(spmat, B, seminormal = true)
326326
R = B - A * X
327327
@test norm(R) tol
328-
328+
329329
X = qrm_min_norm_semi_normal(spmat, B)
330330
R = B - A * X
331331
@test norm(R) tol
@@ -337,7 +337,7 @@ end
337337
qrm_min_norm!(spmat, b, x)
338338
r = b - A * x
339339
@test norm(r) tol
340-
340+
341341
Δx = similar(x)
342342
y = similar(b)
343343
qrm_min_norm_semi_normal!(spmat, spfct, b, x, Δx, y)
@@ -669,6 +669,19 @@ end
669669
x_refined = qrm_refine(spmat, spfct, x, b)
670670
@test norm(b - A'*(A*x)) norm(b - A'*(A*x_refined))
671671

672+
tol = (real(T) == Float32) ? 1e-2 : 1e-9
673+
A = sprand(T, m, n, 0.3)
674+
k = n ÷ 8
675+
A[randperm(n)[1:k],:] .= T(0) # Make A rank deficient.
676+
b = A*rand(T, n) # Make sure b is in the range of A
677+
678+
spmat = qrm_spmat_init(A)
679+
x = qrm_golub_riley(spmat, b, transp = transp)
680+
B = Matrix(A)
681+
682+
@test norm(A*x - b) tol
683+
@test norm(x - pinv(B)*b) tol
684+
@test norm(x - A'*pinv(B*B')*b) tol
672685
end
673686
end
674687

0 commit comments

Comments
 (0)