|
| 1 | + |
| 2 | +import Base.isempty |
| 3 | +using LinearAlgebra |
| 4 | +export blockFGMRES |
| 5 | + |
| 6 | +function blockFGMRES(A::SparseMatrixCSC{T1,Int},b::Array{T2,2},restrt::Int; kwargs...) where {T1,T2} |
| 7 | + Ax = zeros(promote_type(T1,T2),size(b)) |
| 8 | + return blockFGMRES(X -> mul!(Ax,A,X,1.0,0.0),b,restrt;kwargs...) |
| 9 | +end |
| 10 | + |
| 11 | +blockFGMRES(A,b::Array{Any,2},restrt;kwargs...) = blockFGMRES(x -> A*x ,b,restrt;kwargs...) |
| 12 | + |
| 13 | + |
| 14 | +""" |
| 15 | +x,flag,err,iter,resvec = blockFGMRES(A,b,restrt,tol=1e-2,maxIter=100,M=1,x=[],out=0) |
| 16 | +
|
| 17 | +Block (flexible) Generalized Minimal residual ( (F)GMRES(m) ) method with restarts applied to A*x = b. |
| 18 | +
|
| 19 | +This is the "right preconditioning" version of blockGMRES: A*M^{-1}Mx = b. |
| 20 | +
|
| 21 | +Input: |
| 22 | +
|
| 23 | +A - function computing A*x |
| 24 | +b - right hand side matrix (set of vectors) |
| 25 | +restrt - number of iterations between restarts |
| 26 | +tol - error tolerance |
| 27 | +maxIter - maximum number of iterations |
| 28 | +M - preconditioner, function computing M\\x |
| 29 | +x - starting guess |
| 30 | +out - flag for output (0 : only errors, 1 : final status, 2: error at each iteration) |
| 31 | +
|
| 32 | +Output: |
| 33 | +
|
| 34 | +x - approximate solution |
| 35 | +flag - exit flag ( 0 : desired tolerance achieved, |
| 36 | + -1 : maxIter reached without converging |
| 37 | + -9 : right hand side was zero) |
| 38 | + err - norm of relative residual, i.e., norm(A*x-b)/norm(b) |
| 39 | + iter - number of iterations |
| 40 | + resvec - norm of relative residual at each iteration |
| 41 | + |
| 42 | + preconditioner M(r) must return a copy of a matrix. Cannot reuse memory of r. |
| 43 | + """ |
| 44 | +function blockFGMRES(A::Function,B::Array,restrt::Int; tol::Real=1e-2,maxIter::Int=100,M::Function=t->copy(t),X::Array=[],out::Int=0,flexible::Bool=false,mem::FGMRESmem = getEmptyFGMRESmem()) |
| 45 | + # initialization |
| 46 | + n = size(B,1) |
| 47 | + m = size(B,2) |
| 48 | + TYPE = eltype(B) |
| 49 | + mem = checkMemorySize(mem,n,restrt,TYPE,flexible,m); |
| 50 | + |
| 51 | + if norm(B)==0.0 |
| 52 | + return zeros(TYPE,n,m),-9,0.0,0,[0.0]; |
| 53 | + end |
| 54 | + |
| 55 | + if Base.isempty(X) |
| 56 | + X = zeros(eltype(B),n,m) |
| 57 | + R = copy(B); |
| 58 | + elseif norm(X) < eps(real(TYPE)) |
| 59 | + R = copy(B); |
| 60 | + else |
| 61 | + R = copy(B); |
| 62 | + R.-=A(X); |
| 63 | + end |
| 64 | + |
| 65 | + if eltype(B) <: Complex |
| 66 | + X = complex(X) |
| 67 | + end |
| 68 | + if issparse(X) |
| 69 | + error("X is sparse"); |
| 70 | + end |
| 71 | + |
| 72 | + rnorm0 = norm(B); |
| 73 | + |
| 74 | + err = norm(R)/rnorm0 |
| 75 | + if err < tol; return X, err; end |
| 76 | + |
| 77 | + constOne = one(TYPE); |
| 78 | + constZero = zero(TYPE); |
| 79 | + |
| 80 | + restrt = min(restrt,n-1) |
| 81 | + Zbig = mem.Z; |
| 82 | + Vbig = mem.V; |
| 83 | + H = zeros(TYPE,(restrt+1)*m,restrt*m); |
| 84 | + xi = zeros(TYPE,(restrt+1)*m,m); |
| 85 | + T = zeros(TYPE,restrt*m,m); |
| 86 | + |
| 87 | + W = zeros(TYPE,0); |
| 88 | + Z = zeros(TYPE,0); |
| 89 | + |
| 90 | + resvec = zeros(restrt*maxIter) |
| 91 | + if out==2 |
| 92 | + println(@sprintf("=== blockFGMRES ===\n%4s\t%7s\n","iter","relres")) |
| 93 | + end |
| 94 | + |
| 95 | + |
| 96 | + flag = -1 |
| 97 | + counter = 0 |
| 98 | + iter = 0 |
| 99 | + while iter < maxIter |
| 100 | + iter+=1; |
| 101 | + Q = qr!(R); |
| 102 | + Betta = Q.R; |
| 103 | + R[:] = Matrix(Q.Q); # this code is equivalent to (W,Betta) = qr(r); |
| 104 | + xi[1:m,:] = Betta; |
| 105 | + #BLAS.scal!(n,(1/betta)*constOne,r,1); # w = w./betta |
| 106 | + H[:] .= 0.0; |
| 107 | + T[:] .= 0.0; |
| 108 | + |
| 109 | + if out==2;; print(@sprintf("%3d\t", iter));end |
| 110 | + |
| 111 | + for j = 1:restrt |
| 112 | + colSet_j = (((j-1)*m)+1):j*m |
| 113 | + if j==1 |
| 114 | + Vbig[:,colSet_j] = R; # no memory problem with this line.... |
| 115 | + Z = M(R); |
| 116 | + else |
| 117 | + Vbig[:,colSet_j] = W; # no memory problem with this line.... |
| 118 | + Z = M(W); |
| 119 | + end |
| 120 | + if flexible |
| 121 | + Zbig[:,colSet_j] = Z; |
| 122 | + end |
| 123 | + W = A(Z); |
| 124 | + counter += 1; |
| 125 | + # Gram Schmidt (much faster than MGS even though zeros are multiplied, does relatively well): |
| 126 | + BLAS.gemm!('C','N', constOne, Vbig, W,constZero,T); # t = V'*w; |
| 127 | + T[(j*m+1):end,:] .= 0.0; |
| 128 | + H[1:(restrt*m),colSet_j] = T; |
| 129 | + BLAS.gemm!('N','N', -constOne, Vbig, T,constOne,W); # w = w - V*t |
| 130 | + Q = qr!(W); Betta = Q.R; W[:] = Matrix(Q.Q); # this code is equivalent to (W,Betta) = qr(W); |
| 131 | + H[((j*m)+1):(j+1)*m,colSet_j] = Betta; |
| 132 | + y = H[1:(j+1)*m,1:j*m]\xi[1:(j+1)*m,:]; |
| 133 | + err = norm(H[1:(j+1)*m,1:j*m]*y - xi[1:(j+1)*m,:])/rnorm0 |
| 134 | + |
| 135 | + if out==2 print(@sprintf("%1.1e ", err)); end |
| 136 | + resvec[counter] = err; |
| 137 | + if err <= tol |
| 138 | + if flexible |
| 139 | + Zbig[:,(j*m+1):end] .= 0.0; |
| 140 | + else |
| 141 | + Vbig[:,(j*m+1):end] .= 0.0; |
| 142 | + end |
| 143 | + if out==2; print("\n"); end |
| 144 | + flag = 0; break |
| 145 | + end |
| 146 | + end # end for j to restrt |
| 147 | + |
| 148 | + y = pinv(H)*xi; |
| 149 | + |
| 150 | + if flexible |
| 151 | + BLAS.gemm!('N','N', constOne, Zbig, y,constZero,W); # w = Z*y #This is the correction that corresponds to the residual. |
| 152 | + else |
| 153 | + # W = Vbig*y; |
| 154 | + BLAS.gemm!('N','N', constOne, Vbig, y, constZero, W); |
| 155 | + Z = M(W); |
| 156 | + W[:] = Z; |
| 157 | + end |
| 158 | + X .+= W; |
| 159 | + |
| 160 | + if out==2; print("\n"); end |
| 161 | + if err <= tol |
| 162 | + flag = 0; |
| 163 | + break |
| 164 | + end |
| 165 | + if iter < maxIter |
| 166 | + R[:] = B; |
| 167 | + W = A(X); |
| 168 | + R .-= W; |
| 169 | + end |
| 170 | + end #for iter to maxiter |
| 171 | + if out>=0 |
| 172 | + if flag==-1 |
| 173 | + println(@sprintf("blockFGMRES iterated maxIter (=%d) outer iterations without achieving the desired tolerance. Acheived: %1.2e",maxIter,err)) |
| 174 | + elseif flag==0 && out>=1 |
| 175 | + println(@sprintf("blockFGMRES achieved desired tolerance at inner iteration %d. Residual norm is %1.2e.",counter,resvec[counter])) |
| 176 | + end |
| 177 | + end |
| 178 | + return X,flag,resvec[counter],iter,resvec[1:counter] |
| 179 | +end #blockFGMRES |
0 commit comments