|
| 1 | +import numpy as np |
| 2 | +import scipy.sparse as sp |
| 3 | +from scipy.sparse.linalg import cg |
| 4 | + |
| 5 | + |
| 6 | + |
| 7 | + |
| 8 | +from qmat import QDELTA_GENERATORS |
| 9 | + |
| 10 | +from pySDC.core.collocation import CollBase |
| 11 | +from pySDC.implementations.problem_classes.HeatEquation_ND_FD import heatNd_unforced |
| 12 | +from pySDC.implementations.problem_classes.AllenCahn_2D_FD import allencahn_fullyimplicit |
| 13 | + |
| 14 | + |
| 15 | +class counter(object): |
| 16 | + def __init__(self, disp=True): |
| 17 | + self._disp = disp |
| 18 | + self.niter = 0 |
| 19 | + def __call__(self, rk=None): |
| 20 | + self.niter += 1 |
| 21 | + if self._disp: |
| 22 | + # print('iter %3i\trk = %s' % (self.niter, str(rk))) |
| 23 | + print(' LIN: %3i' % self.niter) |
| 24 | + |
| 25 | +def linear(): |
| 26 | + |
| 27 | + # instantiate problem |
| 28 | + prob = heatNd_unforced( |
| 29 | + nvars=1023, # number of degrees of freedom |
| 30 | + nu=0.1, # diffusion coefficient |
| 31 | + freq=4, # frequency for the test value |
| 32 | + bc='dirichlet-zero', # boundary conditions |
| 33 | + ) |
| 34 | + |
| 35 | + coll = CollBase(num_nodes=3, tleft=0, tright=1, node_type='LEGENDRE', quad_type='RADAU-RIGHT') |
| 36 | + |
| 37 | + generator = QDELTA_GENERATORS['LU']( |
| 38 | + # for algebraic types (LU, ...) |
| 39 | + Q=coll.generator.Q, |
| 40 | + # for MIN in tables, MIN-SR-S ... |
| 41 | + nNodes=coll.num_nodes, |
| 42 | + nodeType=coll.node_type, |
| 43 | + quadType=coll.quad_type, |
| 44 | + # for time-stepping types, MIN-SR-NS |
| 45 | + nodes=coll.nodes, |
| 46 | + tLeft=coll.tleft, |
| 47 | + ) |
| 48 | + |
| 49 | + dt = 0.001 |
| 50 | + |
| 51 | + # shrink collocation matrix: first line and column deals with initial value, not needed here |
| 52 | + Q = coll.Qmat[1:, 1:] |
| 53 | + QDmat = generator.genCoeffs(k=None) |
| 54 | + |
| 55 | + # build system matrix M of collocation problem |
| 56 | + M = sp.eye(prob.nvars[0] * coll.num_nodes) - dt * sp.kron(Q, prob.A) |
| 57 | + |
| 58 | + P = sp.eye(prob.nvars[0] * coll.num_nodes) - dt * sp.kron(QDmat, prob.A) |
| 59 | + |
| 60 | + # get initial value at t0 = 0 |
| 61 | + u0 = prob.u_exact(t=0) |
| 62 | + # fill in u0-vector as right-hand side for the collocation problem |
| 63 | + u0_coll = np.kron(np.ones(coll.num_nodes), u0) |
| 64 | + # get exact solution at Tend = dt |
| 65 | + uend = prob.u_exact(t=dt) |
| 66 | + |
| 67 | + # solve collocation problem directly |
| 68 | + u_coll = sp.linalg.spsolve(M, u0_coll) |
| 69 | + # compute error |
| 70 | + err = np.linalg.norm(u_coll[-prob.nvars[0] :] - uend, np.inf) |
| 71 | + print('Error exact inverse:', err) |
| 72 | + |
| 73 | + kmax = 10 |
| 74 | + tol = 1E-10 |
| 75 | + uk = np.zeros(prob.nvars[0] * coll.num_nodes, dtype='float64') |
| 76 | + res = np.zeros(prob.nvars[0] * coll.num_nodes, dtype='float32') |
| 77 | + res[:] = u0_coll - M.dot(uk) |
| 78 | + for k in range(kmax): |
| 79 | + |
| 80 | + # does not work well with float32 |
| 81 | + # res[:] = u0_coll + dt * sp.kron(Q-QDmat, prob.A).dot(uk) |
| 82 | + # uk = sp.linalg.spsolve(P,res) |
| 83 | + |
| 84 | + # works well with float32 |
| 85 | + uk += sp.linalg.spsolve(P, res) |
| 86 | + res[:] = u0_coll - M.dot(uk) |
| 87 | + |
| 88 | + # but how can we do this for nonlinear problems? it seems there is no iterative refinement for nonlinear problems?? |
| 89 | + # May need to go to Outer-Newton, inner SDC. See also https://zenodo.org/records/6835437 |
| 90 | + |
| 91 | + resnorm = np.linalg.norm(res, np.inf) |
| 92 | + err = np.linalg.norm(uk[-prob.nvars[0]:] - uend, np.inf) |
| 93 | + print(k, resnorm, err) |
| 94 | + if resnorm < tol: |
| 95 | + break |
| 96 | + # print(k, resnorm) |
| 97 | + |
| 98 | +def nonlinear(): |
| 99 | + # instantiate problem |
| 100 | + prob = allencahn_fullyimplicit(nvars=(64,64)) |
| 101 | + |
| 102 | + coll = CollBase(num_nodes=4, tleft=0, tright=1, node_type='LEGENDRE', quad_type='RADAU-RIGHT') |
| 103 | + |
| 104 | + generator = QDELTA_GENERATORS['MIN-SR-S']( |
| 105 | + # for algebraic types (LU, ...) |
| 106 | + Q=coll.generator.Q, |
| 107 | + # for MIN in tables, MIN-SR-S ... |
| 108 | + nNodes=coll.num_nodes, |
| 109 | + nodeType=coll.node_type, |
| 110 | + quadType=coll.quad_type, |
| 111 | + # for time-stepping types, MIN-SR-NS |
| 112 | + nodes=coll.nodes, |
| 113 | + tLeft=coll.tleft, |
| 114 | + ) |
| 115 | + QDmat = generator.genCoeffs(k=None) |
| 116 | + |
| 117 | + dt = 0.001 / 2 |
| 118 | + |
| 119 | + # shrink collocation matrix: first line and column deals with initial value, not needed here |
| 120 | + Q = coll.Qmat[1:, 1:] |
| 121 | + |
| 122 | + # c = coll.nodes |
| 123 | + # V = np.fliplr(np.vander(c)) |
| 124 | + # C = np.diag(c) |
| 125 | + # R = np.diag([1 / i for i in range(1,coll.num_nodes+1)]) |
| 126 | + # print(C @ V @ R @ np.linalg.inv(V) - Q) |
| 127 | + # exit() |
| 128 | + |
| 129 | + |
| 130 | + u0 = prob.u_exact(t=0).flatten() |
| 131 | + # fill in u0-vector as right-hand side for the collocation problem |
| 132 | + u0_coll = np.kron(np.ones(coll.num_nodes), u0) |
| 133 | + |
| 134 | + nvars = prob.nvars[0] * prob.nvars[1] |
| 135 | + |
| 136 | + un = np.zeros(nvars * coll.num_nodes, dtype='float64') |
| 137 | + fn = np.zeros(nvars * coll.num_nodes, dtype='float64') |
| 138 | + g = np.zeros(nvars * coll.num_nodes, dtype='float64') |
| 139 | + # vk = np.zeros(nvars * coll.num_nodes, dtype='float64') |
| 140 | + |
| 141 | + ksum = 0 |
| 142 | + n = 0 |
| 143 | + n_max = 100 |
| 144 | + tol_newton = 1E-10 |
| 145 | + |
| 146 | + count = counter() |
| 147 | + |
| 148 | + while n < n_max: |
| 149 | + # form the function g with g(u) = 0 |
| 150 | + for m in range(coll.num_nodes): |
| 151 | + fn[m * nvars: (m + 1) * nvars] = prob.eval_f(un[m * nvars: (m + 1) * nvars], |
| 152 | + t=dt * coll.nodes[m]).flatten() |
| 153 | + g[:] = u0_coll - (un - dt * sp.kron(Q, sp.eye(nvars)).dot(fn)) |
| 154 | + |
| 155 | + # if g is close to 0, then we are done |
| 156 | + res_newton = np.linalg.norm(g, np.inf) |
| 157 | + print('Newton:', n, res_newton) |
| 158 | + |
| 159 | + if res_newton < tol_newton: |
| 160 | + break |
| 161 | + |
| 162 | + # assemble dg |
| 163 | + dg = -sp.eye(nvars * coll.num_nodes) + dt * sp.kron(Q, sp.eye(nvars)).dot(sp.kron(sp.eye(coll.num_nodes), prob.A) + 1.0 / prob.eps**2 * sp.diags((1.0 - (prob.nu + 1) * un ** prob.nu), offsets=0)) |
| 164 | + |
| 165 | + # Newton |
| 166 | + # vk = sp.linalg.spsolve(dg, g) |
| 167 | + # vk = sp.linalg.gmres(dg, g, x0=np.zeros_like(vk), maxiter=10, atol=1E-10, callback=count)[0] |
| 168 | + # iter_count += count.niter |
| 169 | + # un -= vk |
| 170 | + # continue |
| 171 | + |
| 172 | + # Collocation |
| 173 | + # Emulate Newton |
| 174 | + # dgP = -sp.eye(nvars * coll.num_nodes) + dt * sp.kron(Q, sp.eye(nvars)).dot(sp.kron(sp.eye(coll.num_nodes), prob.A) + 1.0 / prob.eps**2 * sp.diags((1.0 - (prob.nu + 1) * un ** prob.nu), offsets=0)) |
| 175 | + # Linear SDC (e.g. with diagonal preconditioner) |
| 176 | + # dgP = -sp.eye(nvars * coll.num_nodes) + dt * sp.kron(QDmat, sp.eye(nvars)).dot(sp.kron(sp.eye(coll.num_nodes), prob.A) + 1.0 / prob.eps**2 * sp.diags((1.0 - (prob.nu + 1) * un ** prob.nu), offsets=0)) |
| 177 | + # Linear SDC with frozen Jacobian |
| 178 | + dgP = -sp.eye(nvars * coll.num_nodes) + dt * sp.kron(QDmat, prob.A + 1.0 / prob.eps**2 * sp.diags((1.0 - (prob.nu + 1) * un[0:nvars] ** prob.nu), offsets=0)) |
| 179 | + # dgP = dgP.astype('float64') |
| 180 | + |
| 181 | + # Diagonalization |
| 182 | + # D, V = np.linalg.eig(Q) |
| 183 | + # Vinv = np.linalg.inv(V) |
| 184 | + # dg_diag = -sp.eye(nvars * coll.num_nodes) + dt * sp.kron(sp.diags(D), prob.A + 1.0 / prob.eps**2 * sp.diags((1.0 - (prob.nu + 1) * un[0:nvars] ** prob.nu), offsets=0)) |
| 185 | + # dg_diag = dg_diag.astype('complex64') |
| 186 | + |
| 187 | + vk = np.zeros(nvars * coll.num_nodes, dtype='float64') |
| 188 | + res = np.zeros(nvars * coll.num_nodes, dtype='float64') |
| 189 | + |
| 190 | + res[:] = g - dg.dot(vk) |
| 191 | + k = 0 |
| 192 | + tol_sdc = 1E-11 |
| 193 | + kmax = 1 |
| 194 | + while k < kmax: |
| 195 | + # Newton/Collocation: works well with float32 for both P and res, but not for vk |
| 196 | + # vk += sp.linalg.spsolve(dgP, res) |
| 197 | + vk += sp.linalg.gmres(dgP, res, x0=np.zeros_like(res), maxiter=1, atol=1E-10, callback=count)[0] |
| 198 | + |
| 199 | + # Newton/Diagonalization |
| 200 | + # vk += np.real(sp.kron(V,sp.eye(nvars)).dot(sp.linalg.spsolve(dg_diag, sp.kron(Vinv,sp.eye(nvars)).dot(res)))) |
| 201 | + # vk += np.real(sp.kron(V,sp.eye(nvars)).dot(sp.linalg.gmres(dg_diag, sp.kron(Vinv,sp.eye(nvars)).dot(res), x0=np.zeros_like(res), maxiter=1, atol=1E-10, callback=count)[0])) |
| 202 | + |
| 203 | + res[:] = g - dg.dot(vk) |
| 204 | + resnorm = np.linalg.norm(res, np.inf) |
| 205 | + print(' SDC:', n, k, ksum, resnorm) |
| 206 | + |
| 207 | + if resnorm < tol_sdc: |
| 208 | + break |
| 209 | + k += 1 |
| 210 | + ksum += 1 |
| 211 | + |
| 212 | + # Update |
| 213 | + un -= vk |
| 214 | + |
| 215 | + # increase Newton iteration count |
| 216 | + n += 1 |
| 217 | + |
| 218 | +def nonlinear_sdc(): |
| 219 | + # instantiate problem |
| 220 | + prob = allencahn_fullyimplicit(nvars=(64,64)) |
| 221 | + |
| 222 | + coll = CollBase(num_nodes=4, tleft=0, tright=1, node_type='LEGENDRE', quad_type='RADAU-RIGHT') |
| 223 | + |
| 224 | + generator = QDELTA_GENERATORS['MIN-SR-S']( |
| 225 | + # for algebraic types (LU, ...) |
| 226 | + Q=coll.generator.Q, |
| 227 | + # for MIN in tables, MIN-SR-S ... |
| 228 | + nNodes=coll.num_nodes, |
| 229 | + nodeType=coll.node_type, |
| 230 | + quadType=coll.quad_type, |
| 231 | + # for time-stepping types, MIN-SR-NS |
| 232 | + nodes=coll.nodes, |
| 233 | + tLeft=coll.tleft, |
| 234 | + ) |
| 235 | + QDmat = generator.genCoeffs(k=None) |
| 236 | + |
| 237 | + dt = 0.001 / 2 |
| 238 | + |
| 239 | + # shrink collocation matrix: first line and column deals with initial value, not needed here |
| 240 | + Q = coll.Qmat[1:, 1:] |
| 241 | + |
| 242 | + u0 = prob.u_exact(t=0).flatten() |
| 243 | + # fill in u0-vector as right-hand side for the collocation problem |
| 244 | + u0_coll = np.kron(np.ones(coll.num_nodes), u0) |
| 245 | + |
| 246 | + nvars = prob.nvars[0] * prob.nvars[1] |
| 247 | + |
| 248 | + uk = np.zeros(nvars * coll.num_nodes, dtype='float64') |
| 249 | + fk = np.zeros(nvars * coll.num_nodes, dtype='float64') |
| 250 | + rhs = np.zeros(nvars * coll.num_nodes, dtype='float64') |
| 251 | + |
| 252 | + ksum = 0 |
| 253 | + k = 0 |
| 254 | + k_max = 100 |
| 255 | + tol_sdc = 1E-10 |
| 256 | + |
| 257 | + count = counter() |
| 258 | + |
| 259 | + while k < k_max: |
| 260 | + # eval rhs |
| 261 | + for m in range(coll.num_nodes): |
| 262 | + fk[m * nvars: (m + 1) * nvars] = prob.eval_f(uk[m * nvars: (m + 1) * nvars], |
| 263 | + t=dt * coll.nodes[m]).flatten() |
| 264 | + |
| 265 | + resnorm = np.linalg.norm(u0_coll - (uk - dt * sp.kron(Q, sp.eye(nvars)).dot(fk)), np.inf) |
| 266 | + print('SDC:', k, ksum, resnorm) |
| 267 | + if resnorm < tol_sdc: |
| 268 | + break |
| 269 | + |
| 270 | + g = np.zeros(nvars * coll.num_nodes, dtype='float64') |
| 271 | + vn = np.zeros(nvars * coll.num_nodes, dtype='float64') |
| 272 | + fn = np.zeros(nvars * coll.num_nodes, dtype='float64') |
| 273 | + n = 0 |
| 274 | + n_max = 100 |
| 275 | + tol_newton = 1E-11 |
| 276 | + while n < n_max: |
| 277 | + for m in range(coll.num_nodes): |
| 278 | + fn[m * nvars: (m + 1) * nvars] = prob.eval_f(vn[m * nvars: (m + 1) * nvars], |
| 279 | + t=dt * coll.nodes[m]).flatten() |
| 280 | + g[:] = u0_coll + dt * sp.kron(Q-QDmat, sp.eye(nvars)).dot(fk) - (vn - dt * sp.kron(QDmat, sp.eye(nvars)).dot(fn)) |
| 281 | + |
| 282 | + # if g is close to 0, then we are done |
| 283 | + res_newton = np.linalg.norm(g, np.inf) |
| 284 | + print(' Newton:', n, res_newton) |
| 285 | + n += 1 |
| 286 | + if res_newton < tol_newton: |
| 287 | + break |
| 288 | + |
| 289 | + # assemble dg |
| 290 | + dg = -sp.eye(nvars * coll.num_nodes) + dt * sp.kron(QDmat, sp.eye(nvars)).dot(sp.kron(sp.eye(coll.num_nodes), prob.A) + 1.0 / prob.eps**2 * sp.diags((1.0 - (prob.nu + 1) * vn ** prob.nu), offsets=0)) |
| 291 | + |
| 292 | + # Newton |
| 293 | + # vk = sp.linalg.spsolve(dg, g) |
| 294 | + vn -= sp.linalg.gmres(dg, g, x0=np.zeros_like(vn), maxiter=100, atol=1E-12, callback=count)[0] |
| 295 | + |
| 296 | + uk = vn.copy() |
| 297 | + |
| 298 | + k += 1 |
| 299 | + |
| 300 | + |
| 301 | +if __name__ == "__main__": |
| 302 | + # linear() |
| 303 | + # nonlinear() |
| 304 | + nonlinear_sdc() |
0 commit comments