Skip to content

Commit 3532f60

Browse files
committed
Playing with newton and mixed precision
1 parent 79da3f2 commit 3532f60

File tree

2 files changed

+304
-0
lines changed

2 files changed

+304
-0
lines changed

metrixpp.db

76 KB
Binary file not shown.
Lines changed: 304 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,304 @@
1+
import numpy as np
2+
import scipy.sparse as sp
3+
from scipy.sparse.linalg import cg
4+
5+
6+
7+
8+
from qmat import QDELTA_GENERATORS
9+
10+
from pySDC.core.collocation import CollBase
11+
from pySDC.implementations.problem_classes.HeatEquation_ND_FD import heatNd_unforced
12+
from pySDC.implementations.problem_classes.AllenCahn_2D_FD import allencahn_fullyimplicit
13+
14+
15+
class counter(object):
16+
def __init__(self, disp=True):
17+
self._disp = disp
18+
self.niter = 0
19+
def __call__(self, rk=None):
20+
self.niter += 1
21+
if self._disp:
22+
# print('iter %3i\trk = %s' % (self.niter, str(rk)))
23+
print(' LIN: %3i' % self.niter)
24+
25+
def linear():
26+
27+
# instantiate problem
28+
prob = heatNd_unforced(
29+
nvars=1023, # number of degrees of freedom
30+
nu=0.1, # diffusion coefficient
31+
freq=4, # frequency for the test value
32+
bc='dirichlet-zero', # boundary conditions
33+
)
34+
35+
coll = CollBase(num_nodes=3, tleft=0, tright=1, node_type='LEGENDRE', quad_type='RADAU-RIGHT')
36+
37+
generator = QDELTA_GENERATORS['LU'](
38+
# for algebraic types (LU, ...)
39+
Q=coll.generator.Q,
40+
# for MIN in tables, MIN-SR-S ...
41+
nNodes=coll.num_nodes,
42+
nodeType=coll.node_type,
43+
quadType=coll.quad_type,
44+
# for time-stepping types, MIN-SR-NS
45+
nodes=coll.nodes,
46+
tLeft=coll.tleft,
47+
)
48+
49+
dt = 0.001
50+
51+
# shrink collocation matrix: first line and column deals with initial value, not needed here
52+
Q = coll.Qmat[1:, 1:]
53+
QDmat = generator.genCoeffs(k=None)
54+
55+
# build system matrix M of collocation problem
56+
M = sp.eye(prob.nvars[0] * coll.num_nodes) - dt * sp.kron(Q, prob.A)
57+
58+
P = sp.eye(prob.nvars[0] * coll.num_nodes) - dt * sp.kron(QDmat, prob.A)
59+
60+
# get initial value at t0 = 0
61+
u0 = prob.u_exact(t=0)
62+
# fill in u0-vector as right-hand side for the collocation problem
63+
u0_coll = np.kron(np.ones(coll.num_nodes), u0)
64+
# get exact solution at Tend = dt
65+
uend = prob.u_exact(t=dt)
66+
67+
# solve collocation problem directly
68+
u_coll = sp.linalg.spsolve(M, u0_coll)
69+
# compute error
70+
err = np.linalg.norm(u_coll[-prob.nvars[0] :] - uend, np.inf)
71+
print('Error exact inverse:', err)
72+
73+
kmax = 10
74+
tol = 1E-10
75+
uk = np.zeros(prob.nvars[0] * coll.num_nodes, dtype='float64')
76+
res = np.zeros(prob.nvars[0] * coll.num_nodes, dtype='float32')
77+
res[:] = u0_coll - M.dot(uk)
78+
for k in range(kmax):
79+
80+
# does not work well with float32
81+
# res[:] = u0_coll + dt * sp.kron(Q-QDmat, prob.A).dot(uk)
82+
# uk = sp.linalg.spsolve(P,res)
83+
84+
# works well with float32
85+
uk += sp.linalg.spsolve(P, res)
86+
res[:] = u0_coll - M.dot(uk)
87+
88+
# but how can we do this for nonlinear problems? it seems there is no iterative refinement for nonlinear problems??
89+
# May need to go to Outer-Newton, inner SDC. See also https://zenodo.org/records/6835437
90+
91+
resnorm = np.linalg.norm(res, np.inf)
92+
err = np.linalg.norm(uk[-prob.nvars[0]:] - uend, np.inf)
93+
print(k, resnorm, err)
94+
if resnorm < tol:
95+
break
96+
# print(k, resnorm)
97+
98+
def nonlinear():
99+
# instantiate problem
100+
prob = allencahn_fullyimplicit(nvars=(64,64))
101+
102+
coll = CollBase(num_nodes=4, tleft=0, tright=1, node_type='LEGENDRE', quad_type='RADAU-RIGHT')
103+
104+
generator = QDELTA_GENERATORS['MIN-SR-S'](
105+
# for algebraic types (LU, ...)
106+
Q=coll.generator.Q,
107+
# for MIN in tables, MIN-SR-S ...
108+
nNodes=coll.num_nodes,
109+
nodeType=coll.node_type,
110+
quadType=coll.quad_type,
111+
# for time-stepping types, MIN-SR-NS
112+
nodes=coll.nodes,
113+
tLeft=coll.tleft,
114+
)
115+
QDmat = generator.genCoeffs(k=None)
116+
117+
dt = 0.001 / 2
118+
119+
# shrink collocation matrix: first line and column deals with initial value, not needed here
120+
Q = coll.Qmat[1:, 1:]
121+
122+
# c = coll.nodes
123+
# V = np.fliplr(np.vander(c))
124+
# C = np.diag(c)
125+
# R = np.diag([1 / i for i in range(1,coll.num_nodes+1)])
126+
# print(C @ V @ R @ np.linalg.inv(V) - Q)
127+
# exit()
128+
129+
130+
u0 = prob.u_exact(t=0).flatten()
131+
# fill in u0-vector as right-hand side for the collocation problem
132+
u0_coll = np.kron(np.ones(coll.num_nodes), u0)
133+
134+
nvars = prob.nvars[0] * prob.nvars[1]
135+
136+
un = np.zeros(nvars * coll.num_nodes, dtype='float64')
137+
fn = np.zeros(nvars * coll.num_nodes, dtype='float64')
138+
g = np.zeros(nvars * coll.num_nodes, dtype='float64')
139+
# vk = np.zeros(nvars * coll.num_nodes, dtype='float64')
140+
141+
ksum = 0
142+
n = 0
143+
n_max = 100
144+
tol_newton = 1E-10
145+
146+
count = counter()
147+
148+
while n < n_max:
149+
# form the function g with g(u) = 0
150+
for m in range(coll.num_nodes):
151+
fn[m * nvars: (m + 1) * nvars] = prob.eval_f(un[m * nvars: (m + 1) * nvars],
152+
t=dt * coll.nodes[m]).flatten()
153+
g[:] = u0_coll - (un - dt * sp.kron(Q, sp.eye(nvars)).dot(fn))
154+
155+
# if g is close to 0, then we are done
156+
res_newton = np.linalg.norm(g, np.inf)
157+
print('Newton:', n, res_newton)
158+
159+
if res_newton < tol_newton:
160+
break
161+
162+
# assemble dg
163+
dg = -sp.eye(nvars * coll.num_nodes) + dt * sp.kron(Q, sp.eye(nvars)).dot(sp.kron(sp.eye(coll.num_nodes), prob.A) + 1.0 / prob.eps**2 * sp.diags((1.0 - (prob.nu + 1) * un ** prob.nu), offsets=0))
164+
165+
# Newton
166+
# vk = sp.linalg.spsolve(dg, g)
167+
# vk = sp.linalg.gmres(dg, g, x0=np.zeros_like(vk), maxiter=10, atol=1E-10, callback=count)[0]
168+
# iter_count += count.niter
169+
# un -= vk
170+
# continue
171+
172+
# Collocation
173+
# Emulate Newton
174+
# dgP = -sp.eye(nvars * coll.num_nodes) + dt * sp.kron(Q, sp.eye(nvars)).dot(sp.kron(sp.eye(coll.num_nodes), prob.A) + 1.0 / prob.eps**2 * sp.diags((1.0 - (prob.nu + 1) * un ** prob.nu), offsets=0))
175+
# Linear SDC (e.g. with diagonal preconditioner)
176+
# dgP = -sp.eye(nvars * coll.num_nodes) + dt * sp.kron(QDmat, sp.eye(nvars)).dot(sp.kron(sp.eye(coll.num_nodes), prob.A) + 1.0 / prob.eps**2 * sp.diags((1.0 - (prob.nu + 1) * un ** prob.nu), offsets=0))
177+
# Linear SDC with frozen Jacobian
178+
dgP = -sp.eye(nvars * coll.num_nodes) + dt * sp.kron(QDmat, prob.A + 1.0 / prob.eps**2 * sp.diags((1.0 - (prob.nu + 1) * un[0:nvars] ** prob.nu), offsets=0))
179+
# dgP = dgP.astype('float64')
180+
181+
# Diagonalization
182+
# D, V = np.linalg.eig(Q)
183+
# Vinv = np.linalg.inv(V)
184+
# dg_diag = -sp.eye(nvars * coll.num_nodes) + dt * sp.kron(sp.diags(D), prob.A + 1.0 / prob.eps**2 * sp.diags((1.0 - (prob.nu + 1) * un[0:nvars] ** prob.nu), offsets=0))
185+
# dg_diag = dg_diag.astype('complex64')
186+
187+
vk = np.zeros(nvars * coll.num_nodes, dtype='float64')
188+
res = np.zeros(nvars * coll.num_nodes, dtype='float64')
189+
190+
res[:] = g - dg.dot(vk)
191+
k = 0
192+
tol_sdc = 1E-11
193+
kmax = 1
194+
while k < kmax:
195+
# Newton/Collocation: works well with float32 for both P and res, but not for vk
196+
# vk += sp.linalg.spsolve(dgP, res)
197+
vk += sp.linalg.gmres(dgP, res, x0=np.zeros_like(res), maxiter=1, atol=1E-10, callback=count)[0]
198+
199+
# Newton/Diagonalization
200+
# vk += np.real(sp.kron(V,sp.eye(nvars)).dot(sp.linalg.spsolve(dg_diag, sp.kron(Vinv,sp.eye(nvars)).dot(res))))
201+
# vk += np.real(sp.kron(V,sp.eye(nvars)).dot(sp.linalg.gmres(dg_diag, sp.kron(Vinv,sp.eye(nvars)).dot(res), x0=np.zeros_like(res), maxiter=1, atol=1E-10, callback=count)[0]))
202+
203+
res[:] = g - dg.dot(vk)
204+
resnorm = np.linalg.norm(res, np.inf)
205+
print(' SDC:', n, k, ksum, resnorm)
206+
207+
if resnorm < tol_sdc:
208+
break
209+
k += 1
210+
ksum += 1
211+
212+
# Update
213+
un -= vk
214+
215+
# increase Newton iteration count
216+
n += 1
217+
218+
def nonlinear_sdc():
219+
# instantiate problem
220+
prob = allencahn_fullyimplicit(nvars=(64,64))
221+
222+
coll = CollBase(num_nodes=4, tleft=0, tright=1, node_type='LEGENDRE', quad_type='RADAU-RIGHT')
223+
224+
generator = QDELTA_GENERATORS['MIN-SR-S'](
225+
# for algebraic types (LU, ...)
226+
Q=coll.generator.Q,
227+
# for MIN in tables, MIN-SR-S ...
228+
nNodes=coll.num_nodes,
229+
nodeType=coll.node_type,
230+
quadType=coll.quad_type,
231+
# for time-stepping types, MIN-SR-NS
232+
nodes=coll.nodes,
233+
tLeft=coll.tleft,
234+
)
235+
QDmat = generator.genCoeffs(k=None)
236+
237+
dt = 0.001 / 2
238+
239+
# shrink collocation matrix: first line and column deals with initial value, not needed here
240+
Q = coll.Qmat[1:, 1:]
241+
242+
u0 = prob.u_exact(t=0).flatten()
243+
# fill in u0-vector as right-hand side for the collocation problem
244+
u0_coll = np.kron(np.ones(coll.num_nodes), u0)
245+
246+
nvars = prob.nvars[0] * prob.nvars[1]
247+
248+
uk = np.zeros(nvars * coll.num_nodes, dtype='float64')
249+
fk = np.zeros(nvars * coll.num_nodes, dtype='float64')
250+
rhs = np.zeros(nvars * coll.num_nodes, dtype='float64')
251+
252+
ksum = 0
253+
k = 0
254+
k_max = 100
255+
tol_sdc = 1E-10
256+
257+
count = counter()
258+
259+
while k < k_max:
260+
# eval rhs
261+
for m in range(coll.num_nodes):
262+
fk[m * nvars: (m + 1) * nvars] = prob.eval_f(uk[m * nvars: (m + 1) * nvars],
263+
t=dt * coll.nodes[m]).flatten()
264+
265+
resnorm = np.linalg.norm(u0_coll - (uk - dt * sp.kron(Q, sp.eye(nvars)).dot(fk)), np.inf)
266+
print('SDC:', k, ksum, resnorm)
267+
if resnorm < tol_sdc:
268+
break
269+
270+
g = np.zeros(nvars * coll.num_nodes, dtype='float64')
271+
vn = np.zeros(nvars * coll.num_nodes, dtype='float64')
272+
fn = np.zeros(nvars * coll.num_nodes, dtype='float64')
273+
n = 0
274+
n_max = 100
275+
tol_newton = 1E-11
276+
while n < n_max:
277+
for m in range(coll.num_nodes):
278+
fn[m * nvars: (m + 1) * nvars] = prob.eval_f(vn[m * nvars: (m + 1) * nvars],
279+
t=dt * coll.nodes[m]).flatten()
280+
g[:] = u0_coll + dt * sp.kron(Q-QDmat, sp.eye(nvars)).dot(fk) - (vn - dt * sp.kron(QDmat, sp.eye(nvars)).dot(fn))
281+
282+
# if g is close to 0, then we are done
283+
res_newton = np.linalg.norm(g, np.inf)
284+
print(' Newton:', n, res_newton)
285+
n += 1
286+
if res_newton < tol_newton:
287+
break
288+
289+
# assemble dg
290+
dg = -sp.eye(nvars * coll.num_nodes) + dt * sp.kron(QDmat, sp.eye(nvars)).dot(sp.kron(sp.eye(coll.num_nodes), prob.A) + 1.0 / prob.eps**2 * sp.diags((1.0 - (prob.nu + 1) * vn ** prob.nu), offsets=0))
291+
292+
# Newton
293+
# vk = sp.linalg.spsolve(dg, g)
294+
vn -= sp.linalg.gmres(dg, g, x0=np.zeros_like(vn), maxiter=100, atol=1E-12, callback=count)[0]
295+
296+
uk = vn.copy()
297+
298+
k += 1
299+
300+
301+
if __name__ == "__main__":
302+
# linear()
303+
# nonlinear()
304+
nonlinear_sdc()

0 commit comments

Comments
 (0)