Skip to content

Commit 86b79ca

Browse files
committed
merge hippo and aplus cuda pcg kernels
1 parent ba54687 commit 86b79ca

File tree

8 files changed

+31
-460
lines changed

8 files changed

+31
-460
lines changed

include/ff/aplus/induce.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,5 @@ void dfieldAplus(real (*field)[3]);
66

77
void ufieldAplus(const real (*uind)[3], real (*field)[3]);
88

9-
void diagPrecond3(const real (*rsd)[3], real (*zrsd)[3]);
10-
11-
void sparsePrecondBuild3();
129
void sparsePrecondApply3(const real (*rsd)[3], real (*zrsd)[3]);
1310
}

src/acc/aplus/induce.cpp

Lines changed: 0 additions & 229 deletions
Original file line numberDiff line numberDiff line change
@@ -1,239 +1,10 @@
1-
#include "ff/amoeba/induce.h"
21
#include "ff/amoebamod.h"
3-
#include "ff/aplus/induce.h"
42
#include "ff/atom.h"
53
#include "ff/image.h"
64
#include "ff/nblist.h"
7-
#include "ff/switch.h"
85
#include "seq/add.h"
96
#include "seq/damp.h"
10-
#include "tool/error.h"
117
#include "tool/gpucard.h"
12-
#include "tool/ioprint.h"
13-
#include <tinker/detail/inform.hh>
14-
#include <tinker/detail/polpcg.hh>
15-
#include <tinker/detail/polpot.hh>
16-
#include <tinker/detail/units.hh>
17-
18-
namespace tinker {
19-
void induceMutualPcg3_acc(real (*uind)[3])
20-
{
21-
auto* field = work01_;
22-
auto* rsd = work02_;
23-
auto* zrsd = work03_;
24-
auto* conj = work04_;
25-
auto* vec = work05_;
26-
27-
bool dirguess = polpcg::pcgguess;
28-
// use sparse matrix preconditioner
29-
// or just use diagonal matrix preconditioner
30-
const bool sparse_prec = polpcg::pcgprec and (switchOff(Switch::USOLVE) > 0);
31-
32-
// zero out the induced dipoles at each site
33-
34-
bool predict = polpred != UPred::NONE;
35-
if (predict and nualt < maxualt) {
36-
predict = false;
37-
dirguess = true;
38-
}
39-
// get the electrostatic field due to permanent multipoles
40-
dfieldAplus(field);
41-
// direct induced dipoles
42-
43-
#pragma acc parallel loop independent async\
44-
deviceptr(polarity,udir,field)
45-
for (int i = 0; i < n; ++i) {
46-
real poli = polarity[i];
47-
#pragma acc loop seq
48-
for (int j = 0; j < 3; ++j)
49-
udir[i][j] = poli * field[i][j];
50-
}
51-
52-
// initial induced dipole
53-
if (predict) {
54-
ulspredSum(uind, nullptr);
55-
} else if (dirguess) {
56-
darray::copy(g::q0, n, uind, udir);
57-
} else {
58-
darray::zero(g::q0, n, uind);
59-
}
60-
// initial residual r(0)
61-
62-
// if do not use pcgguess, r(0) = E - T Zero = E
63-
// if use pcgguess, r(0) = E - (inv_alpha + Tu) alpha E
64-
// = E - E -Tu udir
65-
// = -Tu udir
66-
67-
if (predict) {
68-
ufieldAplus(uind, field);
69-
#pragma acc parallel loop independent async\
70-
deviceptr(polarity_inv,rsd,udir,uind,field)
71-
for (int i = 0; i < n; ++i) {
72-
real pol = polarity_inv[i];
73-
#pragma acc loop seq
74-
for (int j = 0; j < 3; ++j)
75-
rsd[i][j] = (udir[i][j] - uind[i][j]) * pol + field[i][j];
76-
}
77-
} else if (dirguess) {
78-
ufieldAplus(uind, rsd);
79-
} else {
80-
darray::copy(g::q0, n, rsd, field);
81-
}
82-
#pragma acc parallel loop independent async deviceptr(polarity,rsd)
83-
for (int i = 0; i < n; ++i) {
84-
if (polarity[i] == 0) {
85-
rsd[i][0] = 0;
86-
rsd[i][1] = 0;
87-
rsd[i][2] = 0;
88-
}
89-
}
90-
91-
// initial M r(0) and p(0)
92-
93-
if (sparse_prec) {
94-
sparsePrecondBuild3();
95-
sparsePrecondApply3(rsd, zrsd);
96-
} else {
97-
diagPrecond3(rsd, zrsd);
98-
}
99-
darray::copy(g::q0, n, conj, zrsd);
100-
101-
// initial r(0) M r(0)
102-
103-
real sum;
104-
sum = darray::dotThenReturn(g::q0, n, rsd, zrsd);
105-
106-
// conjugate gradient iteration of the mutual induced dipoles
107-
108-
const bool debug = inform::debug;
109-
const int politer = polpot::politer;
110-
const real poleps = polpot::poleps;
111-
const real debye = units::debye;
112-
const real pcgpeek = polpcg::pcgpeek;
113-
const int maxiter = 100; // see also subroutine induce0a in induce.f
114-
const int miniter = std::min(3, n);
115-
116-
bool done = false;
117-
int iter = 0;
118-
real eps = 100;
119-
// real epsold;
120-
121-
while (not done) {
122-
++iter;
123-
124-
// vec = (inv_alpha + Tu) conj, field = -Tu conj
125-
// vec = inv_alpha * conj - field
126-
ufieldAplus(conj, field);
127-
#pragma acc parallel loop independent async\
128-
deviceptr(polarity_inv,vec,conj,field)
129-
for (int i = 0; i < n; ++i) {
130-
real poli_inv = polarity_inv[i];
131-
#pragma acc loop seq
132-
for (int j = 0; j < 3; ++j)
133-
vec[i][j] = poli_inv * conj[i][j] - field[i][j];
134-
}
135-
136-
// a <- p T p
137-
real a;
138-
a = darray::dotThenReturn(g::q0, n, conj, vec);
139-
// a <- r M r / p T p
140-
if (a != 0)
141-
a = sum / a;
142-
143-
// u <- u + a p
144-
// r <- r - a T p
145-
#pragma acc parallel loop independent async\
146-
deviceptr(polarity,uind,conj,rsd,vec)
147-
for (int i = 0; i < n; ++i) {
148-
#pragma acc loop seq
149-
for (int j = 0; j < 3; ++j) {
150-
uind[i][j] += a * conj[i][j];
151-
rsd[i][j] -= a * vec[i][j];
152-
}
153-
if (polarity[i] == 0) {
154-
rsd[i][0] = 0;
155-
rsd[i][1] = 0;
156-
rsd[i][2] = 0;
157-
}
158-
}
159-
160-
// calculate/update M r
161-
if (sparse_prec)
162-
sparsePrecondApply3(rsd, zrsd);
163-
else
164-
diagPrecond3(rsd, zrsd);
165-
166-
real b;
167-
real sum1;
168-
sum1 = darray::dotThenReturn(g::q0, n, rsd, zrsd);
169-
b = sum1 / sum;
170-
if (sum == 0)
171-
b = 0;
172-
173-
// calculate/update p
174-
#pragma acc parallel loop independent async\
175-
deviceptr(conj,zrsd)
176-
for (int i = 0; i < n; ++i) {
177-
#pragma acc loop seq
178-
for (int j = 0; j < 3; ++j)
179-
conj[i][j] = zrsd[i][j] + b * conj[i][j];
180-
}
181-
182-
sum = sum1;
183-
184-
real epsd;
185-
epsd = darray::dotThenReturn(g::q0, n, rsd, rsd);
186-
// epsold = eps;
187-
eps = epsd;
188-
eps = debye * REAL_SQRT(eps / n);
189-
190-
if (debug) {
191-
if (iter == 1) {
192-
print(stdout,
193-
"\n Determination of SCF Induced Dipole Moments\n\n"
194-
" Iter RMS Residual (Debye)\n\n");
195-
}
196-
print(stdout, " %8d %-16.10f\n", iter, eps);
197-
}
198-
199-
if (eps < poleps)
200-
done = true;
201-
// if (eps > epsold) done = true;
202-
if (iter < miniter) done = false;
203-
if (iter >= politer)
204-
done = true;
205-
206-
// apply a "peek" iteration to the mutual induced dipoles
207-
208-
if (done) {
209-
#pragma acc parallel loop independent async\
210-
deviceptr(polarity,uind,rsd)
211-
for (int i = 0; i < n; ++i) {
212-
real term = pcgpeek * polarity[i];
213-
#pragma acc loop seq
214-
for (int j = 0; j < 3; ++j)
215-
uind[i][j] += term * rsd[i][j];
216-
}
217-
}
218-
}
219-
220-
// print the results from the conjugate gradient iteration
221-
222-
if (debug) {
223-
print(stdout,
224-
" Induced Dipoles : Iterations %4d RMS "
225-
"Residual %14.10f\n",
226-
iter, eps);
227-
}
228-
229-
// terminate the calculation if dipoles failed to converge
230-
231-
if (iter >= maxiter) {
232-
printError();
233-
TINKER_THROW("INDUCE -- Warning, Induced Dipoles are not Converged");
234-
}
235-
}
236-
}
2378

2389
namespace tinker {
23910
#define APPLY_DPTRS rsd, zrsd, x, y, z, polarity, pdamp, thole

src/acc/hippo/induce.cpp

Lines changed: 14 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "ff/amoeba/induce.h"
22
#include "ff/amoebamod.h"
3+
#include "ff/aplus/induce.h"
34
#include "ff/atom.h"
45
#include "ff/hippo/induce.h"
56
#include "ff/hippomod.h"
@@ -17,26 +18,7 @@
1718
#include <tinker/detail/polpot.hh>
1819
#include <tinker/detail/units.hh>
1920

20-
namespace tinker { // PCG
21-
//
22-
// M = preconditioner
23-
// T = inv_alpha + Tu, -Tu = ufield_chgpen
24-
//
25-
// r(0) = E - T u(0)
26-
// p(0) (or conj(0)) = M r(0)
27-
// ------------------------------
28-
// gamma (or a) = r(i) M r(i) / p(i) T p(i)
29-
// u(i+1) = u(i) + a p(i)
30-
// r(i+1) = r(i) - a T p(i)
31-
// beta (or b(i+1)) = r(i+1) M r(i+1) / r(i) M r(i)
32-
// p(i+1) = M r(r+1) + b(i+1) p(i)
33-
// ------------------------------
34-
//
35-
// subroutine induce0a in induce.f
36-
// rsd = r
37-
// zrsd = M r
38-
// conj = p
39-
// vec = T P
21+
namespace tinker {
4022
void induceMutualPcg2_acc(real (*uind)[3])
4123
{
4224
auto* field = work01_;
@@ -45,6 +27,10 @@ void induceMutualPcg2_acc(real (*uind)[3])
4527
auto* conj = work04_;
4628
auto* vec = work05_;
4729

30+
auto pcg_dfield = polpot::use_tholed ? dfieldAplus : dfieldChgpen;
31+
auto pcg_ufield = polpot::use_tholed ? ufieldAplus : ufieldChgpen;
32+
auto pcg_sparse_apply = polpot::use_tholed ? sparsePrecondApply3 : sparsePrecondApply2;
33+
4834
bool dirguess = polpcg::pcgguess;
4935
// use sparse matrix preconditioner
5036
// or just use diagonal matrix preconditioner
@@ -58,7 +44,7 @@ void induceMutualPcg2_acc(real (*uind)[3])
5844
dirguess = true;
5945
}
6046
// get the electrostatic field due to permanent multipoles
61-
dfieldChgpen(field);
47+
pcg_dfield(field);
6248
// direct induced dipoles
6349

6450
#pragma acc parallel loop independent async\
@@ -86,7 +72,7 @@ void induceMutualPcg2_acc(real (*uind)[3])
8672
// = -Tu udir
8773

8874
if (predict) {
89-
ufieldChgpen(uind, field);
75+
pcg_ufield(uind, field);
9076
#pragma acc parallel loop independent async\
9177
deviceptr(polarity_inv,rsd,udir,uind,field)
9278
for (int i = 0; i < n; ++i) {
@@ -96,7 +82,7 @@ void induceMutualPcg2_acc(real (*uind)[3])
9682
rsd[i][j] = (udir[i][j] - uind[i][j]) * pol + field[i][j];
9783
}
9884
} else if (dirguess) {
99-
ufieldChgpen(uind, rsd);
85+
pcg_ufield(uind, rsd);
10086
} else {
10187
darray::copy(g::q0, n, rsd, field);
10288
}
@@ -113,7 +99,7 @@ void induceMutualPcg2_acc(real (*uind)[3])
11399

114100
if (sparse_prec) {
115101
sparsePrecondBuild2();
116-
sparsePrecondApply2(rsd, zrsd);
102+
pcg_sparse_apply(rsd, zrsd);
117103
} else {
118104
diagPrecond2(rsd, zrsd);
119105
}
@@ -144,7 +130,7 @@ void induceMutualPcg2_acc(real (*uind)[3])
144130

145131
// vec = (inv_alpha + Tu) conj, field = -Tu conj
146132
// vec = inv_alpha * conj - field
147-
ufieldChgpen(conj, field);
133+
pcg_ufield(conj, field);
148134
#pragma acc parallel loop independent async\
149135
deviceptr(polarity_inv,vec,conj,field)
150136
for (int i = 0; i < n; ++i) {
@@ -180,7 +166,7 @@ void induceMutualPcg2_acc(real (*uind)[3])
180166

181167
// calculate/update M r
182168
if (sparse_prec)
183-
sparsePrecondApply2(rsd, zrsd);
169+
pcg_sparse_apply(rsd, zrsd);
184170
else
185171
diagPrecond2(rsd, zrsd);
186172

@@ -220,7 +206,8 @@ void induceMutualPcg2_acc(real (*uind)[3])
220206
if (eps < poleps)
221207
done = true;
222208
// if (eps > epsold) done = true;
223-
if (iter < miniter) done = false;
209+
if (iter < miniter)
210+
done = false;
224211
if (iter >= politer)
225212
done = true;
226213

src/aplus/induce.cpp

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,6 @@
44
#include "tool/externfunc.h"
55

66
namespace tinker {
7-
void diagPrecond3(const real (*rsd)[3], real (*zrsd)[3])
8-
{
9-
diagPrecond2(rsd, zrsd);
10-
}
11-
12-
void sparsePrecondBuild3() {}
13-
147
TINKER_FVOID2(acc1, cu1, sparsePrecondApply3, const real (*)[3], real (*)[3]);
158
void sparsePrecondApply3(const real (*rsd)[3], real (*zrsd)[3])
169
{

0 commit comments

Comments
 (0)