Skip to content

Commit ca86b8d

Browse files
committed
expol: simplify PCG codes
1 parent e918b93 commit ca86b8d

File tree

8 files changed

+145
-166
lines changed

8 files changed

+145
-166
lines changed

include/ff/cuinduce.h

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#pragma once
2+
#include "ff/precision.h"
3+
4+
namespace tinker {
5+
// udir = polarity * field
6+
7+
__global__
8+
void pcgUdirV1(int n, const real* polarity, //
9+
real (*udir)[3], const real (*field)[3]);
10+
11+
__global__
12+
void pcgUdirV2(int n, const real* polarity, //
13+
real (*udir)[3], real (*udirp)[3], const real (*field)[3], const real (*fieldp)[3]);
14+
15+
// r(0) = E - (1/polarity + Tu) u(0) = (udir - u(0))/polarity + mutual field
16+
17+
__global__
18+
void pcgRsd0V1(int n, const real* polarity_inv, real (*rsd)[3], //
19+
const real (*udir)[3], const real (*uind)[3], const real (*field)[3]);
20+
21+
__global__
22+
void pcgRsd0V2(int n, const real* polarity_inv, real (*rsd)[3], real (*rsp)[3], //
23+
const real (*udir)[3], const real (*udip)[3], const real (*uind)[3], const real (*uinp)[3],
24+
const real (*field)[3], const real (*fielp)[3]);
25+
26+
__global__
27+
void pcgRsd0V3(int n, const real* polarity_inv, real (*rsd)[3], //
28+
const real (*udir)[3], const real (*uind)[3], const real (*field)[3],
29+
const real (*polscale)[3][3]);
30+
}

src/acc/hippo/expol.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,8 +142,8 @@ void alterpol_acc(real (*polscale)[3][3], real (*polinv)[3][3])
142142
}
143143
}
144144

145-
void dexpol_acc(const int vers, const real (*uind)[3], grad_prec* depx, grad_prec* depy,
146-
grad_prec* depz, VirialBuffer restrict vir_ep)
145+
void dexpol_acc(int vers, const real (*uind)[3], grad_prec* depx, grad_prec* depy, grad_prec* depz,
146+
VirialBuffer vir_ep)
147147
{
148148
auto do_v = vers & calc::virial;
149149
real cut = switchCut(Switch::REPULS);

src/cu/amoeba/pcg.cu

Lines changed: 11 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "ff/amoeba/induce.h"
22
#include "ff/amoebamod.h"
3+
#include "ff/cuinduce.h"
34
#include "ff/switch.h"
45
#include "seq/launch.h"
56
#include "tool/error.h"
@@ -10,36 +11,6 @@
1011
#include <tinker/detail/units.hh>
1112

1213
namespace tinker {
13-
__global__
14-
void pcgUdir(int n, const real* restrict polarity, real (*restrict udir)[3],
15-
real (*restrict udirp)[3], const real (*restrict field)[3], const real (*restrict fieldp)[3])
16-
{
17-
for (int i = ITHREAD; i < n; i += STRIDE) {
18-
real poli = polarity[i];
19-
#pragma unroll
20-
for (int j = 0; j < 3; ++j) {
21-
udir[i][j] = poli * field[i][j];
22-
udirp[i][j] = poli * fieldp[i][j];
23-
}
24-
}
25-
}
26-
27-
__global__
28-
void pcgRsd(int n, const real* restrict polarity_inv, //
29-
real (*restrict rsd)[3], real (*restrict rsp)[3], //
30-
const real (*restrict udir)[3], const real (*restrict udip)[3], const real (*restrict uind)[3],
31-
const real (*restrict uinp)[3], const real (*restrict field)[3], const real (*restrict fielp)[3])
32-
{
33-
for (int i = ITHREAD; i < n; i += STRIDE) {
34-
real poli_inv = polarity_inv[i];
35-
#pragma unroll
36-
for (int j = 0; j < 3; ++j) {
37-
rsd[i][j] = (udir[i][j] - uind[i][j]) * poli_inv + field[i][j];
38-
rsp[i][j] = (udip[i][j] - uinp[i][j]) * poli_inv + fielp[i][j];
39-
}
40-
}
41-
}
42-
4314
__global__
4415
void pcgRsd0(
4516
int n, const real* restrict polarity, real (*restrict rsd)[3], real (*restrict rsdp)[3])
@@ -81,10 +52,8 @@ void pcgP2(int n, const real* restrict polarity, //
8152
{
8253
real kaval = *ka, kapval = *kap;
8354
real a = *ksum / kaval, ap = *ksump / kapval;
84-
if (kaval == 0)
85-
a = 0;
86-
if (kapval == 0)
87-
ap = 0;
55+
if (kaval == 0) a = 0;
56+
if (kapval == 0) ap = 0;
8857
for (int i = ITHREAD; i < n; i += STRIDE) {
8958
#pragma unroll
9059
for (int j = 0; j < 3; ++j) {
@@ -111,10 +80,8 @@ void pcgP3(int n, const real* restrict ksum, const real* restrict ksump, const r
11180
{
11281
real kaval = *ksum, kapval = *ksump;
11382
real b = *ksum1 / kaval, bp = *ksump1 / kapval;
114-
if (kaval == 0)
115-
b = 0;
116-
if (kapval == 0)
117-
bp = 0;
83+
if (kaval == 0) b = 0;
84+
if (kapval == 0) bp = 0;
11885
for (int i = ITHREAD; i < n; i += STRIDE) {
11986
#pragma unroll
12087
for (int j = 0; j < 3; ++j) {
@@ -162,7 +129,7 @@ void induceMutualPcg1_cu(real (*uind)[3], real (*uinp)[3])
162129
// get the electrostatic field due to permanent multipoles
163130
dfield(field, fieldp);
164131
// direct induced dipoles
165-
launch_k1s(g::s0, n, pcgUdir, n, polarity, udir, udirp, field, fieldp);
132+
launch_k1s(g::s0, n, pcgUdirV2, n, polarity, udir, udirp, field, fieldp);
166133

167134
// initial induced dipole
168135
if (predict) {
@@ -188,7 +155,7 @@ void induceMutualPcg1_cu(real (*uind)[3], real (*uinp)[3])
188155
if (predict) {
189156
ufield(uind, uinp, field, fieldp);
190157
launch_k1s(
191-
g::s0, n, pcgRsd, n, polarity_inv, rsd, rsdp, udir, udirp, uind, uinp, field, fieldp);
158+
g::s0, n, pcgRsd0V2, n, polarity_inv, rsd, rsdp, udir, udirp, uind, uinp, field, fieldp);
192159
} else if (dirguess) {
193160
ufield(udir, udirp, rsd, rsdp);
194161
} else {
@@ -285,17 +252,12 @@ void induceMutualPcg1_cu(real (*uind)[3], real (*uinp)[3])
285252
print(stdout, " %8d %-16.10f\n", iter, eps);
286253
}
287254

288-
if (eps < poleps)
289-
done = true;
290-
if (eps > epsold)
291-
done = true;
292-
if (iter >= politer)
293-
done = true;
255+
if (eps < poleps) done = true;
256+
if (eps > epsold) done = true;
257+
if (iter >= politer) done = true;
294258

295259
// apply a "peek" iteration to the mutual induced dipoles
296-
if (done) {
297-
launch_k1s(g::s0, n, pcgPeek, n, pcgpeek, polarity, uind, uinp, rsd, rsdp);
298-
}
260+
if (done) launch_k1s(g::s0, n, pcgPeek, n, pcgpeek, polarity, uind, uinp, rsd, rsdp);
299261
}
300262

301263
// print the results from the conjugate gradient iteration

src/cu/aplus/pcg.cu

Lines changed: 9 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "ff/amoeba/induce.h"
22
#include "ff/amoebamod.h"
33
#include "ff/aplus/induce.h"
4+
#include "ff/cuinduce.h"
45
#include "ff/switch.h"
56
#include "seq/launch.h"
67
#include "tool/error.h"
@@ -11,32 +12,6 @@
1112
#include <tinker/detail/units.hh>
1213

1314
namespace tinker {
14-
__global__
15-
void pcgUdirAplus(
16-
int n, const real* restrict polarity, real (*restrict udir)[3], const real (*restrict field)[3])
17-
{
18-
for (int i = ITHREAD; i < n; i += STRIDE) {
19-
real poli = polarity[i];
20-
#pragma unroll
21-
for (int j = 0; j < 3; ++j) {
22-
udir[i][j] = poli * field[i][j];
23-
}
24-
}
25-
}
26-
27-
__global__
28-
void pcgRsd4(int n, const real* restrict polarity_inv, //
29-
real (*restrict rsd)[3], //
30-
const real (*restrict udir)[3], const real (*restrict uind)[3], const real (*restrict field)[3])
31-
{
32-
for (int i = ITHREAD; i < n; i += STRIDE) {
33-
real poli_inv = polarity_inv[i];
34-
#pragma unroll
35-
for (int j = 0; j < 3; ++j)
36-
rsd[i][j] = (udir[i][j] - uind[i][j]) * poli_inv + field[i][j];
37-
}
38-
}
39-
4015
__global__
4116
void pcgRsd3(int n, const real* restrict polarity, real (*restrict rsd)[3])
4217
{
@@ -69,8 +44,7 @@ void pcgP8(int n, const real* restrict polarity, //
6944
{
7045
real kaval = *ka;
7146
real a = *ksum / kaval;
72-
if (kaval == 0)
73-
a = 0;
47+
if (kaval == 0) a = 0;
7448
for (int i = ITHREAD; i < n; i += STRIDE) {
7549
#pragma unroll
7650
for (int j = 0; j < 3; ++j) {
@@ -91,8 +65,7 @@ void pcgP9(int n, const real* restrict ksum, const real* restrict ksum1, real (*
9165
{
9266
real ksumval = *ksum;
9367
real b = *ksum1 / ksumval;
94-
if (ksumval == 0)
95-
b = 0;
68+
if (ksumval == 0) b = 0;
9669
for (int i = ITHREAD; i < n; i += STRIDE) {
9770
#pragma unroll
9871
for (int j = 0; j < 3; ++j)
@@ -131,7 +104,7 @@ void induceMutualPcg3_cu(real (*uind)[3])
131104
// get the electrostatic field due to permanent multipoles
132105
dfieldAplus(field);
133106
// direct induced dipoles
134-
launch_k1s(g::s0, n, pcgUdirAplus, n, polarity, udir, field);
107+
launch_k1s(g::s0, n, pcgUdirV1, n, polarity, udir, field);
135108

136109
// initial induced dipole
137110
if (predict) {
@@ -155,7 +128,7 @@ void induceMutualPcg3_cu(real (*uind)[3])
155128
// if do not use pcgguess, r(0) = E - T Zero = E
156129
if (predict) {
157130
ufieldAplus(uind, field);
158-
launch_k1s(g::s0, n, pcgRsd4, n, polarity_inv, rsd, udir, uind, field);
131+
launch_k1s(g::s0, n, pcgRsd0V1, n, polarity_inv, rsd, udir, uind, field);
159132
} else if (dirguess) {
160133
ufieldAplus(udir, rsd);
161134
} else {
@@ -241,16 +214,12 @@ void induceMutualPcg3_cu(real (*uind)[3])
241214
print(stdout, " %8d %-16.10f\n", iter, eps);
242215
}
243216

244-
if (eps < poleps)
245-
done = true;
246-
if (eps > epsold)
247-
done = true;
248-
if (iter >= politer)
249-
done = true;
217+
if (eps < poleps) done = true;
218+
if (eps > epsold) done = true;
219+
if (iter >= politer) done = true;
250220

251221
// apply a "peek" iteration to the mutual induced dipoles
252-
if (done)
253-
launch_k1s(g::s0, n, pcgPeek2, n, pcgpeek, polarity, uind, rsd);
222+
if (done) launch_k1s(g::s0, n, pcgPeek2, n, pcgpeek, polarity, uind, rsd);
254223
}
255224

256225
// print the results from the conjugate gradient iteration

src/cu/cmakesrc.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ hippo/expol.cu
3333
hippo/field.cu
3434
hippo/pcg.cu
3535
hippo/precond.cu
36+
induce.cu
3637
mathparallel.cu
3738
mathzero.cu
3839
mdhc.cu

src/cu/hippo/expol.cu

Lines changed: 5 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -735,7 +735,7 @@ void dexpol_cu1(int n, TINKER_IMAGE_PARAMS, VirialBuffer restrict vep, grad_prec
735735
}
736736

737737
void dexpol_cu(int vers, const real (*uind)[3], grad_prec* depx, grad_prec* depy, grad_prec* depz,
738-
VirialBuffer restrict vir_ep)
738+
VirialBuffer vir_ep)
739739
{
740740
const auto& st = *mspatial_v2_unit;
741741
real cut = switchCut(Switch::REPULS);
@@ -766,6 +766,7 @@ void dexpol_cu(int vers, const real (*uind)[3], grad_prec* depx, grad_prec* depy
766766
#include "ff/amoeba/induce.h"
767767
#include "ff/amoebamod.h"
768768
#include "ff/atom.h"
769+
#include "ff/cuinduce.h"
769770
#include "ff/hippo/induce.h"
770771
#include "ff/hippomod.h"
771772
#include "ff/switch.h"
@@ -780,20 +781,7 @@ void dexpol_cu(int vers, const real (*uind)[3], grad_prec* depx, grad_prec* depy
780781

781782
namespace tinker {
782783
__global__
783-
void eppcgUdirDonly(
784-
int n, const real* restrict polarity, real (*restrict udir)[3], const real (*restrict field)[3])
785-
{
786-
for (int i = ITHREAD; i < n; i += STRIDE) {
787-
real poli = polarity[i];
788-
#pragma unroll
789-
for (int j = 0; j < 3; ++j) {
790-
udir[i][j] = poli * field[i][j];
791-
}
792-
}
793-
}
794-
795-
__global__
796-
void eppcgUdirGuess(int n, const real* restrict polarity, real (*restrict uind)[3],
784+
static void eppcgUdirGuess(int n, const real* restrict polarity, real (*restrict uind)[3],
797785
const real (*restrict field)[3], const real (*restrict polinv)[3][3])
798786
{
799787
for (int i = ITHREAD; i < n; i += STRIDE) {
@@ -807,24 +795,6 @@ void eppcgUdirGuess(int n, const real* restrict polarity, real (*restrict uind)[
807795
}
808796
}
809797

810-
__global__
811-
void eppcgRsd2(int n, const real* restrict polarity_inv, //
812-
real (*restrict rsd)[3], //
813-
const real (*restrict udir)[3], const real (*restrict uind)[3], const real (*restrict field)[3],
814-
const real (*restrict polscale)[3][3])
815-
{
816-
for (int i = ITHREAD; i < n; i += STRIDE) {
817-
real poli_inv = polarity_inv[i];
818-
#pragma unroll
819-
for (int j = 0; j < 3; ++j) {
820-
rsd[i][j] = (udir[i][j] - uind[i][0] * polscale[i][0][j] - uind[i][1] * polscale[i][1][j] -
821-
uind[i][2] * polscale[i][2][j]) *
822-
poli_inv +
823-
field[i][j];
824-
}
825-
}
826-
}
827-
828798
__global__
829799
void eppcgRsd1(int n, const real* restrict polarity, real (*restrict rsd)[3])
830800
{
@@ -921,7 +891,7 @@ void induceMutualPcg4_cu(real (*uind)[3])
921891
// get the electrostatic field due to permanent multipoles
922892
dfieldChgpen(field);
923893
// direct induced dipoles
924-
launch_k1s(g::s0, n, eppcgUdirDonly, n, polarity, udir, field);
894+
launch_k1s(g::s0, n, pcgUdirV1, n, polarity, udir, field);
925895

926896
alterpol(polscale, polinv);
927897

@@ -936,7 +906,7 @@ void induceMutualPcg4_cu(real (*uind)[3])
936906

937907
if (predict) {
938908
ufieldChgpen(uind, field);
939-
launch_k1s(g::s0, n, eppcgRsd2, n, polarity_inv, rsd, udir, uind, field, polscale);
909+
launch_k1s(g::s0, n, pcgRsd0V3, n, polarity_inv, rsd, udir, uind, field, polscale);
940910
} else if (dirguess) {
941911
// uind is used here instead of udir since without exchange polarization udir = uind
942912
// but with exchange polarization udir != uind (for dirguess).

0 commit comments

Comments
 (0)