Skip to content

Commit b9c319f

Browse files
committed
implementation of cuda alterpol and dexpol
1 parent 37b8c72 commit b9c319f

File tree

10 files changed

+1288
-14
lines changed

10 files changed

+1288
-14
lines changed

ext/ext/yaml/alterpol_cu1.yaml

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
KERNEL_NAME: alterpol_cu1
2+
3+
CUT_DISTANCE:
4+
- 'cut'
5+
6+
OFF_DISTANCE:
7+
- 'off'
8+
9+
EXCLUDE_INFO:
10+
- 'dinfo'
11+
12+
SCALE_1X_TYPE: real3_const_array,1
13+
14+
EXTRA_PARAMS: |
15+
, real (*restrict polscale)[9]
16+
, const real* restrict kpep, const real* restrict prepep
17+
, const real* restrict dmppep, const int* restrict lpep
18+
, ExpolScr scrtyp
19+
20+
I_POSITION:
21+
- def: shared real xi from:x
22+
- def: shared real yi from:y
23+
- def: shared real zi from:z
24+
K_POSITION:
25+
- def: register real xk from:x
26+
- def: register real yk from:y
27+
- def: register real zk from:z
28+
29+
I_FORCE:
30+
- def: shared real psci00 addto:polscale,0
31+
- def: shared real psci01 addto:polscale,1
32+
- def: shared real psci02 addto:polscale,2
33+
- def: shared real psci10 addto:polscale,3
34+
- def: shared real psci11 addto:polscale,4
35+
- def: shared real psci12 addto:polscale,5
36+
- def: shared real psci20 addto:polscale,6
37+
- def: shared real psci21 addto:polscale,7
38+
- def: shared real psci22 addto:polscale,8
39+
K_FORCE:
40+
- def: register real psck00 addto:polscale,0
41+
- def: register real psck01 addto:polscale,1
42+
- def: register real psck02 addto:polscale,2
43+
- def: register real psck10 addto:polscale,3
44+
- def: register real psck11 addto:polscale,4
45+
- def: register real psck12 addto:polscale,5
46+
- def: register real psck20 addto:polscale,6
47+
- def: register real psck21 addto:polscale,7
48+
- def: register real psck22 addto:polscale,8
49+
50+
I_VARIABLES:
51+
- def: shared real springi from:kpep
52+
- def: shared real sizi from:prepep
53+
- def: shared real alphai from:dmppep
54+
- def: shared int epli from:lpep
55+
K_VARIABLES:
56+
- def: register real springk from:kpep
57+
- def: register real sizk from:prepep
58+
- def: register real alphak from:dmppep
59+
- def: register int eplk from:lpep
60+
61+
FULL_PAIRWISE_INTERACTION: |
62+
real xr = xk - @xi@;
63+
real yr = yk - @yi@;
64+
real zr = zk - @zi@;
65+
real r2 = image2(xr, yr, zr);
66+
if ((eplk or @epli@) and r2 <= off * off and incl) {
67+
real r = REAL_SQRT(r2);
68+
real ks2i[3][3], ks2k[3][3];
69+
pair_alterpol(scrtyp, r, r2, scaleb, cut, off, xr, yr, zr, @springi@, @sizi@, @alphai@,
70+
springk, sizk, alphak, ks2i, ks2k);
71+
@psci00@ = ks2i[0][0];
72+
@psci01@ = ks2i[0][1];
73+
@psci02@ = ks2i[0][2];
74+
@psci10@ = ks2i[1][0];
75+
@psci11@ = ks2i[1][1];
76+
@psci12@ = ks2i[1][2];
77+
@psci20@ = ks2i[2][0];
78+
@psci21@ = ks2i[2][1];
79+
@psci22@ = ks2i[2][2];
80+
psck00 = ks2k[0][0];
81+
psck01 = ks2k[0][1];
82+
psck02 = ks2k[0][2];
83+
psck10 = ks2k[1][0];
84+
psck11 = ks2k[1][1];
85+
psck12 = ks2k[1][2];
86+
psck20 = ks2k[2][0];
87+
psck21 = ks2k[2][1];
88+
psck22 = ks2k[2][2];
89+
}

ext/ext/yaml/dexpol.yaml

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
KERNEL_NAME: dexpol_cu1
2+
TEMPLATE_PARAMS: template<class Ver>
3+
CONSTEXPR_FLAGS: |
4+
constexpr bool do_v = Ver::v;
5+
6+
VIRIAL:
7+
- vep
8+
GRADIENT:
9+
- gx
10+
- gy
11+
- gz
12+
13+
CUT_DISTANCE:
14+
- 'cut'
15+
16+
OFF_DISTANCE:
17+
- 'off'
18+
19+
EXCLUDE_INFO:
20+
- 'dinfo'
21+
22+
SCALE_1X_TYPE: real3_const_array,1
23+
24+
EXTRA_PARAMS: |
25+
, const real* restrict polarity, const real (*restrict uind)[3]
26+
, const real* restrict kpep, const real* restrict prepep
27+
, const real* restrict dmppep, const int* restrict lpep
28+
, ExpolScr scrtyp, real f
29+
30+
I_POSITION:
31+
- def: shared real xi from:x
32+
- def: shared real yi from:y
33+
- def: shared real zi from:z
34+
K_POSITION:
35+
- def: register real xk from:x
36+
- def: register real yk from:y
37+
- def: register real zk from:z
38+
39+
I_FORCE:
40+
- def: shared real frcxi addto:gx
41+
- def: shared real frcyi addto:gy
42+
- def: shared real frczi addto:gz
43+
44+
K_FORCE:
45+
- def: register real frcxk addto:gx
46+
- def: register real frcyk addto:gy
47+
- def: register real frczk addto:gz
48+
49+
I_VARIABLES:
50+
- def: shared real uix from:uind,0
51+
- def: shared real uiy from:uind,1
52+
- def: shared real uiz from:uind,2
53+
- def: shared real springi from:kpep
54+
- def: shared real sizi from:prepep
55+
- def: shared real alphai from:dmppep
56+
- def: shared int epli from:lpep
57+
- def: shared real poli from:polarity
58+
59+
K_VARIABLES:
60+
- def: register real ukx from:uind,0
61+
- def: register real uky from:uind,1
62+
- def: register real ukz from:uind,2
63+
- def: register real springk from:kpep
64+
- def: register real sizk from:prepep
65+
- def: register real alphak from:dmppep
66+
- def: register int eplk from:lpep
67+
- def: register real polk from:polarity
68+
69+
FULL_PAIRWISE_INTERACTION: |
70+
real xr = xk - @xi@;
71+
real yr = yk - @yi@;
72+
real zr = zk - @zi@;
73+
real r2 = image2(xr, yr, zr);
74+
if ((eplk or @epli@) and r2 <= off * off and incl) {
75+
real r = REAL_SQRT(r2);
76+
real frc[3];
77+
pair_dexpol(scrtyp, r, r2, scaleb, cut, off, xr, yr, zr, @uix@, @uiy@, @uiz@, ukx, uky, ukz,
78+
@springi@/@poli@, @sizi@, @alphai@, springk/polk, sizk, alphak, f, frc);
79+
@frcxi@ += frc[0];
80+
@frcyi@ += frc[1];
81+
@frczi@ += frc[2];
82+
frcxk -= frc[0];
83+
frcyk -= frc[1];
84+
frczk -= frc[2];
85+
86+
if CONSTEXPR (do_v) {
87+
real vxx = -xr * frc[0];
88+
real vxy = -0.5f * (yr * frc[0] + xr * frc[1]);
89+
real vxz = -0.5f * (zr * frc[0] + xr * frc[2]);
90+
real vyy = -yr * frc[1];
91+
real vyz = -0.5f * (zr * frc[1] + yr * frc[2]);
92+
real vzz = -zr * frc[2];
93+
veptlxx += floatTo<vbuf_prec>(vxx);
94+
veptlyx += floatTo<vbuf_prec>(vxy);
95+
veptlzx += floatTo<vbuf_prec>(vxz);
96+
veptlyy += floatTo<vbuf_prec>(vyy);
97+
veptlzy += floatTo<vbuf_prec>(vyz);
98+
veptlzz += floatTo<vbuf_prec>(vzz);
99+
}
100+
}

include/seq/pair_alterpol.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,9 @@ inline void pair_alterpol(ExpolScr scrtyp, real r, real r2, real pscale, real cu
3838
ak[1] = -ai[1];
3939
ak[2] = -ai[2];
4040
#pragma acc loop seq
41-
for (int i{0}; i < 3; ++i) {
41+
for (int i = 0; i < 3; ++i) {
4242
#pragma acc loop seq
43-
for (int j{0}; j < 3; ++j) {
43+
for (int j = 0; j < 3; ++j) {
4444
ks2i[j][i] = p33i * ai[i] * ai[j];
4545
ks2k[j][i] = p33k * ak[i] * ak[j];
4646
}

src/acc/hippo/alterpol.cpp

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
#include <tinker/routines.h>
1212

1313
namespace tinker {
14-
void alterpol(real (*polscale)[3][3], real (*polinv)[3][3])
14+
void alterpol_acc(real (*polscale)[3][3], real (*polinv)[3][3])
1515
{
1616
real cut = switchCut(Switch::REPULS);
1717
real off = switchOff(Switch::REPULS);
@@ -38,6 +38,7 @@ void alterpol(real (*polscale)[3][3], real (*polinv)[3][3])
3838
// find variable polarizability scale matrix at each site
3939
MAYBE_UNUSED int GRID_DIM = gpuGridSize(BLOCK_DIM);
4040
#pragma acc parallel async num_gangs(GRID_DIM) vector_length(BLOCK_DIM)\
41+
present(lvec1,lvec2,lvec3,recipa,recipb,recipc)\
4142
deviceptr(x,y,z,kpep,prepep,dmppep,lpep,mlst,polscale)
4243
#pragma acc loop gang independent
4344
for (int i = 0; i < n; ++i) {
@@ -72,15 +73,16 @@ void alterpol(real (*polscale)[3][3], real (*polinv)[3][3])
7273
for (int l = 0; l < 3; ++l) {
7374
#pragma acc loop seq
7475
for (int m = 0; m < 3; ++m) {
75-
polscale[i][m][l] += ks2i[m][l];
76-
polscale[k][m][l] += ks2k[m][l];
76+
atomic_add(ks2i[m][l], &polscale[i][m][l]);
77+
atomic_add(ks2k[m][l], &polscale[k][m][l]);
7778
}
7879
}
7980
}
8081
}
8182
}
8283

8384
#pragma acc parallel loop independent async\
85+
present(lvec1,lvec2,lvec3,recipa,recipb,recipc)\
8486
deviceptr(x,y,z,kpep,prepep,dmppep,lpep,mlst,mdwexclude,mdwexclude_scale,polscale)
8587
for (int ii = 0; ii < nmdwexclude; ++ii) {
8688
int i = mdwexclude[ii][0];
@@ -111,8 +113,8 @@ void alterpol(real (*polscale)[3][3], real (*polinv)[3][3])
111113
for (int l = 0; l < 3; ++l) {
112114
#pragma acc loop seq
113115
for (int m = 0; m < 3; ++m) {
114-
polscale[i][m][l] = polscale[i][m][l] + ks2i[m][l];
115-
polscale[k][m][l] = polscale[k][m][l] + ks2k[m][l];
116+
atomic_add(ks2i[m][l], &polscale[i][m][l]);
117+
atomic_add(ks2k[m][l], &polscale[k][m][l]);
116118
}
117119
}
118120
}
@@ -139,7 +141,7 @@ void alterpol(real (*polscale)[3][3], real (*polinv)[3][3])
139141
}
140142
}
141143

142-
void dexpol(const int vers, const real (*uind)[3], grad_prec* depx, grad_prec* depy,
144+
void dexpol_acc(const int vers, const real (*uind)[3], grad_prec* depx, grad_prec* depy,
143145
grad_prec* depz, VirialBuffer restrict vir_ep)
144146
{
145147
auto do_v = vers & calc::virial;
@@ -156,7 +158,7 @@ void dexpol(const int vers, const real (*uind)[3], grad_prec* depx, grad_prec* d
156158

157159
MAYBE_UNUSED int GRID_DIM = gpuGridSize(BLOCK_DIM);
158160
#pragma acc parallel async num_gangs(GRID_DIM) vector_length(BLOCK_DIM)\
159-
deviceptr(x,y,z,polarity,kpep,prepep,dmppep,lpep,uind,depx,depy,depz,vir_ep,mlst,polscale)
161+
deviceptr(x,y,z,polarity,kpep,prepep,dmppep,lpep,uind,depx,depy,depz,vir_ep,mlst)
160162
#pragma acc loop gang independent
161163
for (int i = 0; i < n; ++i) {
162164
real xi = x[i];
@@ -220,7 +222,7 @@ void dexpol(const int vers, const real (*uind)[3], grad_prec* depx, grad_prec* d
220222

221223
#pragma acc parallel loop independent async\
222224
deviceptr(x,y,z,polarity,kpep,prepep,dmppep,lpep,uind,depx,depy,depz,\
223-
vir_ep,mlst,mdwexclude,mdwexclude_scale,polscale)
225+
vir_ep,mlst,mdwexclude,mdwexclude_scale)
224226
for (int ii = 0; ii < nmdwexclude; ++ii) {
225227
int offset = ii & (bufsize - 1);
226228

src/acc/hippo/expolinduce.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,12 +78,12 @@ void induceMutualPcg4_acc(real (*uind)[3])
7878
#pragma acc parallel loop independent async\
7979
deviceptr(polarity_inv,udir,uind,field,polscale,rsd)
8080
for (int i = 0; i < n; ++i) {
81-
real pol = polarity_inv[i];
81+
real poli_inv = polarity_inv[i];
8282
#pragma acc loop seq
8383
for (int j = 0; j < 3; ++j) {
8484
rsd[i][j] = (udir[i][j] - uind[i][0] * polscale[i][0][j] -
8585
uind[i][1] * polscale[i][1][j] - uind[i][2] * polscale[i][2][j]) *
86-
pol +
86+
poli_inv +
8787
field[i][j];
8888
}
8989
}

src/cu/cmakesrc.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,14 @@ ehal.cu
2323
elj.cu
2424
epolarrecip.cu
2525
evalence.cu
26+
hippo/alterpol.cu
2627
hippo/cflux.cu
2728
hippo/echgtrn.cu
2829
hippo/edisp.cu
2930
hippo/empole.cu
3031
hippo/epolar.cu
3132
hippo/erepel.cu
33+
hippo/expolpcg.cu
3234
hippo/field.cu
3335
hippo/pcg.cu
3436
hippo/precond.cu

0 commit comments

Comments
 (0)