Skip to content

Commit cd82586

Browse files
committed
implementation of openACC expol energies
1 parent 848b368 commit cd82586

File tree

7 files changed

+220
-67
lines changed

7 files changed

+220
-67
lines changed

include/ff/hippo/expol.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
namespace tinker {
66
void expolData(RcOp);
77

8-
void alterpol();
8+
void alterpol(real (*polscale)[3][3], real (*polinv)[3][3]);
9+
void dexpol();
910

1011
enum class ExpolScr
1112
{

include/ff/hippomod.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,6 @@ TINKER_EXTERN int nmdwexclude;
4141
TINKER_EXTERN int (*mdwexclude)[2];
4242
TINKER_EXTERN real (*mdwexclude_scale)[3];
4343

44-
TINKER_EXTERN int ndexclude;
45-
TINKER_EXTERN int (*dexclude)[2];
46-
TINKER_EXTERN real* dexclude_scale;
47-
4844
TINKER_EXTERN int nwexclude;
4945
TINKER_EXTERN int (*wexclude)[2];
5046
TINKER_EXTERN real* wexclude_scale;

include/seq/damp_hippo.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
#pragma once
2+
#include "ff/hippo/expol.h"
3+
#include "ff/hippomod.h"
24
#include "math/sinhc.h"
35
#include "seq/seq.h"
46

@@ -608,4 +610,27 @@ inline void damp_rep(real* restrict dmpik, real r, real rr1, real r2, real rr3,
608610
}
609611
// clang-format on
610612
}
613+
614+
#pragma acc routine seq
615+
SEQ_CUDA
616+
inline void damp_expl(
617+
ExpolScr scrtyp, real& restrict s2, real& restrict ds2, real r, real sizik, real alphai, real alphak)
618+
{
619+
real alphaik, dmpik2, dampik, dampik2, expik, s;
620+
621+
if (scrtyp == ExpolScr::S2U) {
622+
alphaik = REAL_SQRT(alphai * alphak);
623+
real inv2 = 1. / 2, inv3 = 1. / 3;
624+
real one = 1.;
625+
dmpik2 = inv2 * alphaik;
626+
dampik = dmpik2 * r;
627+
dampik2 = dampik * dampik;
628+
expik = REAL_EXP(-dampik);
629+
s = (one + dampik + dampik2 * inv3) * expik;
630+
s2 = s * s;
631+
ds2 = s * (-alphaik * inv3) * (dampik + dampik2) * expik;
632+
}
633+
s2 = sizik * s2;
634+
ds2 = sizik * ds2;
635+
}
611636
}

include/seq/pair_alterpol.h

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#pragma once
2+
#include "math/switch.h"
3+
#include "seq/damp_hippo.h"
4+
#include "ff/hippomod.h"
5+
6+
namespace tinker {
7+
#pragma acc routine seq
8+
SEQ_CUDA
9+
inline void pair_alterpol(ExpolScr scrtyp, real r, real r2, real pscale, real cut, real off, real xr, real yr, real zr,
10+
real springi, real sizi, real alphai, real springk, real sizk, real alphak,
11+
real ks2i[3][3], real ks2k[3][3])
12+
{
13+
real cut2 = cut * cut;
14+
real sizik = sizi * sizk;
15+
real s2;
16+
real ds2;
17+
18+
damp_expl(scrtyp, s2, ds2, r, sizik, alphai, alphak);
19+
20+
if (r2 > cut2) {
21+
real taper, dtaper;
22+
switchTaper5<0>(r, cut, off, taper, dtaper);
23+
s2 = s2 * taper;
24+
}
25+
real p33i, p33k;
26+
p33i = springi * s2 * pscale;
27+
p33k = springk * s2 * pscale;
28+
29+
real ai[3], ak[3];
30+
31+
ai[0] = xr / r;
32+
ai[1] = yr / r;
33+
ai[2] = zr / r;
34+
35+
ak[0] = -ai[0];
36+
ak[1] = -ai[1];
37+
ak[2] = -ai[2];
38+
#pragma acc loop seq
39+
for (int i{0}; i < 3; ++i) {
40+
#pragma acc loop seq
41+
for (int j{0}; j < 3; ++j) {
42+
ks2i[j][i] = p33i * ai[i] * ai[j];
43+
ks2k[j][i] = p33k * ak[i] * ak[j];
44+
}
45+
}
46+
}
47+
}

src/acc/hippo/alterpol.cpp

Lines changed: 119 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,78 +1,156 @@
1-
//#include "ff/amoebamod.h"
1+
#include "ff/amoebamod.h"
22
#include "ff/atom.h"
33
#include "ff/hippomod.h"
44
#include "ff/image.h"
55
#include "ff/nblist.h"
66
//#include "ff/pme.h"
77
#include "ff/switch.h"
8-
#include "seq/pair_polar_chgpen.h"
9-
//#include "tool/gpucard.h"
10-
#include <array>
11-
#include <fstream>
8+
#include "seq/pair_alterpol.h"
9+
#include <tinker/routines.h>
10+
// #include "seq/pair_polar_chgpen.h"
11+
#include "tool/gpucard.h"
12+
// #include <array>
13+
// #include <fstream>
1214

1315
namespace tinker {
14-
void alterpol()
16+
void alterpol(real (*polscale)[3][3], real (*polinv)[3][3])
1517
{
1618
real cut = switchCut(Switch::REPULS);
1719
real off = switchOff(Switch::REPULS);
1820

1921
const real off2 = off * off;
20-
const int maxnlst = mlist_unit->maxnlst; // question
21-
const auto* mlst = mlist_unit.deviceptr(); // question
22-
23-
size_t bufsize = bufferSize(); // question
24-
PairPolarGrad pgrad; // question
22+
const int maxnlst = mlist_unit->maxnlst;
23+
const auto* mlst = mlist_unit.deviceptr();
2524

2625
// initialize polscale and polinv
26+
#pragma acc parallel loop independent async\
27+
deviceptr(polscale)
2728
for (int i = 0; i < n; ++i) {
29+
polscale[i][0][0] = 1.f;
30+
polscale[i][0][1] = 0.f;
31+
polscale[i][0][2] = 0.f;
32+
polscale[i][1][0] = 0.f;
2833
polscale[i][1][1] = 1.f;
2934
polscale[i][1][2] = 0.f;
30-
polscale[i][1][3] = 0.f;
35+
polscale[i][2][0] = 0.f;
3136
polscale[i][2][1] = 0.f;
3237
polscale[i][2][2] = 1.f;
33-
polscale[i][2][3] = 0.f;
34-
polscale[i][3][1] = 0.f;
35-
polscale[i][3][2] = 0.f;
36-
polscale[i][3][3] = 1.f;
37-
polinv[i][1][1] = 1.f;
38-
polinv[i][1][2] = 0.f;
39-
polinv[i][1][3] = 0.f;
40-
polinv[i][2][1] = 0.f;
41-
polinv[i][2][2] = 1.f;
42-
polinv[i][2][3] = 0.f;
43-
polinv[i][3][1] = 0.f;
44-
polinv[i][3][2] = 0.f;
45-
polinv[i][3][3] = 1.f;
4638
}
4739

4840
// find variable polarizability scale matrix at each site
41+
MAYBE_UNUSED int GRID_DIM = gpuGridSize(BLOCK_DIM);
42+
#pragma acc parallel async num_gangs(GRID_DIM) vector_length(BLOCK_DIM)\
43+
deviceptr(x,y,z,kpep,prepep,dmppep,lpep,mlst,polscale)
44+
#pragma acc loop gang independent
4945
for (int i = 0; i < n; ++i) {
5046
real springi = kpep[i];
5147
real sizi = prepep[i];
5248
real alphai = dmppep[i];
5349
int epli = lpep[i];
50+
real xi = x[i];
51+
real yi = y[i];
52+
real zi = z[i];
5453

55-
int nmlsti = mlst->nlst[i]; // question
56-
int base = i * maxnlst; // question
54+
int nmlsti = mlst->nlst[i];
55+
int base = i * maxnlst;
56+
#pragma acc loop vector independent
5757
for (int kk = 0; kk < nmlsti; ++kk) {
58-
int offset = kk & (bufsize - 1); // question (copied from epolar.cpp)
59-
int k = mlst->lst[base + kk]; // question
58+
int k = mlst->lst[base + kk];
59+
real xr = x[k] - xi;
60+
real yr = y[k] - yi;
61+
real zr = z[k] - zi;
62+
real r2 = image2(xr, yr, zr);
6063
int eplk = lpep[k];
61-
if (epli || eplk) {
62-
real xr = x[k] - x[i];
63-
real yr = y[k] - y[i];
64-
real zr = z[k] - z[i];
64+
bool incl = (epli || eplk);
65+
if (r2 <= off2 and incl) {
66+
real r = REAL_SQRT(r2);
67+
real springk = kpep[k];
68+
real sizk = prepep[k];
69+
real alphak = dmppep[k];
70+
real ks2i[3][3], ks2k[3][3];
71+
pair_alterpol(scrtyp, r, r2, 1, cut, off, xr, yr, zr, springi, sizi, alphai, springk,
72+
sizk, alphak, ks2i, ks2k);
73+
#pragma acc loop seq
74+
for (int l = 0; l < 3; ++l) {
75+
#pragma acc loop seq
76+
for (int m = 0; m < 3; ++m) {
77+
polscale[i][m][l] = polscale[i][m][l] + ks2i[m][l];
78+
polscale[k][m][l] = polscale[k][m][l] + ks2k[m][l];
79+
}
80+
}
81+
}
82+
}
83+
}
84+
85+
#pragma acc parallel loop independent async\
86+
deviceptr(x,y,z,kpep,prepep,dmppep,lpep,mlst,mdwexclude,mdwexclude_scale,polscale)
87+
for (int ii = 0; ii < nmdwexclude; ++ii) {
88+
int i = mdwexclude[ii][0];
89+
int k = mdwexclude[ii][1];
90+
real dscale = mdwexclude_scale[ii][1] - 1;
91+
92+
real springi = kpep[i];
93+
real sizi = prepep[i];
94+
real alphai = dmppep[i];
95+
int epli = lpep[i];
6596

66-
zero(pgrad); // question
67-
real r2 = image2(xr, yr, zr);
68-
if (r2 <= off2) {
69-
real r = REAL_SQRT(r2);
70-
real sizk = prepep[k];
71-
real alphak = dmppep[k];
72-
// call dampexpl
97+
real springk = kpep[k];
98+
real sizk = prepep[k];
99+
real alphak = dmppep[k];
100+
int eplk = lpep[k];
101+
102+
real xr = x[k] - x[i];
103+
real yr = y[k] - y[i];
104+
real zr = z[k] - z[i];
105+
106+
real r2 = image2(xr, yr, zr);
107+
bool incl1 = dscale != 0;
108+
bool incl2 = (epli || eplk);
109+
if (r2 <= off2 and incl1 and incl2) {
110+
real r = REAL_SQRT(r2);
111+
real ks2i[3][3], ks2k[3][3];
112+
pair_alterpol(scrtyp, r, r2, 1, cut, off, xr, yr, zr, springi, sizi, alphai, springk, sizk,
113+
alphak, ks2i, ks2k);
114+
#pragma acc loop seq
115+
for (int l = 0; l < 3; ++l) {
116+
#pragma acc loop seq
117+
for (int m = 0; m < 3; ++m) {
118+
polscale[i][m][l] = polscale[i][m][l] + ks2i[m][l];
119+
polscale[k][m][l] = polscale[k][m][l] + ks2k[m][l];
73120
}
74121
}
75122
}
76123
}
124+
125+
// invert
126+
#pragma acc parallel loop independent async\
127+
deviceptr(polscale,polinv)
128+
for (int i = 0; i < n; ++i) {
129+
real tmp[3][3];
130+
#pragma acc loop seq
131+
for (int j = 0; j < 3; ++j) {
132+
#pragma acc loop seq
133+
for (int k = 0; k < 3; ++k) {
134+
tmp[j][k] = polscale[i][j][k];
135+
}
136+
}
137+
real det;
138+
det = tmp[0][0] * (tmp[1][1] * tmp[2][2] - tmp[1][2] * tmp[2][1]) -
139+
tmp[1][0] * (tmp[0][1] * tmp[2][2] - tmp[2][1] * tmp[0][2]) +
140+
tmp[2][0] * (tmp[0][1] * tmp[1][2] - tmp[1][1] * tmp[0][2]);
141+
polinv[i][0][0] = (tmp[1][1] * tmp[2][2] - tmp[1][2] * tmp[2][1]) / det;
142+
polinv[i][1][0] = (tmp[2][0] * tmp[1][2] - tmp[1][0] * tmp[2][2]) / det;
143+
polinv[i][2][0] = (tmp[1][0] * tmp[2][1] - tmp[2][0] * tmp[1][1]) / det;
144+
polinv[i][0][1] = (tmp[2][1] * tmp[0][2] - tmp[0][1] * tmp[2][2]) / det;
145+
polinv[i][1][1] = (tmp[0][0] * tmp[2][2] - tmp[2][0] * tmp[0][2]) / det;
146+
polinv[i][2][1] = (tmp[0][1] * tmp[2][0] - tmp[0][0] * tmp[2][1]) / det;
147+
polinv[i][0][2] = (tmp[0][1] * tmp[1][2] - tmp[0][2] * tmp[1][1]) / det;
148+
polinv[i][1][2] = (tmp[0][2] * tmp[1][0] - tmp[0][0] * tmp[1][2]) / det;
149+
polinv[i][2][2] = (tmp[0][0] * tmp[1][1] - tmp[0][1] * tmp[1][0]) / det;
150+
}
151+
}
152+
153+
void dexpol() {
154+
155+
}
77156
}
78-
}

0 commit comments

Comments
 (0)