Skip to content

Commit 2503915

Browse files
authored
Merge pull request #1 from zhi-wang/expol
Exchange Polarization
2 parents 342975c + 3784a1c commit 2503915

36 files changed

+1078
-955
lines changed

.clang-format

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ BreakConstructorInitializers: BeforeComma
1010
PointerAlignment: Left
1111
AllowShortBlocksOnASingleLine: Always
1212
AllowShortFunctionsOnASingleLine: Empty
13+
AllowShortIfStatementsOnASingleLine: WithoutElse
1314

1415
MaxEmptyLinesToKeep: 1
1516
AccessModifierOffset: -3

ext/ext/yaml/alterpol_cu1.yaml

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -66,24 +66,24 @@ FULL_PAIRWISE_INTERACTION: |
6666
if ((eplk or @epli@) and r2 <= off * off and incl) {
6767
real r = REAL_SQRT(r2);
6868
real ks2i[3][3], ks2k[3][3];
69-
pair_alterpol(scrtyp, r, r2, scaleb, cut, off, xr, yr, zr, @springi@, @sizi@, @alphai@,
69+
pair_alterpol(scrtyp, r, scaleb, cut, off, xr, yr, zr, @springi@, @sizi@, @alphai@,
7070
springk, sizk, alphak, ks2i, ks2k);
71-
@psci00@ = ks2i[0][0];
72-
@psci01@ = ks2i[0][1];
73-
@psci02@ = ks2i[0][2];
74-
@psci10@ = ks2i[1][0];
75-
@psci11@ = ks2i[1][1];
76-
@psci12@ = ks2i[1][2];
77-
@psci20@ = ks2i[2][0];
78-
@psci21@ = ks2i[2][1];
79-
@psci22@ = ks2i[2][2];
80-
psck00 = ks2k[0][0];
81-
psck01 = ks2k[0][1];
82-
psck02 = ks2k[0][2];
83-
psck10 = ks2k[1][0];
84-
psck11 = ks2k[1][1];
85-
psck12 = ks2k[1][2];
86-
psck20 = ks2k[2][0];
87-
psck21 = ks2k[2][1];
88-
psck22 = ks2k[2][2];
71+
@psci00@ += ks2i[0][0];
72+
@psci01@ += ks2i[0][1];
73+
@psci02@ += ks2i[0][2];
74+
@psci10@ += ks2i[1][0];
75+
@psci11@ += ks2i[1][1];
76+
@psci12@ += ks2i[1][2];
77+
@psci20@ += ks2i[2][0];
78+
@psci21@ += ks2i[2][1];
79+
@psci22@ += ks2i[2][2];
80+
psck00 += ks2k[0][0];
81+
psck01 += ks2k[0][1];
82+
psck02 += ks2k[0][2];
83+
psck10 += ks2k[1][0];
84+
psck11 += ks2k[1][1];
85+
psck12 += ks2k[1][2];
86+
psck20 += ks2k[2][0];
87+
psck21 += ks2k[2][1];
88+
psck22 += ks2k[2][2];
8989
}

ext/ext/yaml/dexpol.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ FULL_PAIRWISE_INTERACTION: |
7474
if ((eplk or @epli@) and r2 <= off * off and incl) {
7575
real r = REAL_SQRT(r2);
7676
real frc[3];
77-
pair_dexpol(scrtyp, r, r2, scaleb, cut, off, xr, yr, zr, @uix@, @uiy@, @uiz@, ukx, uky, ukz,
77+
pair_dexpol(scrtyp, r, scaleb, cut, off, xr, yr, zr, @uix@, @uiy@, @uiz@, ukx, uky, ukz,
7878
@springi@/@poli@, @sizi@, @alphai@, springk/polk, sizk, alphak, f, frc);
7979
@frcxi@ += frc[0];
8080
@frcyi@ += frc[1];

include/ff/cuinduce.h

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#pragma once
2+
#include "ff/precision.h"
3+
4+
namespace tinker {
5+
// Declarations of the CUDA kernels used by the induced-dipole PCG setup.
// Only prototypes are given here; the kernel bodies live elsewhere.
//
// udir = polarity * field
6+
// Single-field variant: takes one output array (udir) and one input field.
// NOTE(review): presumably loops over the n sites -- confirm in the definition.
7+
__global__
8+
void pcgUdirV1(int n, const real* polarity, //
9+
real (*udir)[3], const real (*field)[3]);
10+
// Paired variant: same as V1 plus a second output/input pair (udirp, fieldp).
11+
__global__
12+
void pcgUdirV2(int n, const real* polarity, //
13+
real (*udir)[3], real (*udirp)[3], const real (*field)[3], const real (*fieldp)[3]);
14+
15+
// r(0) = E - (1/polarity + Tu) u(0) = (udir - u(0))/polarity + mutual field
// The residual kernels below take polarity_inv rather than polarity.
16+
// Single-set variant: one residual output (rsd) from udir, uind and field.
17+
__global__
18+
void pcgRsd0V1(int n, const real* polarity_inv, real (*rsd)[3], //
19+
const real (*udir)[3], const real (*uind)[3], const real (*field)[3]);
20+
// Paired variant: adds (rsp, udip, uinp, fielp) alongside (rsd, udir, uind, field).
21+
__global__
22+
void pcgRsd0V2(int n, const real* polarity_inv, real (*rsd)[3], real (*rsp)[3], //
23+
const real (*udir)[3], const real (*udip)[3], const real (*uind)[3], const real (*uinp)[3],
24+
const real (*field)[3], const real (*fielp)[3]);
25+
// Same arguments as V1 plus `polscale`, a pointer to 3x3 real arrays.
// NOTE(review): how polscale enters r(0) is not visible from this header -- confirm.
26+
__global__
27+
void pcgRsd0V3(int n, const real* polarity_inv, real (*rsd)[3], //
28+
const real (*udir)[3], const real (*uind)[3], const real (*field)[3],
29+
const real (*polscale)[3][3]);
30+
}

include/ff/hippo/expol.h

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,5 @@ namespace tinker {
66
void expolData(RcOp);
77

88
void alterpol(real (*polscale)[3][3], real (*polinv)[3][3]);
9-
void dexpol(const int vers, const real (*uind)[3], grad_prec* depx, grad_prec* depy, grad_prec* depz,
10-
VirialBuffer restrict vir_ep);
11-
12-
enum class ExpolScr
13-
{
14-
NONE,
15-
S2U,
16-
S2,
17-
G
18-
};
9+
void dexpol(int vers);
1910
}

include/ff/hippo/expolscr.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#pragma once
2+
3+
namespace tinker {
4+
// Selects which screening expression damp_expl() evaluates for the
// exchange-polarization term.
enum class ExpolScr
5+
{
6+
// Matches none of the explicit S2U / S2 / G comparisons in damp_expl().
NONE,
7+
// damp_expl() uses a single combined exponent sqrt(alphai * alphak).
S2U,
8+
// damp_expl() uses alphai and alphak separately (via fsinhc3).
S2,
9+
// damp_expl() uses exp(-sqrt(alphai * alphak) / 10 * r * r).
G,
10+
};
11+
}

include/ff/hippomod.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#include "ff/energybuffer.h"
33
#include "ff/hippo/chgpen.h"
44
#include "ff/hippo/echgtrn.h"
5-
#include "ff/hippo/expol.h"
5+
#include "ff/hippo/expolscr.h"
66

77
// mplpot
88
namespace tinker {

include/seq/damp_hippo.h

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
11
#pragma once
2-
#include "ff/hippo/expol.h"
3-
#include "ff/hippomod.h"
42
#include "math/sinhc.h"
53
#include "seq/seq.h"
64

@@ -610,29 +608,4 @@ inline void damp_rep(real* restrict dmpik, real r, real rr1, real r2, real rr3,
610608
}
611609
// clang-format on
612610
}
613-
614-
#pragma acc routine seq
615-
SEQ_CUDA
616-
inline void damp_expl(ExpolScr scrtyp, real& restrict s2, real& restrict ds2, real r, real sizik,
617-
real alphai, real alphak, bool do_g)
618-
{
619-
real alphaik, dmpik2, dampik, dampik2, expik, s;
620-
621-
if (scrtyp == ExpolScr::S2U) {
622-
alphaik = REAL_SQRT(alphai * alphak);
623-
constexpr real inv2 = 1. / 2, inv3 = 1. / 3;
624-
constexpr real one = 1.;
625-
dmpik2 = inv2 * alphaik;
626-
dampik = dmpik2 * r;
627-
dampik2 = dampik * dampik;
628-
expik = REAL_EXP(-dampik);
629-
s = (one + dampik + dampik2 * inv3) * expik;
630-
s2 = s * s;
631-
if (do_g)
632-
ds2 = s * (-alphaik * inv3) * (dampik + dampik2) * expik;
633-
}
634-
s2 = sizik * s2;
635-
if (do_g)
636-
ds2 = sizik * ds2;
637-
}
638611
}

include/seq/pair_alterpol.h

Lines changed: 88 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,84 @@
11
#pragma once
2-
#include "ff/hippomod.h"
2+
#include "ff/hippo/expolscr.h"
3+
#include "math/sinhc.h"
34
#include "math/switch.h"
4-
#include "seq/damp_hippo.h"
5+
#include "seq/seq.h"
56

67
namespace tinker {
78
#pragma acc routine seq
9+
template <bool DO_G>
810
SEQ_CUDA
9-
inline void pair_alterpol(ExpolScr scrtyp, real r, real r2, real pscale, real cut, real off,
10-
real xr, real yr, real zr, real springi, real sizi, real alphai, real springk, real sizk,
11-
real alphak, real ks2i[3][3], real ks2k[3][3])
11+
// Evaluates the exchange-polarization screening factor for one pair.
//
// scrtyp  selects the functional form (S2U, S2 or G).
// s2      [out] screening value, already multiplied by sizik.
// ds2     [out] derivative of s2 with respect to r, multiplied by sizik;
//         written only when DO_G is true, left untouched otherwise.
//         NOTE(review): checked by differentiation for S2U and G; the S2
//         branch depends on fsinhc3() and is presumably d(s2)/dr as well.
// r       pair distance; the S2 branch divides by r and r*r.
// sizik   prefactor applied to both outputs.
// alphai, alphak  per-site damping exponents.
//
// Any other scrtyp value (e.g. ExpolScr::NONE) yields s2 = 0 (and ds2 = 0 when
// DO_G) so the outputs are always defined; callers pass uninitialized locals.
inline void damp_expl(ExpolScr scrtyp, real& restrict s2, real& restrict ds2, real r, real sizik,
   real alphai, real alphak)
{
   constexpr real inv2 = 1. / 2, inv3 = 1. / 3;
   constexpr real one = 1.;

   if (scrtyp == ExpolScr::S2U) {
      real alphaik, dmpik2, dampik, dampik2, expik, s;
      alphaik = REAL_SQRT(alphai * alphak);
      dmpik2 = inv2 * alphaik;
      dampik = dmpik2 * r;
      dampik2 = dampik * dampik;
      expik = REAL_EXP(-dampik);
      s = (one + dampik + dampik2 * inv3) * expik;
      s2 = s * s;
      // d(s*s)/dr = 2 s ds/dr, with ds/dr = -(alphaik/6) (dampik + dampik2) expik
      if (DO_G) ds2 = s * (-alphaik * inv3) * (dampik + dampik2) * expik;
   } else if (scrtyp == ExpolScr::S2) {
      // pfac = [4 alphai alphak / (alphai + alphak)^2]^3 * r^2
      real pfac = 2 / (alphai + alphak);
      real r2 = r * r;
      pfac = pfac * pfac;
      pfac = pfac * alphai * alphak;
      pfac = pfac * pfac * pfac;
      pfac *= r2;

      real a = alphai * r / 2, b = alphak * r / 2;
      real c = (a + b) / 2, d = (b - a) / 2;
      real expmc = REAL_EXP(-c);

      real c2 = c * c;
      real d2 = d * d;
      real c2d2 = (c * d) * (c * d);
      real f1d, f2d, f3d;
      fsinhc3(d, f1d, f2d, f3d);

      real s;
      s = f1d * (c + 1) + f2d * c2;
      s /= r;
      s *= expmc;
      s2 = pfac * s * s;

      if (DO_G) {
         real ds;
         ds = f1d * c2 + f2d * ((c - 2) * c2 - (c + 1) * d2) - f3d * c2d2;
         ds /= -r2;
         ds *= expmc;
         ds2 = pfac * 2 * s * ds;
      }

   } else if (scrtyp == ExpolScr::G) {
      real alphaik = REAL_SQRT(alphai * alphak);
      s2 = REAL_EXP(-alphaik / (real)10 * r * r);
      if (DO_G) ds2 = (-alphaik / (real)5) * r * s2;
   } else {
      // No screening form selected: give the outputs defined values instead of
      // scaling whatever indeterminate contents the caller's locals held.
      s2 = 0;
      if (DO_G) ds2 = 0;
   }

   s2 = sizik * s2;
   if (DO_G) ds2 = sizik * ds2;
}
68+
69+
SEQ_ROUTINE
70+
inline void pair_alterpol(ExpolScr scrtyp, real r, real pscale, real cut, real off, real xr,
71+
real yr, real zr, real springi, real sizi, real alphai, real springk, real sizk, real alphak,
72+
real ks2i[3][3], real ks2k[3][3])
1273
{
13-
real cut2 = cut * cut;
1474
real sizik = sizi * sizk;
1575
real s2;
1676
real ds2;
17-
bool do_g = false;
1877

19-
damp_expl(scrtyp, s2, ds2, r, sizik, alphai, alphak, do_g);
78+
constexpr bool DO_G = false;
79+
damp_expl<DO_G>(scrtyp, s2, ds2, r, sizik, alphai, alphak);
2080

21-
if (r2 > cut2) {
81+
if (r > cut) {
2282
real taper, dtaper;
2383
switchTaper5<0>(r, cut, off, taper, dtaper);
2484
s2 = s2 * taper;
@@ -28,40 +88,35 @@ inline void pair_alterpol(ExpolScr scrtyp, real r, real r2, real pscale, real cu
2888
p33i = springi * s2 * pscale;
2989
p33k = springk * s2 * pscale;
3090

31-
real ai[3], ak[3];
91+
real ai[3]; // ak = -ai
3292

3393
ai[0] = xr / r;
3494
ai[1] = yr / r;
3595
ai[2] = zr / r;
3696

37-
ak[0] = -ai[0];
38-
ak[1] = -ai[1];
39-
ak[2] = -ai[2];
4097
#pragma acc loop seq
4198
for (int i = 0; i < 3; ++i) {
4299
#pragma acc loop seq
43100
for (int j = 0; j < 3; ++j) {
44101
ks2i[j][i] = p33i * ai[i] * ai[j];
45-
ks2k[j][i] = p33k * ak[i] * ak[j];
102+
ks2k[j][i] = p33k * ai[i] * ai[j]; // ak_i * ak_j = ai_i * ai_j
46103
}
47104
}
48105
}
49106

50-
#pragma acc routine seq
51-
SEQ_CUDA
52-
inline void pair_dexpol(ExpolScr scrtyp, real r, real r2, real pscale, real cut, real off, real xr,
53-
real yr, real zr, real uix, real uiy, real uiz, real ukx, real uky, real ukz, real springi,
54-
real sizi, real alphai, real springk, real sizk, real alphak, const real f, real frc[3])
107+
SEQ_ROUTINE
108+
inline void pair_dexpol(ExpolScr scrtyp, real r, real pscale, real cut, real off, real xr, real yr,
109+
real zr, real uix, real uiy, real uiz, real ukx, real uky, real ukz, real springi, real sizi,
110+
real alphai, real springk, real sizk, real alphak, const real f, real frc[3])
55111
{
56-
real cut2 = cut * cut;
57112
real sizik = sizi * sizk;
58113
real s2;
59114
real ds2;
60-
bool do_g = true;
61115

62-
damp_expl(scrtyp, s2, ds2, r, sizik, alphai, alphak, do_g);
116+
constexpr bool DO_G = true;
117+
damp_expl<DO_G>(scrtyp, s2, ds2, r, sizik, alphai, alphak);
63118

64-
if (r2 > cut2) {
119+
if (r > cut) {
65120
real taper, dtaper;
66121
switchTaper5<1>(r, cut, off, taper, dtaper);
67122
ds2 = ds2 * taper + s2 * dtaper;
@@ -73,7 +128,7 @@ inline void pair_dexpol(ExpolScr scrtyp, real r, real r2, real pscale, real cut,
73128
real ds2k = springk * ds2 * pscale;
74129

75130
// compute rotation matrix
76-
real ai[3][3], ak[3][3];
131+
real ai[3][3];
77132
ai[0][2] = xr / r;
78133
ai[1][2] = yr / r;
79134
ai[2][2] = zr / r;
@@ -97,26 +152,18 @@ inline void pair_dexpol(ExpolScr scrtyp, real r, real r2, real pscale, real cut,
97152
ai[0][1] = ai[2][0] * ai[1][2] - ai[1][0] * ai[2][2];
98153
ai[1][1] = ai[0][0] * ai[2][2] - ai[2][0] * ai[0][2];
99154
ai[2][1] = ai[1][0] * ai[0][2] - ai[0][0] * ai[1][2];
100-
ak[0][0] = ai[0][0];
101-
ak[1][0] = ai[1][0];
102-
ak[2][0] = ai[2][0];
103-
ak[0][1] = -ai[0][1];
104-
ak[1][1] = -ai[1][1];
105-
ak[2][1] = -ai[2][1];
106-
ak[0][2] = -ai[0][2];
107-
ak[1][2] = -ai[1][2];
108-
ak[2][2] = -ai[2][2];
155+
// ak[][0] = ai[][0], ak[][1] = -ai[][1], ak[][2] = -ai[][2]
109156

110157
// local frame force
111158
real frcil[3], frckl[3];
112159
real uixl = uix * ai[0][0] + uiy * ai[1][0] + uiz * ai[2][0];
113160
real uiyl = uix * ai[0][1] + uiy * ai[1][1] + uiz * ai[2][1];
114161
real uizl = uix * ai[0][2] + uiy * ai[1][2] + uiz * ai[2][2];
115-
real ukxl = -(ukx * ak[0][0] + uky * ak[1][0] + ukz * ak[2][0]);
116-
real ukyl = -(ukx * ak[0][1] + uky * ak[1][1] + ukz * ak[2][1]);
117-
real ukzl = -(ukx * ak[0][2] + uky * ak[1][2] + ukz * ak[2][2]);
118-
frcil[2] = REAL_POW(uizl, 2) * ds2i;
119-
frckl[2] = REAL_POW(ukzl, 2) * ds2k;
162+
real ukxl = -(ukx * ai[0][0] + uky * ai[1][0] + ukz * ai[2][0]);
163+
real ukyl = ukx * ai[0][1] + uky * ai[1][1] + ukz * ai[2][1];
164+
real ukzl = ukx * ai[0][2] + uky * ai[1][2] + ukz * ai[2][2];
165+
frcil[2] = uizl * uizl * ds2i;
166+
frckl[2] = ukzl * ukzl * ds2k;
120167
// local frame torque
121168
constexpr real two = 2.;
122169
real tqxil = two * uiyl * uizl * s2i;
@@ -132,9 +179,9 @@ inline void pair_dexpol(ExpolScr scrtyp, real r, real r2, real pscale, real cut,
132179
real frcxi = ai[0][0] * frcil[0] + ai[0][1] * frcil[1] + ai[0][2] * frcil[2];
133180
real frcyi = ai[1][0] * frcil[0] + ai[1][1] * frcil[1] + ai[1][2] * frcil[2];
134181
real frczi = ai[2][0] * frcil[0] + ai[2][1] * frcil[1] + ai[2][2] * frcil[2];
135-
real frcxk = ak[0][0] * frckl[0] + ak[0][1] * frckl[1] + ak[0][2] * frckl[2];
136-
real frcyk = ak[1][0] * frckl[0] + ak[1][1] * frckl[1] + ak[1][2] * frckl[2];
137-
real frczk = ak[2][0] * frckl[0] + ak[2][1] * frckl[1] + ak[2][2] * frckl[2];
182+
real frcxk = ai[0][0] * frckl[0] - ai[0][1] * frckl[1] - ai[0][2] * frckl[2];
183+
real frcyk = ai[1][0] * frckl[0] - ai[1][1] * frckl[1] - ai[1][2] * frckl[2];
184+
real frczk = ai[2][0] * frckl[0] - ai[2][1] * frckl[1] - ai[2][2] * frckl[2];
138185
frc[0] = f * (frcxk - frcxi);
139186
frc[1] = f * (frcyk - frcyi);
140187
frc[2] = f * (frczk - frczi);

include/tool/macro.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@
3838
#ifdef TINKER_CLANG
3939
# pragma clang diagnostic ignored "-Wextern-c-compat"
4040
#endif
41+
#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
42+
// #20199-D: unrecognized #pragma in device code
43+
#pragma nv_diag_suppress 20199
44+
#endif
4145

4246
//====================================================================//
4347

0 commit comments

Comments
 (0)