11
11
#include < tinker/routines.h>
12
12
13
13
namespace tinker {
14
- void alterpol (real (*polscale)[3][3], real (*polinv)[3][3])
14
+ void alterpol_acc (real (*polscale)[3][3], real (*polinv)[3][3])
15
15
{
16
16
real cut = switchCut (Switch::REPULS);
17
17
real off = switchOff (Switch::REPULS);
@@ -38,6 +38,7 @@ void alterpol(real (*polscale)[3][3], real (*polinv)[3][3])
38
38
// find variable polarizability scale matrix at each site
39
39
MAYBE_UNUSED int GRID_DIM = gpuGridSize (BLOCK_DIM);
40
40
#pragma acc parallel async num_gangs(GRID_DIM) vector_length(BLOCK_DIM)\
41
+ present (lvec1,lvec2,lvec3,recipa,recipb,recipc)\
41
42
deviceptr (x,y,z,kpep,prepep,dmppep,lpep,mlst,polscale)
42
43
#pragma acc loop gang independent
43
44
for (int i = 0 ; i < n; ++i) {
@@ -72,15 +73,16 @@ void alterpol(real (*polscale)[3][3], real (*polinv)[3][3])
72
73
for (int l = 0 ; l < 3 ; ++l) {
73
74
#pragma acc loop seq
74
75
for (int m = 0 ; m < 3 ; ++m) {
75
- polscale[i][ m][l] += ks2i[ m][l];
76
- polscale[k][ m][l] += ks2k[ m][l];
76
+ atomic_add (ks2i[ m][l], &polscale[i][ m][l]) ;
77
+ atomic_add (ks2k[ m][l], &polscale[k][ m][l]) ;
77
78
}
78
79
}
79
80
}
80
81
}
81
82
}
82
83
83
84
#pragma acc parallel loop independent async\
85
+ present (lvec1,lvec2,lvec3,recipa,recipb,recipc)\
84
86
deviceptr (x,y,z,kpep,prepep,dmppep,lpep,mlst,mdwexclude,mdwexclude_scale,polscale)
85
87
for (int ii = 0 ; ii < nmdwexclude; ++ii) {
86
88
int i = mdwexclude[ii][0 ];
@@ -111,8 +113,8 @@ void alterpol(real (*polscale)[3][3], real (*polinv)[3][3])
111
113
for (int l = 0 ; l < 3 ; ++l) {
112
114
#pragma acc loop seq
113
115
for (int m = 0 ; m < 3 ; ++m) {
114
- polscale[i][ m][l] = polscale[i][m][l] + ks2i[m][l] ;
115
- polscale[k][ m][l] = polscale[k][m][l] + ks2k[m][l] ;
116
+ atomic_add (ks2i[ m][l], & polscale[i][m][l]) ;
117
+ atomic_add (ks2k[ m][l], & polscale[k][m][l]) ;
116
118
}
117
119
}
118
120
}
@@ -139,7 +141,7 @@ void alterpol(real (*polscale)[3][3], real (*polinv)[3][3])
139
141
}
140
142
}
141
143
142
- void dexpol (const int vers, const real (*uind)[3], grad_prec* depx, grad_prec* depy,
144
+ void dexpol_acc (const int vers, const real (*uind)[3], grad_prec* depx, grad_prec* depy,
143
145
grad_prec* depz, VirialBuffer restrict vir_ep)
144
146
{
145
147
auto do_v = vers & calc::virial;
@@ -156,7 +158,7 @@ void dexpol(const int vers, const real (*uind)[3], grad_prec* depx, grad_prec* d
156
158
157
159
MAYBE_UNUSED int GRID_DIM = gpuGridSize (BLOCK_DIM);
158
160
#pragma acc parallel async num_gangs(GRID_DIM) vector_length(BLOCK_DIM)\
159
- deviceptr (x,y,z,polarity,kpep,prepep,dmppep,lpep,uind,depx,depy,depz,vir_ep,mlst,polscale )
161
+ deviceptr (x,y,z,polarity,kpep,prepep,dmppep,lpep,uind,depx,depy,depz,vir_ep,mlst)
160
162
#pragma acc loop gang independent
161
163
for (int i = 0 ; i < n; ++i) {
162
164
real xi = x[i];
@@ -220,7 +222,7 @@ void dexpol(const int vers, const real (*uind)[3], grad_prec* depx, grad_prec* d
220
222
221
223
#pragma acc parallel loop independent async\
222
224
deviceptr (x,y,z,polarity,kpep,prepep,dmppep,lpep,uind,depx,depy,depz,\
223
- vir_ep,mlst,mdwexclude,mdwexclude_scale,polscale )
225
+ vir_ep,mlst,mdwexclude,mdwexclude_scale)
224
226
for (int ii = 0 ; ii < nmdwexclude; ++ii) {
225
227
int offset = ii & (bufsize - 1 );
226
228
0 commit comments