@@ -105,8 +105,8 @@ void alterpol(real (*polscale)[3][3], real (*polinv)[3][3])
105
105
if (r2 <= off2 and incl1 and incl2) {
106
106
real r = REAL_SQRT (r2);
107
107
real ks2i[3 ][3 ], ks2k[3 ][3 ];
108
- pair_alterpol (scrtyp, r, r2, dscale, cut, off, xr, yr, zr, springi, sizi, alphai, springk, sizk,
109
- alphak, ks2i, ks2k);
108
+ pair_alterpol (scrtyp, r, r2, dscale, cut, off, xr, yr, zr, springi, sizi, alphai, springk,
109
+ sizk, alphak, ks2i, ks2k);
110
110
#pragma acc loop seq
111
111
for (int l = 0 ; l < 3 ; ++l) {
112
112
#pragma acc loop seq
@@ -139,8 +139,8 @@ void alterpol(real (*polscale)[3][3], real (*polinv)[3][3])
139
139
}
140
140
}
141
141
142
- void dexpol (const int vers, const real (*uind)[3], grad_prec* depx, grad_prec* depy, grad_prec* depz,
143
- VirialBuffer restrict vir_ep)
142
+ void dexpol (const int vers, const real (*uind)[3], grad_prec* depx, grad_prec* depy,
143
+ grad_prec* depz, VirialBuffer restrict vir_ep)
144
144
{
145
145
auto do_v = vers & calc::virial;
146
146
real cut = switchCut (Switch::REPULS);
@@ -154,11 +154,15 @@ void dexpol(const int vers, const real (*uind)[3], grad_prec* depx, grad_prec* d
154
154
155
155
const real f = 0 .5f * electric / dielec;
156
156
157
+ MAYBE_UNUSED int GRID_DIM = gpuGridSize (BLOCK_DIM);
158
+ #pragma acc parallel async num_gangs(GRID_DIM) vector_length(BLOCK_DIM)\
159
+ deviceptr (x,y,z,polarity,kpep,prepep,dmppep,lpep,uind,depx,depy,depz,vir_ep,mlst,polscale)
160
+ #pragma acc loop gang independent
157
161
for (int i = 0 ; i < n; ++i) {
158
162
real xi = x[i];
159
163
real yi = y[i];
160
164
real zi = z[i];
161
- real springi = kpep[i]/ polarity[i];
165
+ real springi = kpep[i] / polarity[i];
162
166
real sizi = prepep[i];
163
167
real alphai = dmppep[i];
164
168
int epli = lpep[i];
@@ -170,6 +174,7 @@ void dexpol(const int vers, const real (*uind)[3], grad_prec* depx, grad_prec* d
170
174
171
175
int nmlsti = mlst->nlst [i];
172
176
int base = i * maxnlst;
177
+ #pragma acc loop vector independent
173
178
for (int kk = 0 ; kk < nmlsti; ++kk) {
174
179
int offset = kk & (bufsize - 1 );
175
180
int k = mlst->lst [base + kk];
@@ -181,7 +186,7 @@ void dexpol(const int vers, const real (*uind)[3], grad_prec* depx, grad_prec* d
181
186
bool incl = (epli || eplk);
182
187
if (r2 <= off2 and incl) {
183
188
real r = REAL_SQRT (r2);
184
- real springk = kpep[k]/ polarity[k];
189
+ real springk = kpep[k] / polarity[k];
185
190
real sizk = prepep[k];
186
191
real alphak = dmppep[k];
187
192
real ukx = uind[k][0 ];
@@ -213,6 +218,9 @@ void dexpol(const int vers, const real (*uind)[3], grad_prec* depx, grad_prec* d
213
218
atomic_add (gzi, depz, i);
214
219
}
215
220
221
+ #pragma acc parallel loop independent async\
222
+ deviceptr (x,y,z,polarity,kpep,prepep,dmppep,lpep,uind,depx,depy,depz,\
223
+ vir_ep,mlst,mdwexclude,mdwexclude_scale,polscale)
216
224
for (int ii = 0 ; ii < nmdwexclude; ++ii) {
217
225
int offset = ii & (bufsize - 1 );
218
226
@@ -245,16 +253,16 @@ void dexpol(const int vers, const real (*uind)[3], grad_prec* depx, grad_prec* d
245
253
if (r2 <= off2 and incl1 and incl2) {
246
254
real r = REAL_SQRT (r2);
247
255
real frc[3 ];
248
- pair_dexpol (scrtyp, r, r2, dscale, cut, off, xr, yr, zr, uix, uiy, uiz, ukx, uky, ukz, springi,
249
- sizi, alphai, springk, sizk, alphak, f, frc);
256
+ pair_dexpol (scrtyp, r, r2, dscale, cut, off, xr, yr, zr, uix, uiy, uiz, ukx, uky, ukz,
257
+ springi, sizi, alphai, springk, sizk, alphak, f, frc);
250
258
251
259
atomic_add (frc[0 ], depx, i);
252
260
atomic_add (frc[1 ], depy, i);
253
261
atomic_add (frc[2 ], depz, i);
254
262
atomic_add (-frc[0 ], depx, k);
255
263
atomic_add (-frc[1 ], depy, k);
256
264
atomic_add (-frc[2 ], depz, k);
257
-
265
+
258
266
if (do_v) {
259
267
real vxx = -xr * frc[0 ];
260
268
real vxy = -0 .5f * (yr * frc[0 ] + xr * frc[1 ]);
0 commit comments