@@ -111,24 +111,24 @@ static void alterpol_cu1(int n, TINKER_IMAGE_PARAMS, real cut, real off,
111
111
real ks2i[3 ][3 ], ks2k[3 ][3 ];
112
112
pair_alterpol (scrtyp, r, scaleb, cut, off, xr, yr, zr, springi[klane], sizi[klane],
113
113
alphai[klane], springk, sizk, alphak, ks2i, ks2k);
114
- psci00[klane] = ks2i[0 ][0 ];
115
- psci01[klane] = ks2i[0 ][1 ];
116
- psci02[klane] = ks2i[0 ][2 ];
117
- psci10[klane] = ks2i[1 ][0 ];
118
- psci11[klane] = ks2i[1 ][1 ];
119
- psci12[klane] = ks2i[1 ][2 ];
120
- psci20[klane] = ks2i[2 ][0 ];
121
- psci21[klane] = ks2i[2 ][1 ];
122
- psci22[klane] = ks2i[2 ][2 ];
123
- psck00 = ks2k[0 ][0 ];
124
- psck01 = ks2k[0 ][1 ];
125
- psck02 = ks2k[0 ][2 ];
126
- psck10 = ks2k[1 ][0 ];
127
- psck11 = ks2k[1 ][1 ];
128
- psck12 = ks2k[1 ][2 ];
129
- psck20 = ks2k[2 ][0 ];
130
- psck21 = ks2k[2 ][1 ];
131
- psck22 = ks2k[2 ][2 ];
114
+ psci00[klane] + = ks2i[0 ][0 ];
115
+ psci01[klane] + = ks2i[0 ][1 ];
116
+ psci02[klane] + = ks2i[0 ][2 ];
117
+ psci10[klane] + = ks2i[1 ][0 ];
118
+ psci11[klane] + = ks2i[1 ][1 ];
119
+ psci12[klane] + = ks2i[1 ][2 ];
120
+ psci20[klane] + = ks2i[2 ][0 ];
121
+ psci21[klane] + = ks2i[2 ][1 ];
122
+ psci22[klane] + = ks2i[2 ][2 ];
123
+ psck00 + = ks2k[0 ][0 ];
124
+ psck01 + = ks2k[0 ][1 ];
125
+ psck02 + = ks2k[0 ][2 ];
126
+ psck10 + = ks2k[1 ][0 ];
127
+ psck11 + = ks2k[1 ][1 ];
128
+ psck12 + = ks2k[1 ][2 ];
129
+ psck20 + = ks2k[2 ][0 ];
130
+ psck21 + = ks2k[2 ][1 ];
131
+ psck22 + = ks2k[2 ][2 ];
132
132
}
133
133
134
134
atomic_add (psci00[threadIdx .x ], &polscale[i][0 ]);
@@ -215,24 +215,24 @@ static void alterpol_cu1(int n, TINKER_IMAGE_PARAMS, real cut, real off,
215
215
real ks2i[3 ][3 ], ks2k[3 ][3 ];
216
216
pair_alterpol (scrtyp, r, scaleb, cut, off, xr, yr, zr, springi[klane], sizi[klane],
217
217
alphai[klane], springk, sizk, alphak, ks2i, ks2k);
218
- psci00[klane] = ks2i[0 ][0 ];
219
- psci01[klane] = ks2i[0 ][1 ];
220
- psci02[klane] = ks2i[0 ][2 ];
221
- psci10[klane] = ks2i[1 ][0 ];
222
- psci11[klane] = ks2i[1 ][1 ];
223
- psci12[klane] = ks2i[1 ][2 ];
224
- psci20[klane] = ks2i[2 ][0 ];
225
- psci21[klane] = ks2i[2 ][1 ];
226
- psci22[klane] = ks2i[2 ][2 ];
227
- psck00 = ks2k[0 ][0 ];
228
- psck01 = ks2k[0 ][1 ];
229
- psck02 = ks2k[0 ][2 ];
230
- psck10 = ks2k[1 ][0 ];
231
- psck11 = ks2k[1 ][1 ];
232
- psck12 = ks2k[1 ][2 ];
233
- psck20 = ks2k[2 ][0 ];
234
- psck21 = ks2k[2 ][1 ];
235
- psck22 = ks2k[2 ][2 ];
218
+ psci00[klane] + = ks2i[0 ][0 ];
219
+ psci01[klane] + = ks2i[0 ][1 ];
220
+ psci02[klane] + = ks2i[0 ][2 ];
221
+ psci10[klane] + = ks2i[1 ][0 ];
222
+ psci11[klane] + = ks2i[1 ][1 ];
223
+ psci12[klane] + = ks2i[1 ][2 ];
224
+ psci20[klane] + = ks2i[2 ][0 ];
225
+ psci21[klane] + = ks2i[2 ][1 ];
226
+ psci22[klane] + = ks2i[2 ][2 ];
227
+ psck00 + = ks2k[0 ][0 ];
228
+ psck01 + = ks2k[0 ][1 ];
229
+ psck02 + = ks2k[0 ][2 ];
230
+ psck10 + = ks2k[1 ][0 ];
231
+ psck11 + = ks2k[1 ][1 ];
232
+ psck12 + = ks2k[1 ][2 ];
233
+ psck20 + = ks2k[2 ][0 ];
234
+ psck21 + = ks2k[2 ][1 ];
235
+ psck22 + = ks2k[2 ][2 ];
236
236
}
237
237
238
238
iid = __shfl_sync (ALL_LANES, iid, ilane + 1 );
@@ -313,24 +313,24 @@ static void alterpol_cu1(int n, TINKER_IMAGE_PARAMS, real cut, real off,
313
313
real ks2i[3 ][3 ], ks2k[3 ][3 ];
314
314
pair_alterpol (scrtyp, r, scaleb, cut, off, xr, yr, zr, springi[klane], sizi[klane],
315
315
alphai[klane], springk, sizk, alphak, ks2i, ks2k);
316
- psci00[klane] = ks2i[0 ][0 ];
317
- psci01[klane] = ks2i[0 ][1 ];
318
- psci02[klane] = ks2i[0 ][2 ];
319
- psci10[klane] = ks2i[1 ][0 ];
320
- psci11[klane] = ks2i[1 ][1 ];
321
- psci12[klane] = ks2i[1 ][2 ];
322
- psci20[klane] = ks2i[2 ][0 ];
323
- psci21[klane] = ks2i[2 ][1 ];
324
- psci22[klane] = ks2i[2 ][2 ];
325
- psck00 = ks2k[0 ][0 ];
326
- psck01 = ks2k[0 ][1 ];
327
- psck02 = ks2k[0 ][2 ];
328
- psck10 = ks2k[1 ][0 ];
329
- psck11 = ks2k[1 ][1 ];
330
- psck12 = ks2k[1 ][2 ];
331
- psck20 = ks2k[2 ][0 ];
332
- psck21 = ks2k[2 ][1 ];
333
- psck22 = ks2k[2 ][2 ];
316
+ psci00[klane] + = ks2i[0 ][0 ];
317
+ psci01[klane] + = ks2i[0 ][1 ];
318
+ psci02[klane] + = ks2i[0 ][2 ];
319
+ psci10[klane] + = ks2i[1 ][0 ];
320
+ psci11[klane] + = ks2i[1 ][1 ];
321
+ psci12[klane] + = ks2i[1 ][2 ];
322
+ psci20[klane] + = ks2i[2 ][0 ];
323
+ psci21[klane] + = ks2i[2 ][1 ];
324
+ psci22[klane] + = ks2i[2 ][2 ];
325
+ psck00 + = ks2k[0 ][0 ];
326
+ psck01 + = ks2k[0 ][1 ];
327
+ psck02 + = ks2k[0 ][2 ];
328
+ psck10 + = ks2k[1 ][0 ];
329
+ psck11 + = ks2k[1 ][1 ];
330
+ psck12 + = ks2k[1 ][2 ];
331
+ psck20 + = ks2k[2 ][0 ];
332
+ psck21 + = ks2k[2 ][1 ];
333
+ psck22 + = ks2k[2 ][2 ];
334
334
}
335
335
}
336
336
@@ -861,8 +861,7 @@ void eppcgP5(int n, const real* restrict polarity, //
861
861
{
862
862
real kaval = *ka;
863
863
real a = *ksum / kaval;
864
- if (kaval == 0 )
865
- a = 0 ;
864
+ if (kaval == 0 ) a = 0 ;
866
865
for (int i = ITHREAD; i < n; i += STRIDE) {
867
866
#pragma unroll
868
867
for (int j = 0 ; j < 3 ; ++j) {
@@ -883,8 +882,7 @@ void eppcgP6(int n, const real* restrict ksum, const real* restrict ksum1, real
883
882
{
884
883
real ksumval = *ksum;
885
884
real b = *ksum1 / ksumval;
886
- if (ksumval == 0 )
887
- b = 0 ;
885
+ if (ksumval == 0 ) b = 0 ;
888
886
for (int i = ITHREAD; i < n; i += STRIDE) {
889
887
#pragma unroll
890
888
for (int j = 0 ; j < 3 ; ++j)
@@ -968,11 +966,12 @@ void induceMutualPcg4_cu(real (*uind)[3])
968
966
const real debye = units::debye;
969
967
const real pcgpeek = polpcg::pcgpeek;
970
968
const int maxiter = 100 ; // see also subroutine induce0a in induce.f
969
+ const int miniter = std::min (3 , n);
971
970
972
971
bool done = false ;
973
972
int iter = 0 ;
974
973
real eps = 100 ;
975
- real epsold;
974
+ // real epsold;
976
975
977
976
while (not done) {
978
977
++iter;
@@ -1013,7 +1012,7 @@ void induceMutualPcg4_cu(real (*uind)[3])
1013
1012
check_rt (
1014
1013
cudaMemcpyAsync ((real*)pinned_buf, epsd, sizeof (real), cudaMemcpyDeviceToHost, g::s0));
1015
1014
check_rt (cudaStreamSynchronize (g::s0));
1016
- epsold = eps;
1015
+ // epsold = eps;
1017
1016
eps = ((real*)pinned_buf)[0 ];
1018
1017
eps = debye * REAL_SQRT (eps / n);
1019
1018
@@ -1026,16 +1025,13 @@ void induceMutualPcg4_cu(real (*uind)[3])
1026
1025
print (stdout, " %8d %-16.10f\n " , iter, eps);
1027
1026
}
1028
1027
1029
- if (eps < poleps)
1030
- done = true ;
1031
- if (eps > epsold)
1032
- done = true ;
1033
- if (iter >= politer)
1034
- done = true ;
1028
+ if (eps < poleps) done = true ;
1029
+ // if (eps > epsold) done = true;
1030
+ if (iter < miniter) done = false ;
1031
+ if (iter >= politer) done = true ;
1035
1032
1036
1033
// apply a "peek" iteration to the mutual induced dipoles
1037
- if (done)
1038
- launch_k1s (g::s0, n, eppcgPeek1, n, pcgpeek, polarity, uind, rsd);
1034
+ if (done) launch_k1s (g::s0, n, eppcgPeek1, n, pcgpeek, polarity, uind, rsd);
1039
1035
}
1040
1036
1041
1037
// print the results from the conjugate gradient iteration
@@ -1047,7 +1043,7 @@ void induceMutualPcg4_cu(real (*uind)[3])
1047
1043
}
1048
1044
1049
1045
// terminate the calculation if dipoles failed to converge
1050
- if (iter >= maxiter || eps > epsold ) {
1046
+ if (iter >= maxiter) {
1051
1047
printError ();
1052
1048
TINKER_THROW (" INDUCE -- Warning, Induced Dipoles are not Converged" );
1053
1049
}
0 commit comments