@@ -111,24 +111,24 @@ static void alterpol_cu1(int n, TINKER_IMAGE_PARAMS, real cut, real off,
111111 real ks2i[3 ][3 ], ks2k[3 ][3 ];
112112 pair_alterpol (scrtyp, r, scaleb, cut, off, xr, yr, zr, springi[klane], sizi[klane],
113113 alphai[klane], springk, sizk, alphak, ks2i, ks2k);
114- psci00[klane] = ks2i[0 ][0 ];
115- psci01[klane] = ks2i[0 ][1 ];
116- psci02[klane] = ks2i[0 ][2 ];
117- psci10[klane] = ks2i[1 ][0 ];
118- psci11[klane] = ks2i[1 ][1 ];
119- psci12[klane] = ks2i[1 ][2 ];
120- psci20[klane] = ks2i[2 ][0 ];
121- psci21[klane] = ks2i[2 ][1 ];
122- psci22[klane] = ks2i[2 ][2 ];
123- psck00 = ks2k[0 ][0 ];
124- psck01 = ks2k[0 ][1 ];
125- psck02 = ks2k[0 ][2 ];
126- psck10 = ks2k[1 ][0 ];
127- psck11 = ks2k[1 ][1 ];
128- psck12 = ks2k[1 ][2 ];
129- psck20 = ks2k[2 ][0 ];
130- psck21 = ks2k[2 ][1 ];
131- psck22 = ks2k[2 ][2 ];
114+ psci00[klane] + = ks2i[0 ][0 ];
115+ psci01[klane] + = ks2i[0 ][1 ];
116+ psci02[klane] + = ks2i[0 ][2 ];
117+ psci10[klane] + = ks2i[1 ][0 ];
118+ psci11[klane] + = ks2i[1 ][1 ];
119+ psci12[klane] + = ks2i[1 ][2 ];
120+ psci20[klane] + = ks2i[2 ][0 ];
121+ psci21[klane] + = ks2i[2 ][1 ];
122+ psci22[klane] + = ks2i[2 ][2 ];
123+ psck00 + = ks2k[0 ][0 ];
124+ psck01 + = ks2k[0 ][1 ];
125+ psck02 + = ks2k[0 ][2 ];
126+ psck10 + = ks2k[1 ][0 ];
127+ psck11 + = ks2k[1 ][1 ];
128+ psck12 + = ks2k[1 ][2 ];
129+ psck20 + = ks2k[2 ][0 ];
130+ psck21 + = ks2k[2 ][1 ];
131+ psck22 + = ks2k[2 ][2 ];
132132 }
133133
134134 atomic_add (psci00[threadIdx .x ], &polscale[i][0 ]);
@@ -215,24 +215,24 @@ static void alterpol_cu1(int n, TINKER_IMAGE_PARAMS, real cut, real off,
215215 real ks2i[3 ][3 ], ks2k[3 ][3 ];
216216 pair_alterpol (scrtyp, r, scaleb, cut, off, xr, yr, zr, springi[klane], sizi[klane],
217217 alphai[klane], springk, sizk, alphak, ks2i, ks2k);
218- psci00[klane] = ks2i[0 ][0 ];
219- psci01[klane] = ks2i[0 ][1 ];
220- psci02[klane] = ks2i[0 ][2 ];
221- psci10[klane] = ks2i[1 ][0 ];
222- psci11[klane] = ks2i[1 ][1 ];
223- psci12[klane] = ks2i[1 ][2 ];
224- psci20[klane] = ks2i[2 ][0 ];
225- psci21[klane] = ks2i[2 ][1 ];
226- psci22[klane] = ks2i[2 ][2 ];
227- psck00 = ks2k[0 ][0 ];
228- psck01 = ks2k[0 ][1 ];
229- psck02 = ks2k[0 ][2 ];
230- psck10 = ks2k[1 ][0 ];
231- psck11 = ks2k[1 ][1 ];
232- psck12 = ks2k[1 ][2 ];
233- psck20 = ks2k[2 ][0 ];
234- psck21 = ks2k[2 ][1 ];
235- psck22 = ks2k[2 ][2 ];
218+ psci00[klane] + = ks2i[0 ][0 ];
219+ psci01[klane] + = ks2i[0 ][1 ];
220+ psci02[klane] + = ks2i[0 ][2 ];
221+ psci10[klane] + = ks2i[1 ][0 ];
222+ psci11[klane] + = ks2i[1 ][1 ];
223+ psci12[klane] + = ks2i[1 ][2 ];
224+ psci20[klane] + = ks2i[2 ][0 ];
225+ psci21[klane] + = ks2i[2 ][1 ];
226+ psci22[klane] + = ks2i[2 ][2 ];
227+ psck00 + = ks2k[0 ][0 ];
228+ psck01 + = ks2k[0 ][1 ];
229+ psck02 + = ks2k[0 ][2 ];
230+ psck10 + = ks2k[1 ][0 ];
231+ psck11 + = ks2k[1 ][1 ];
232+ psck12 + = ks2k[1 ][2 ];
233+ psck20 + = ks2k[2 ][0 ];
234+ psck21 + = ks2k[2 ][1 ];
235+ psck22 + = ks2k[2 ][2 ];
236236 }
237237
238238 iid = __shfl_sync (ALL_LANES, iid, ilane + 1 );
@@ -313,24 +313,24 @@ static void alterpol_cu1(int n, TINKER_IMAGE_PARAMS, real cut, real off,
313313 real ks2i[3 ][3 ], ks2k[3 ][3 ];
314314 pair_alterpol (scrtyp, r, scaleb, cut, off, xr, yr, zr, springi[klane], sizi[klane],
315315 alphai[klane], springk, sizk, alphak, ks2i, ks2k);
316- psci00[klane] = ks2i[0 ][0 ];
317- psci01[klane] = ks2i[0 ][1 ];
318- psci02[klane] = ks2i[0 ][2 ];
319- psci10[klane] = ks2i[1 ][0 ];
320- psci11[klane] = ks2i[1 ][1 ];
321- psci12[klane] = ks2i[1 ][2 ];
322- psci20[klane] = ks2i[2 ][0 ];
323- psci21[klane] = ks2i[2 ][1 ];
324- psci22[klane] = ks2i[2 ][2 ];
325- psck00 = ks2k[0 ][0 ];
326- psck01 = ks2k[0 ][1 ];
327- psck02 = ks2k[0 ][2 ];
328- psck10 = ks2k[1 ][0 ];
329- psck11 = ks2k[1 ][1 ];
330- psck12 = ks2k[1 ][2 ];
331- psck20 = ks2k[2 ][0 ];
332- psck21 = ks2k[2 ][1 ];
333- psck22 = ks2k[2 ][2 ];
316+ psci00[klane] + = ks2i[0 ][0 ];
317+ psci01[klane] + = ks2i[0 ][1 ];
318+ psci02[klane] + = ks2i[0 ][2 ];
319+ psci10[klane] + = ks2i[1 ][0 ];
320+ psci11[klane] + = ks2i[1 ][1 ];
321+ psci12[klane] + = ks2i[1 ][2 ];
322+ psci20[klane] + = ks2i[2 ][0 ];
323+ psci21[klane] + = ks2i[2 ][1 ];
324+ psci22[klane] + = ks2i[2 ][2 ];
325+ psck00 + = ks2k[0 ][0 ];
326+ psck01 + = ks2k[0 ][1 ];
327+ psck02 + = ks2k[0 ][2 ];
328+ psck10 + = ks2k[1 ][0 ];
329+ psck11 + = ks2k[1 ][1 ];
330+ psck12 + = ks2k[1 ][2 ];
331+ psck20 + = ks2k[2 ][0 ];
332+ psck21 + = ks2k[2 ][1 ];
333+ psck22 + = ks2k[2 ][2 ];
334334 }
335335 }
336336
@@ -861,8 +861,7 @@ void eppcgP5(int n, const real* restrict polarity, //
861861{
862862 real kaval = *ka;
863863 real a = *ksum / kaval;
864- if (kaval == 0 )
865- a = 0 ;
864+ if (kaval == 0 ) a = 0 ;
866865 for (int i = ITHREAD; i < n; i += STRIDE) {
867866 #pragma unroll
868867 for (int j = 0 ; j < 3 ; ++j) {
@@ -883,8 +882,7 @@ void eppcgP6(int n, const real* restrict ksum, const real* restrict ksum1, real
883882{
884883 real ksumval = *ksum;
885884 real b = *ksum1 / ksumval;
886- if (ksumval == 0 )
887- b = 0 ;
885+ if (ksumval == 0 ) b = 0 ;
888886 for (int i = ITHREAD; i < n; i += STRIDE) {
889887 #pragma unroll
890888 for (int j = 0 ; j < 3 ; ++j)
@@ -968,11 +966,12 @@ void induceMutualPcg4_cu(real (*uind)[3])
968966 const real debye = units::debye;
969967 const real pcgpeek = polpcg::pcgpeek;
970968 const int maxiter = 100 ; // see also subroutine induce0a in induce.f
969+ const int miniter = std::min (3 , n);
971970
972971 bool done = false ;
973972 int iter = 0 ;
974973 real eps = 100 ;
975- real epsold;
974+ // real epsold;
976975
977976 while (not done) {
978977 ++iter;
@@ -1013,7 +1012,7 @@ void induceMutualPcg4_cu(real (*uind)[3])
10131012 check_rt (
10141013 cudaMemcpyAsync ((real*)pinned_buf, epsd, sizeof (real), cudaMemcpyDeviceToHost, g::s0));
10151014 check_rt (cudaStreamSynchronize (g::s0));
1016- epsold = eps;
1015+ // epsold = eps;
10171016 eps = ((real*)pinned_buf)[0 ];
10181017 eps = debye * REAL_SQRT (eps / n);
10191018
@@ -1026,16 +1025,13 @@ void induceMutualPcg4_cu(real (*uind)[3])
10261025 print (stdout, " %8d %-16.10f\n " , iter, eps);
10271026 }
10281027
1029- if (eps < poleps)
1030- done = true ;
1031- if (eps > epsold)
1032- done = true ;
1033- if (iter >= politer)
1034- done = true ;
1028+ if (eps < poleps) done = true ;
1029+ // if (eps > epsold) done = true;
1030+ if (iter < miniter) done = false ;
1031+ if (iter >= politer) done = true ;
10351032
10361033 // apply a "peek" iteration to the mutual induced dipoles
1037- if (done)
1038- launch_k1s (g::s0, n, eppcgPeek1, n, pcgpeek, polarity, uind, rsd);
1034+ if (done) launch_k1s (g::s0, n, eppcgPeek1, n, pcgpeek, polarity, uind, rsd);
10391035 }
10401036
10411037 // print the results from the conjugate gradient iteration
@@ -1047,7 +1043,7 @@ void induceMutualPcg4_cu(real (*uind)[3])
10471043 }
10481044
10491045 // terminate the calculation if dipoles failed to converge
1050- if (iter >= maxiter || eps > epsold ) {
1046+ if (iter >= maxiter) {
10511047 printError ();
10521048 TINKER_THROW (" INDUCE -- Warning, Induced Dipoles are not Converged" );
10531049 }
0 commit comments