@@ -399,90 +399,91 @@ void Force_LCAO_k::cal_foverlap_k(const bool isforce,
399399 const int T1 = GlobalC::ucell.iat2it [iat];
400400 Atom* atom1 = &GlobalC::ucell.atoms [T1];
401401 const int I1 = GlobalC::ucell.iat2ia [iat];
402- {
403- double *foverlap_iat = &foverlap (iat, 0 );
402+
403+ double *foverlap_iat;
404+ if (isforce) foverlap_iat = &foverlap (iat, 0 );
405+
404406#ifdef _OPENMP
405- // using local stack to avoid false sharing in multi-threaded case
406- double foverlap_temp[3 ] = {0.0 , 0.0 , 0.0 };
407- if (num_threads > 1 )
408- {
409- foverlap_iat = foverlap_temp;
410- }
407+ // using local stack to avoid false sharing in multi-threaded case
408+ double foverlap_temp[3 ] = {0.0 , 0.0 , 0.0 };
409+ if (num_threads > 1 )
410+ {
411+ foverlap_iat = foverlap_temp;
412+ }
411413#endif
412- int irr = pv->nlocstart [iat];
413- const int start1 = GlobalC::ucell.itiaiw2iwt (T1, I1, 0 );
414- for (int cb = 0 ; cb < ra.na_each [iat]; ++cb)
414+ int irr = pv->nlocstart [iat];
415+ const int start1 = GlobalC::ucell.itiaiw2iwt (T1, I1, 0 );
416+ for (int cb = 0 ; cb < ra.na_each [iat]; ++cb)
417+ {
418+ const int T2 = ra.info [iat][cb][3 ];
419+ const int I2 = ra.info [iat][cb][4 ];
420+ const int start2 = GlobalC::ucell.itiaiw2iwt (T2, I2, 0 );
421+
422+ Atom* atom2 = &GlobalC::ucell.atoms [T2];
423+
424+ for (int jj = 0 ; jj < atom1->nw ; jj++)
415425 {
416- const int T2 = ra.info [iat][cb][3 ];
417- const int I2 = ra.info [iat][cb][4 ];
418- const int start2 = GlobalC::ucell.itiaiw2iwt (T2, I2, 0 );
426+ const int iw1_all = start1 + jj;
419427
420- Atom* atom2 = &GlobalC::ucell.atoms [T2];
428+ // HPSEPS
429+ const int mu = pv->trace_loc_row [iw1_all];
430+ if (mu < 0 )
431+ continue ;
421432
422- for (int jj = 0 ; jj < atom1 ->nw ; jj ++)
433+ for (int kk = 0 ; kk < atom2 ->nw ; kk ++)
423434 {
424- const int iw1_all = start1 + jj ;
435+ const int iw2_all = start2 + kk ;
425436
426437 // HPSEPS
427- const int mu = pv->trace_loc_row [iw1_all ];
428- if (mu < 0 )
438+ const int nu = pv->trace_loc_col [iw2_all ];
439+ if (nu < 0 )
429440 continue ;
430-
431- for (int kk = 0 ; kk < atom2->nw ; kk++)
441+ // ==============================================================
442+ // here we use 'minus', but in GlobalV::GAMMA_ONLY_LOCAL we use 'plus',
443+ // both are correct because the 'DSloc_Rx' is used in 'row' (-),
444+ // however, the 'DSloc_x' in GAMMA is used in 'col' (+),
445+ // mohan update 2011-06-16
446+ // ==============================================================
447+ for (int is = 0 ; is < GlobalV::NSPIN; ++is)
432448 {
433- const int iw2_all = start2 + kk;
434-
435- // HPSEPS
436- const int nu = pv->trace_loc_col [iw2_all];
437- if (nu < 0 )
438- continue ;
439- // ==============================================================
440- // here we use 'minus', but in GlobalV::GAMMA_ONLY_LOCAL we use 'plus',
441- // both are correct because the 'DSloc_Rx' is used in 'row' (-),
442- // however, the 'DSloc_x' in GAMMA is used in 'col' (+),
443- // mohan update 2011-06-16
444- // ==============================================================
445- for (int is = 0 ; is < GlobalV::NSPIN; ++is)
449+ double edm2d2 = 2.0 * edm2d[is][irr];
450+ if (isforce)
446451 {
447- double edm2d2 = 2.0 * edm2d[is][irr];
448- if (isforce)
449- {
450- foverlap_iat[0 ] -= edm2d2 * this ->UHM ->LM ->DSloc_Rx [irr];
451- foverlap_iat[1 ] -= edm2d2 * this ->UHM ->LM ->DSloc_Ry [irr];
452- foverlap_iat[2 ] -= edm2d2 * this ->UHM ->LM ->DSloc_Rz [irr];
453- }
454- if (isstress)
452+ foverlap_iat[0 ] -= edm2d2 * this ->UHM ->LM ->DSloc_Rx [irr];
453+ foverlap_iat[1 ] -= edm2d2 * this ->UHM ->LM ->DSloc_Ry [irr];
454+ foverlap_iat[2 ] -= edm2d2 * this ->UHM ->LM ->DSloc_Rz [irr];
455+ }
456+ if (isstress)
457+ {
458+ for (int ipol = 0 ; ipol < 3 ; ipol++)
455459 {
456- for (int ipol = 0 ; ipol < 3 ; ipol++)
457- {
458- local_soverlap (0 , ipol) += edm2d[is][irr] * this ->UHM ->LM ->DSloc_Rx [irr]
459- * this ->UHM ->LM ->DH_r [irr * 3 + ipol];
460- if (ipol < 1 )
461- continue ;
462- local_soverlap (1 , ipol) += edm2d[is][irr] * this ->UHM ->LM ->DSloc_Ry [irr]
463- * this ->UHM ->LM ->DH_r [irr * 3 + ipol];
464- if (ipol < 2 )
465- continue ;
466- local_soverlap (2 , ipol) += edm2d[is][irr] * this ->UHM ->LM ->DSloc_Rz [irr]
467- * this ->UHM ->LM ->DH_r [irr * 3 + ipol];
468- }
460+ local_soverlap (0 , ipol) += edm2d[is][irr] * this ->UHM ->LM ->DSloc_Rx [irr]
461+ * this ->UHM ->LM ->DH_r [irr * 3 + ipol];
462+ if (ipol < 1 )
463+ continue ;
464+ local_soverlap (1 , ipol) += edm2d[is][irr] * this ->UHM ->LM ->DSloc_Ry [irr]
465+ * this ->UHM ->LM ->DH_r [irr * 3 + ipol];
466+ if (ipol < 2 )
467+ continue ;
468+ local_soverlap (2 , ipol) += edm2d[is][irr] * this ->UHM ->LM ->DSloc_Rz [irr]
469+ * this ->UHM ->LM ->DH_r [irr * 3 + ipol];
469470 }
470471 }
471- ++local_total_irr;
472- ++irr;
473- } // end kk
474- } // end jj
475- } // end cb
472+ }
473+ ++local_total_irr;
474+ ++irr;
475+ } // end kk
476+ } // end jj
477+ } // end cb
476478#ifdef _OPENMP
477- if (isforce && num_threads > 1 )
478- {
479- foverlap (iat, 0 ) += foverlap_iat[0 ];
480- foverlap (iat, 1 ) += foverlap_iat[1 ];
481- foverlap (iat, 2 ) += foverlap_iat[2 ];
482- }
483- #endif
479+ if (isforce && num_threads > 1 )
480+ {
481+ foverlap (iat, 0 ) += foverlap_iat[0 ];
482+ foverlap (iat, 1 ) += foverlap_iat[1 ];
483+ foverlap (iat, 2 ) += foverlap_iat[2 ];
484484 }
485- }
485+ #endif
486+ } // end iat
486487#ifdef _OPENMP
487488 #pragma omp critical(cal_foverlap_k_reduce)
488489 {
@@ -557,77 +558,77 @@ void Force_LCAO_k::cal_ftvnl_dphi_k(double** dm2d,
557558 const int T1 = GlobalC::ucell.iat2it [iat];
558559 Atom* atom1 = &GlobalC::ucell.atoms [T1];
559560 const int I1 = GlobalC::ucell.iat2ia [iat];
560- {
561- int irr = pv->nlocstart [iat];
562- const int start1 = GlobalC::ucell.itiaiw2iwt (T1, I1, 0 );
563- double *ftvnl_dphi_iat = &ftvnl_dphi (iat, 0 );
561+
562+ int irr = pv->nlocstart [iat];
563+ const int start1 = GlobalC::ucell.itiaiw2iwt (T1, I1, 0 );
564+ double *ftvnl_dphi_iat;
565+ if (isforce) ftvnl_dphi_iat = &ftvnl_dphi (iat, 0 );
564566#ifdef _OPENMP
565- // using local stack to avoid false sharing in multi-threaded case
566- double ftvnl_dphi_temp[3 ] = {0.0 , 0.0 , 0.0 };
567- if (num_threads > 1 )
568- {
569- ftvnl_dphi_iat = ftvnl_dphi_temp;
570- }
567+ // using local stack to avoid false sharing in multi-threaded case
568+ double ftvnl_dphi_temp[3 ] = {0.0 , 0.0 , 0.0 };
569+ if (num_threads > 1 )
570+ {
571+ ftvnl_dphi_iat = ftvnl_dphi_temp;
572+ }
571573#endif
572- for (int cb = 0 ; cb < ra.na_each [iat]; ++cb)
573- {
574- const int T2 = ra.info [iat][cb][3 ];
575- const int I2 = ra.info [iat][cb][4 ];
576- const int start2 = GlobalC::ucell.itiaiw2iwt (T2, I2, 0 );
577- Atom* atom2 = &GlobalC::ucell.atoms [T2];
574+ for (int cb = 0 ; cb < ra.na_each [iat]; ++cb)
575+ {
576+ const int T2 = ra.info [iat][cb][3 ];
577+ const int I2 = ra.info [iat][cb][4 ];
578+ const int start2 = GlobalC::ucell.itiaiw2iwt (T2, I2, 0 );
579+ Atom* atom2 = &GlobalC::ucell.atoms [T2];
578580
579- for (int jj = 0 ; jj < atom1->nw ; ++jj)
581+ for (int jj = 0 ; jj < atom1->nw ; ++jj)
582+ {
583+ const int iw1_all = start1 + jj;
584+ const int mu = pv->trace_loc_row [iw1_all];
585+ if (mu < 0 )
586+ continue ;
587+ for (int kk = 0 ; kk < atom2->nw ; ++kk)
580588 {
581- const int iw1_all = start1 + jj ;
582- const int mu = pv->trace_loc_row [iw1_all ];
583- if (mu < 0 )
589+ const int iw2_all = start2 + kk ;
590+ const int nu = pv->trace_loc_col [iw2_all ];
591+ if (nu < 0 )
584592 continue ;
585- for (int kk = 0 ; kk < atom2->nw ; ++kk)
593+ // ==============================================================
594+ // here we use 'minus', but in GlobalV::GAMMA_ONLY_LOCAL we use 'plus',
595+ // both are correct because the 'DSloc_Rx' is used in 'row' (-),
596+ // however, the 'DSloc_x' is used in 'col' (+),
597+ // mohan update 2011-06-16
598+ // ==============================================================
599+ for (int is = 0 ; is < GlobalV::NSPIN; ++is)
586600 {
587- const int iw2_all = start2 + kk;
588- const int nu = pv->trace_loc_col [iw2_all];
589- if (nu < 0 )
590- continue ;
591- // ==============================================================
592- // here we use 'minus', but in GlobalV::GAMMA_ONLY_LOCAL we use 'plus',
593- // both are correct because the 'DSloc_Rx' is used in 'row' (-),
594- // however, the 'DSloc_x' is used in 'col' (+),
595- // mohan update 2011-06-16
596- // ==============================================================
597- for (int is = 0 ; is < GlobalV::NSPIN; ++is)
601+ double dm2d2 = 2.0 * dm2d[is][irr];
602+ if (isforce)
598603 {
599- double dm2d2 = 2.0 * dm2d[is][irr];
600- if (isforce)
601- {
602- ftvnl_dphi_iat[0 ] += dm2d2 * this ->UHM ->LM ->DHloc_fixedR_x [irr];
603- ftvnl_dphi_iat[1 ] += dm2d2 * this ->UHM ->LM ->DHloc_fixedR_y [irr];
604- ftvnl_dphi_iat[2 ] += dm2d2 * this ->UHM ->LM ->DHloc_fixedR_z [irr];
605- }
606- if (isstress)
607- {
608- local_stvnl_dphi (0 , 0 ) -= dm2d[is][irr] * this ->UHM ->LM ->stvnl11 [irr];
609- local_stvnl_dphi (0 , 1 ) -= dm2d[is][irr] * this ->UHM ->LM ->stvnl12 [irr];
610- local_stvnl_dphi (0 , 2 ) -= dm2d[is][irr] * this ->UHM ->LM ->stvnl13 [irr];
611- local_stvnl_dphi (1 , 1 ) -= dm2d[is][irr] * this ->UHM ->LM ->stvnl22 [irr];
612- local_stvnl_dphi (1 , 2 ) -= dm2d[is][irr] * this ->UHM ->LM ->stvnl23 [irr];
613- local_stvnl_dphi (2 , 2 ) -= dm2d[is][irr] * this ->UHM ->LM ->stvnl33 [irr];
614- }
604+ ftvnl_dphi_iat[0 ] += dm2d2 * this ->UHM ->LM ->DHloc_fixedR_x [irr];
605+ ftvnl_dphi_iat[1 ] += dm2d2 * this ->UHM ->LM ->DHloc_fixedR_y [irr];
606+ ftvnl_dphi_iat[2 ] += dm2d2 * this ->UHM ->LM ->DHloc_fixedR_z [irr];
615607 }
616- ++local_total_irr;
617- ++irr;
618- } // end kk
619- } // end jj
620- } // end cb
608+ if (isstress)
609+ {
610+ local_stvnl_dphi (0 , 0 ) -= dm2d[is][irr] * this ->UHM ->LM ->stvnl11 [irr];
611+ local_stvnl_dphi (0 , 1 ) -= dm2d[is][irr] * this ->UHM ->LM ->stvnl12 [irr];
612+ local_stvnl_dphi (0 , 2 ) -= dm2d[is][irr] * this ->UHM ->LM ->stvnl13 [irr];
613+ local_stvnl_dphi (1 , 1 ) -= dm2d[is][irr] * this ->UHM ->LM ->stvnl22 [irr];
614+ local_stvnl_dphi (1 , 2 ) -= dm2d[is][irr] * this ->UHM ->LM ->stvnl23 [irr];
615+ local_stvnl_dphi (2 , 2 ) -= dm2d[is][irr] * this ->UHM ->LM ->stvnl33 [irr];
616+ }
617+ }
618+ ++local_total_irr;
619+ ++irr;
620+ } // end kk
621+ } // end jj
622+ } // end cb
621623#ifdef _OPENMP
622- if (isforce && num_threads > 1 )
623- {
624- ftvnl_dphi (iat, 0 ) += ftvnl_dphi_iat[0 ];
625- ftvnl_dphi (iat, 1 ) += ftvnl_dphi_iat[1 ];
626- ftvnl_dphi (iat, 2 ) += ftvnl_dphi_iat[2 ];
627- }
628- #endif
624+ if (isforce && num_threads > 1 )
625+ {
626+ ftvnl_dphi (iat, 0 ) += ftvnl_dphi_iat[0 ];
627+ ftvnl_dphi (iat, 1 ) += ftvnl_dphi_iat[1 ];
628+ ftvnl_dphi (iat, 2 ) += ftvnl_dphi_iat[2 ];
629629 }
630- }
630+ #endif
631+ } // end iat
631632#ifdef _OPENMP
632633 #pragma omp critical(cal_ftvnl_dphi_k_reduce)
633634 {
0 commit comments