@@ -68,16 +68,6 @@ void hamilt::DeePKS<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(const Grid_Driv
6868
6969 this ->adjs_all .clear ();
7070 this ->adjs_all .reserve (this ->ucell ->nat );
71- bool pre_cal_nlm = false ;
72- if (ucell->nat < 100 ) // less than 100 atom , cost memory for high performance
73- { // pre calculate nlm in initialization
74- this ->nlm_tot .resize (ucell->nat );
75- pre_cal_nlm = true ;
76- }
77- else
78- { // calculate nlm on the fly
79- this ->nlm_tot .resize (1 );
80- }
8171
8272 for (int iat0 = 0 ; iat0 < ucell->nat ; iat0++)
8373 {
@@ -135,10 +125,6 @@ void hamilt::DeePKS<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(const Grid_Driv
135125 // }
136126 }
137127 }
138- if (pre_cal_nlm)
139- {
140- this ->pre_calculate_nlm (iat0, nlm_tot[iat0]);
141- }
142128 }
143129 // allocate the memory of BaseMatrix in HR, and set the new values to zero
144130 // if (std::is_same<TK, double>::value)
@@ -234,70 +220,15 @@ void hamilt::DeePKS<hamilt::OperatorLCAO<TK, TR>>::contributeHR()
234220
235221#ifdef __DEEPKS
236222
237- template <typename TK, typename TR>
238- void hamilt::DeePKS<hamilt::OperatorLCAO<TK, TR>>::pre_calculate_nlm(
239- const int iat0,
240- std::vector<std::unordered_map<int , std::vector<double >>>& nlm_in)
241- {
242- const Parallel_Orbitals* paraV = this ->hR ->get_paraV ();
243- const int npol = this ->ucell ->get_npol ();
244- auto tau0 = ucell->get_tau (iat0);
245- int T0 = 0 ;
246- int I0 = 0 ;
247- ucell->iat2iait (iat0, &I0, &T0);
248- AdjacentAtomInfo& adjs = this ->adjs_all [iat0];
249- nlm_in.resize (adjs.adj_num + 1 );
250-
251- for (int ad = 0 ; ad < adjs.adj_num + 1 ; ++ad)
252- {
253- const int T1 = adjs.ntype [ad];
254- const int I1 = adjs.natom [ad];
255- const int iat1 = ucell->itia2iat (T1, I1);
256- const ModuleBase::Vector3<double >& tau1 = adjs.adjacent_tau [ad];
257- const Atom* atom1 = &ucell->atoms [T1];
258-
259- auto all_indexes = paraV->get_indexes_row (iat1);
260- auto col_indexes = paraV->get_indexes_col (iat1);
261- // insert col_indexes into all_indexes to get universal set with no repeat elements
262- all_indexes.insert (all_indexes.end (), col_indexes.begin (), col_indexes.end ());
263- std::sort (all_indexes.begin (), all_indexes.end ());
264- all_indexes.erase (std::unique (all_indexes.begin (), all_indexes.end ()), all_indexes.end ());
265- for (int iw1l = 0 ; iw1l < all_indexes.size (); iw1l += npol)
266- {
267- const int iw1 = all_indexes[iw1l] / npol;
268- std::vector<std::vector<double >> nlm;
269- // nlm is a vector of vectors, but size of outer vector is only 1 here
270- // If we are calculating force, we need also to store the gradient
271- // and size of outer vector is then 4
272- // inner loop : all projectors (L0,M0)
273-
274- int L1 = atom1->iw2l [iw1];
275- int N1 = atom1->iw2n [iw1];
276- int m1 = atom1->iw2m [iw1];
277-
278- // convert m (0,1,...2l) to M (-l, -l+1, ..., l-1, l)
279- int M1 = (m1 % 2 == 0 ) ? -m1 / 2 : (m1 + 1 ) / 2 ;
280-
281- ModuleBase::Vector3<double > dtau = tau0 - tau1;
282- intor_orb_alpha_->snap (T1, L1, N1, M1, 0 , dtau * ucell->lat0 , false /* calc_deri*/ , nlm);
283- nlm_in[ad].insert ({all_indexes[iw1l], nlm[0 ]});
284- if (npol == 2 )
285- {
286- nlm_in[ad].insert ({all_indexes[iw1l + 1 ], nlm[0 ]});
287- }
288- }
289- }
290- }
291-
292223template <typename TK, typename TR>
293224void hamilt::DeePKS<hamilt::OperatorLCAO<TK, TR>>::calculate_HR()
294225{
295226 ModuleBase::TITLE (" DeePKS" , " calculate_HR" );
227+ ModuleBase::timer::tick (" DeePKS" , " calculate_HR" );
296228 if (this ->V_delta_R ->size_atom_pairs () == 0 )
297229 {
298230 return ;
299231 }
300- ModuleBase::timer::tick (" DeePKS" , " calculate_HR" );
301232
302233 const Parallel_Orbitals* paraV = this ->V_delta_R ->get_paraV ();
303234 const int npol = this ->ucell ->get_npol ();
@@ -361,18 +292,6 @@ void hamilt::DeePKS<hamilt::OperatorLCAO<TK, TR>>::calculate_HR()
361292 const int trace_alpha_size = trace_alpha_row.size ();
362293 // --------------------------------------------------
363294
364- // if nlm_tot is not calculated already, calculate it on the fly now
365- std::vector<std::unordered_map<int , std::vector<double >>> nlm_on_the_fly;
366- const bool is_on_the_fly = (nlm_tot.size () != this ->ucell ->nat );
367-
368- if (is_on_the_fly)
369- {
370- this ->pre_calculate_nlm (iat0, nlm_on_the_fly);
371- }
372-
373- std::vector<std::unordered_map<int , std::vector<double >>>& nlm_iat
374- = is_on_the_fly ? nlm_on_the_fly : nlm_tot[iat0];
375-
376295 // 2. calculate <phi_I|beta>D<beta|phi_{J,R}> for each pair of <IJR> atoms
377296 for (int ad1 = 0 ; ad1 < adjs.adj_num + 1 ; ++ad1)
378297 {
@@ -386,11 +305,17 @@ void hamilt::DeePKS<hamilt::OperatorLCAO<TK, TR>>::calculate_HR()
386305 {
387306 continue ;
388307 }
308+ ModuleBase::Vector3<int > dR1 (adjs.box [ad1].x , adjs.box [ad1].y , adjs.box [ad1].z );
309+ if (this ->ld ->phialpha [0 ]->find_matrix (iat0, iat1, dR1.x , dR1.y , dR1.z ) == nullptr )
310+ {
311+ continue ;
312+ }
389313
390314 std::vector<double > s_1t (trace_alpha_size * row_size);
391315 for (int irow = 0 ; irow < row_size; irow++)
392316 {
393- const double * row_ptr = nlm_iat[ad1][row_indexes[irow]].data ();
317+ const hamilt::BaseMatrix<double >* overlap_1 = this ->ld ->phialpha [0 ]->find_matrix (iat0, iat1, dR1);
318+ const double * row_ptr = overlap_1->get_pointer () + row_indexes[irow] * overlap_1->get_col_size ();
394319 double * ps1t = &s_1t[irow * trace_alpha_size];
395320 for (int i = 0 ; i < trace_alpha_size; i++)
396321 {
@@ -415,11 +340,23 @@ void hamilt::DeePKS<hamilt::OperatorLCAO<TK, TR>>::calculate_HR()
415340 }
416341 auto col_indexes = paraV->get_indexes_col (iat2);
417342 const int col_size = col_indexes.size ();
343+
344+ if (col_size == 0 )
345+ {
346+ continue ;
347+ }
348+ ModuleBase::Vector3<int > dR2 (adjs.box [ad2].x , adjs.box [ad2].y , adjs.box [ad2].z );
349+ if (this ->ld ->phialpha [0 ]->find_matrix (iat0, iat2, dR2.x , dR2.y , dR2.z ) == nullptr )
350+ {
351+ continue ;
352+ }
353+
418354 std::vector<double > hr_current (row_size * col_size, 0 );
419355 std::vector<double > s_2t (trace_alpha_size * col_size);
420356 for (int icol = 0 ; icol < col_size; icol++)
421357 {
422- const double * col_ptr = nlm_iat[ad2][col_indexes[icol]].data ();
358+ const hamilt::BaseMatrix<double >* overlap_2 = this ->ld ->phialpha [0 ]->find_matrix (iat0, iat2, dR2);
359+ const double * col_ptr = overlap_2->get_pointer () + col_indexes[icol] * overlap_2->get_col_size ();
423360 double * ps2t = &s_2t[icol * trace_alpha_size];
424361 for (int i = 0 ; i < trace_alpha_size; i++)
425362 {
0 commit comments