@@ -15,7 +15,7 @@ template<typename TR>
1515void folding_HR (const hamilt::HContainer<TR>& hR,
1616 std::complex <double >* hk,
1717 const ModuleBase::Vector3<double >& kvec_d_in,
18- const int ncol ,
18+ const int hk_ld ,
1919 const int hk_type)
2020{
2121#ifdef _OPENMP
@@ -24,19 +24,54 @@ void folding_HR(const hamilt::HContainer<TR>& hR,
2424 for (int i = 0 ; i < hR.size_atom_pairs (); ++i)
2525 {
2626 hamilt::AtomPair<TR>& tmp = hR.get_atom_pair (i);
27- for (int ir = 0 ;ir < tmp.get_R_size (); ++ir )
27+ const int row_size = tmp.get_row_size ();
28+ const int col_size = tmp.get_col_size ();
29+ // copy hk to hk_type
30+ // hk_tmp is row-major and stored contiguously in memory,
31+ // so copy hr to hk_tmp is faster than copy hr to hk
32+ std::vector<std::complex <double >> hk_mat_tmp (row_size * col_size, 0 );
33+
34+ // copy hr to hk_tmp
35+ for (int ir = 0 ; ir < tmp.get_R_size (); ++ir)
2836 {
2937 const ModuleBase::Vector3<int > r_index = tmp.get_R_index (ir);
38+ TR* hr_mat = tmp.get_pointer (ir);
3039 // cal k_phase
3140 // if TK==std::complex<double>, kphase is e^{ikR}
3241 const ModuleBase::Vector3<double > dR (r_index.x , r_index.y , r_index.z );
3342 const double arg = (kvec_d_in * dR) * ModuleBase::TWO_PI;
3443 double sinp, cosp;
3544 ModuleBase::libm::sincos (arg, &sinp, &cosp);
3645 std::complex <double > kphase = std::complex <double >(cosp, sinp);
46+
47+ for (int i = 0 ; i < row_size * col_size; ++i)
48+ {
49+ hk_mat_tmp[i] += kphase * hr_mat[i];
50+ }
51+ }
3752
38- tmp.find_R (r_index);
39- tmp.add_to_matrix (hk, ncol, kphase, hk_type);
53+ // copy hk_tmp to hk
54+ if (hk_type == 0 )
55+ {
56+ std::complex <double >* hk_mat = hk + tmp.get_begin_row () * hk_ld + tmp.get_begin_col ();
57+ for (int irow = 0 ; irow < row_size; ++irow)
58+ {
59+ for (int icol = 0 ; icol < col_size; ++icol)
60+ {
61+ hk_mat[irow * hk_ld + icol] += hk_mat_tmp[irow * col_size + icol];
62+ }
63+ }
64+ }
65+ else if (hk_type == 1 )
66+ {
67+ std::complex <double >* hk_mat = hk + tmp.get_begin_col () * hk_ld + tmp.get_begin_row ();
68+ for (int icol = 0 ; icol < col_size; ++icol)
69+ {
70+ for (int irow = 0 ; irow < row_size; ++irow)
71+ {
72+ hk_mat[icol * hk_ld + irow] += hk_mat_tmp[irow * col_size + icol];
73+ }
74+ }
4075 }
4176 }
4277 /* for (int i = 0; i < hR.size_R_loop(); ++i)
@@ -82,7 +117,7 @@ template void folding_HR<double>(const hamilt::HContainer<double>& hR,
82117void folding_HR (const hamilt::HContainer<double >& hR,
83118 double * hk,
84119 const ModuleBase::Vector3<double >& kvec_d_in,
85- const int ncol ,
120+ const int hk_ld ,
86121 const int hk_type)
87122{
88123// in ABACUS, this function works with gamma-only case.
@@ -97,7 +132,7 @@ void folding_HR(const hamilt::HContainer<double>& hR,
97132 double kphase = 1.0 ;
98133
99134 // Hk = HR
100- hR.get_atom_pair (i).add_to_matrix (hk, ncol , kphase, hk_type);
135+ hR.get_atom_pair (i).add_to_matrix (hk, hk_ld , kphase, hk_type);
101136 }
102137}
103138
0 commit comments