Skip to content

Commit 9c57cbe

Browse files
committed
optimize func_folding
1 parent 1ab1998 commit 9c57cbe

File tree

4 files changed

+52
-14
lines changed

4 files changed

+52
-14
lines changed

source/module_elecstate/module_dm/density_matrix.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -166,13 +166,10 @@ void DensityMatrix<std::complex<double>, double>::cal_DMR(const int ik_in)
166166
if(PARAM.inp.nspin != 4)
167167
{
168168
double* target_DMR_mat = target_DMR_mat_vec[ir];
169-
for(int irow = 0; irow < row_size; ++irow)
169+
for(int i = 0; i < mat_size; i++)
170170
{
171-
for(int icol = 0; icol < col_size; ++icol)
172-
{
173-
target_DMR_mat[irow * col_size + icol] += kphase.real() * tmp_DMK_mat[irow * col_size + icol].real()
174-
- kphase.imag() * tmp_DMK_mat[irow * col_size + icol].imag();
175-
}
171+
target_DMR_mat[i] += kphase.real() * tmp_DMK_mat[i].real()
172+
- kphase.imag() * tmp_DMK_mat[i].imag();
176173
}
177174
} else if(PARAM.inp.nspin == 4)
178175
{

source/module_hamilt_lcao/module_hcontainer/atom_pair.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,12 @@ class AtomPair
118118
*/
119119
void set_zero();
120120

121+
/**
122+
* @brief get begin index of this AtomPair
123+
*/
124+
int get_begin_row() const { return this->row_ap; }
125+
int get_begin_col() const { return this->col_ap; }
126+
121127
/**
122128
* @brief get col_size for this AtomPair
123129
*/

source/module_hamilt_lcao/module_hcontainer/func_folding.cpp

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ template<typename TR>
1515
void folding_HR(const hamilt::HContainer<TR>& hR,
1616
std::complex<double>* hk,
1717
const ModuleBase::Vector3<double>& kvec_d_in,
18-
const int ncol,
18+
const int hk_ld,
1919
const int hk_type)
2020
{
2121
#ifdef _OPENMP
@@ -24,19 +24,54 @@ void folding_HR(const hamilt::HContainer<TR>& hR,
2424
for (int i = 0; i < hR.size_atom_pairs(); ++i)
2525
{
2626
hamilt::AtomPair<TR>& tmp = hR.get_atom_pair(i);
27-
for(int ir = 0;ir < tmp.get_R_size(); ++ir )
27+
const int row_size = tmp.get_row_size();
28+
const int col_size = tmp.get_col_size();
29+
// copy hk to hk_type
30+
// hk_tmp is row-major and stored contiguously in memory,
31+
// so copy hr to hk_tmp is faster than copy hr to hk
32+
std::vector<std::complex<double>> hk_mat_tmp(row_size * col_size, 0);
33+
34+
// copy hr to hk_tmp
35+
for(int ir = 0; ir < tmp.get_R_size(); ++ir)
2836
{
2937
const ModuleBase::Vector3<int> r_index = tmp.get_R_index(ir);
38+
TR* hr_mat = tmp.get_pointer(ir);
3039
// cal k_phase
3140
// if TK==std::complex<double>, kphase is e^{ikR}
3241
const ModuleBase::Vector3<double> dR(r_index.x, r_index.y, r_index.z);
3342
const double arg = (kvec_d_in * dR) * ModuleBase::TWO_PI;
3443
double sinp, cosp;
3544
ModuleBase::libm::sincos(arg, &sinp, &cosp);
3645
std::complex<double> kphase = std::complex<double>(cosp, sinp);
46+
47+
for(int i = 0; i < row_size * col_size; ++i)
48+
{
49+
hk_mat_tmp[i] += kphase * hr_mat[i];
50+
}
51+
}
3752

38-
tmp.find_R(r_index);
39-
tmp.add_to_matrix(hk, ncol, kphase, hk_type);
53+
// copy hk_tmp to hk
54+
if (hk_type == 0)
55+
{
56+
std::complex<double>* hk_mat = hk + tmp.get_begin_row() * hk_ld + tmp.get_begin_col();
57+
for(int irow = 0; irow < row_size; ++irow)
58+
{
59+
for(int icol = 0; icol < col_size; ++icol)
60+
{
61+
hk_mat[irow * hk_ld + icol] += hk_mat_tmp[irow * col_size + icol];
62+
}
63+
}
64+
}
65+
else if(hk_type == 1)
66+
{
67+
std::complex<double>* hk_mat = hk + tmp.get_begin_col() * hk_ld + tmp.get_begin_row();
68+
for(int icol = 0; icol < col_size; ++icol)
69+
{
70+
for(int irow = 0; irow < row_size; ++irow)
71+
{
72+
hk_mat[icol * hk_ld + irow] += hk_mat_tmp[irow * col_size + icol];
73+
}
74+
}
4075
}
4176
}
4277
/*for (int i = 0; i < hR.size_R_loop(); ++i)
@@ -82,7 +117,7 @@ template void folding_HR<double>(const hamilt::HContainer<double>& hR,
82117
void folding_HR(const hamilt::HContainer<double>& hR,
83118
double* hk,
84119
const ModuleBase::Vector3<double>& kvec_d_in,
85-
const int ncol,
120+
const int hk_ld,
86121
const int hk_type)
87122
{
88123
// in ABACUS, this function works with gamma-only case.
@@ -97,7 +132,7 @@ void folding_HR(const hamilt::HContainer<double>& hR,
97132
double kphase = 1.0;
98133

99134
// Hk = HR
100-
hR.get_atom_pair(i).add_to_matrix(hk, ncol, kphase, hk_type);
135+
hR.get_atom_pair(i).add_to_matrix(hk, hk_ld , kphase, hk_type);
101136
}
102137
}
103138

source/module_hamilt_lcao/module_hcontainer/hcontainer_funcs.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,13 @@ template<typename TR>
1616
void folding_HR(const hamilt::HContainer<TR>& hR,
1717
std::complex<double>* hk,
1818
const ModuleBase::Vector3<double>& kvec_d_in,
19-
const int ncol,
19+
const int hk_ld,
2020
const int hk_type);
2121

2222
void folding_HR(const hamilt::HContainer<double>& hR,
2323
double* hk,
2424
const ModuleBase::Vector3<double>& kvec_d_in,
25-
const int ncol,
25+
const int hk_ld,
2626
const int hk_type);
2727

2828
#ifdef __MPI

0 commit comments

Comments
 (0)