Skip to content

Commit 50a204f

Browse files
dyzhengdyzheng
andauthored
Fix: performance and memory for NSPIN=4 in LCAO base (#5249)
Co-authored-by: dyzheng <[email protected]>
1 parent fea0620 commit 50a204f

File tree

1 file changed

+15
-13
lines changed

1 file changed

+15
-13
lines changed

source/module_elecstate/module_dm/density_matrix.cpp

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,11 @@ void DensityMatrix<std::complex<double>, double>::cal_DMR()
496496
{
497497
throw std::string("Atom-pair not belong this process");
498498
}
499+
std::vector<std::complex<double>> tmp_DMR;
500+
if (PARAM.inp.nspin == 4)
501+
{
502+
tmp_DMR.resize(tmp_ap.get_size());
503+
}
499504
for (int ir = 0; ir < tmp_ap.get_R_size(); ++ir)
500505
{
501506
const ModuleBase::Vector3<int> r_index = tmp_ap.get_R_index(ir);
@@ -553,9 +558,7 @@ void DensityMatrix<std::complex<double>, double>::cal_DMR()
553558
// treat DMR as pauli matrix when NSPIN=4
554559
if (PARAM.inp.nspin == 4)
555560
{
556-
std::vector<std::complex<double>> tmp_DMR(this->_paraV->get_col_size()
557-
* this->_paraV->get_row_size(),
558-
std::complex<double>(0.0, 0.0));
561+
tmp_DMR.assign(tmp_ap.get_size(), std::complex<double>(0.0, 0.0));
559562
for (int ik = 0; ik < this->_nks; ++ik)
560563
{
561564
// cal k_phase
@@ -573,35 +576,34 @@ void DensityMatrix<std::complex<double>, double>::cal_DMR()
573576
// jump DMK to fill DMR
574577
// DMR is row-major, DMK is column-major
575578
tmp_DMK_pointer += col_ap * this->_paraV->nrow + row_ap;
576-
for (int mu = 0; mu < this->_paraV->get_row_size(iat1); ++mu)
579+
for (int mu = 0; mu < tmp_ap.get_row_size(); ++mu)
577580
{
578-
BlasConnector::axpy(this->_paraV->get_col_size(iat2),
581+
BlasConnector::axpy(tmp_ap.get_col_size(),
579582
kphase,
580583
tmp_DMK_pointer,
581584
ld_hk,
582585
tmp_DMR_pointer,
583586
1);
584587
tmp_DMK_pointer += 1;
585-
tmp_DMR_pointer += this->_paraV->get_col_size(iat2);
588+
tmp_DMR_pointer += tmp_ap.get_col_size();
586589
}
587590
}
588591
int npol = 2;
589592
// step_trace = 0 for NSPIN=1,2; ={0, 1, local_col, local_col+1} for NSPIN=4
590-
std::vector<int> step_trace(npol * npol, 0);
593+
int step_trace[4];
591594
for (int is = 0; is < npol; is++)
592595
{
593596
for (int is2 = 0; is2 < npol; is2++)
594597
{
595-
step_trace[is * npol + is2] = this->_paraV->get_col_size(iat2) * is + is2;
596-
// step_trace[is + is2 * npol] = this->_paraV->get_col_size(iat2) * is + is2;
598+
step_trace[is * npol + is2] = tmp_ap.get_col_size() * is + is2;
597599
}
598600
}
599601
std::complex<double> tmp[4];
600602
double* target_DMR = tmp_matrix->get_pointer();
601603
std::complex<double>* tmp_DMR_pointer = tmp_DMR.data();
602-
for (int irow = 0; irow < this->_paraV->get_row_size(iat1); irow += 2)
604+
for (int irow = 0; irow < tmp_ap.get_row_size(); irow += 2)
603605
{
604-
for (int icol = 0; icol < this->_paraV->get_col_size(iat2); icol += 2)
606+
for (int icol = 0; icol < tmp_ap.get_col_size(); icol += 2)
605607
{
606608
// catch the 4 spin component value of one orbital pair
607609
tmp[0] = tmp_DMR_pointer[icol + step_trace[0]];
@@ -616,8 +618,8 @@ void DensityMatrix<std::complex<double>, double>::cal_DMR()
616618
= -tmp[1].imag() + tmp[2].imag(); // (i * (rho_updown - rho_downup)).real()
617619
target_DMR[icol + step_trace[3]] = tmp[0].real() - tmp[3].real();
618620
}
619-
tmp_DMR_pointer += this->_paraV->get_col_size(iat2) * 2;
620-
target_DMR += this->_paraV->get_col_size(iat2) * 2;
621+
tmp_DMR_pointer += tmp_ap.get_col_size() * 2;
622+
target_DMR += tmp_ap.get_col_size() * 2;
621623
}
622624
}
623625
}

0 commit comments

Comments
 (0)