Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions source/module_hamilt_lcao/module_gint/gint.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ class Gint {
//! psir_ylm: dim is [bxyz][LD_pool]
//! psir_vlbr3: dim is [bxyz][LD_pool]
//! hR: HContainer for storing the <phi_0|V|phi_R> matrix elements
//! cal_meshball_vlocal is thread-safe!
void cal_meshball_vlocal(
const int na_grid,
const int LD_pool,
Expand Down
38 changes: 10 additions & 28 deletions source/module_hamilt_lcao/module_gint/gint_vl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
#include <mkl_service.h>
#endif


// this is a thread-safe function
void Gint::cal_meshball_vlocal(
const int na_grid, // how many atoms on this (i,j,k) grid
const int LD_pool,
Expand All @@ -36,6 +36,7 @@ void Gint::cal_meshball_vlocal(
const int lgd_now = this->gridt->lgd;

const int mcell_index = this->gridt->bcell_start[grid_index];
std::vector<double> hr_tmp;
for(int ia1=0; ia1<na_grid; ++ia1)
{
const int bcell1 = mcell_index + ia1;
Expand Down Expand Up @@ -80,33 +81,14 @@ void Gint::cal_meshball_vlocal(
}
const int m = tmp_matrix->get_row_size();
const int n = tmp_matrix->get_col_size();

int cal_pair_num=0;
for(int ib=first_ib;ib<last_ib; ++ib)
{
cal_pair_num += cal_flag[ib][ia1] && cal_flag[ib][ia2];
}
if(cal_pair_num>ib_length/4)
{
dgemm_(&transa, &transb, &n, &m, &ib_length, &alpha,
&psir_vlbr3[first_ib][block_index[ia2]], &LD_pool,
&psir_ylm[first_ib][block_index[ia1]], &LD_pool,
&beta, tmp_matrix->get_pointer(), &n);
}
else
{
for(int ib=first_ib; ib<last_ib; ++ib)
{
if(cal_flag[ib][ia1] && cal_flag[ib][ia2])
{
int k=1;
dgemm_(&transa, &transb, &n, &m, &k, &alpha,
&psir_vlbr3[ib][block_index[ia2]], &LD_pool,
&psir_ylm[ib][block_index[ia1]], &LD_pool,
&beta, tmp_matrix->get_pointer(), &n);
}
}
}
hr_tmp.resize(m * n);
ModuleBase::GlobalFunc::ZEROS(hr_tmp.data(), m*n);

dgemm_(&transa, &transb, &n, &m, &ib_length, &alpha,
&psir_vlbr3[first_ib][block_index[ia2]], &LD_pool,
&psir_ylm[first_ib][block_index[ia1]], &LD_pool,
&beta, hr_tmp.data(), &n);
tmp_matrix->add_array_ts(hr_tmp.data());
}
}
}
Expand Down
63 changes: 8 additions & 55 deletions source/module_hamilt_lcao/module_gint/gint_vl_cpu_interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ void Gint::gint_kernel_vlocal(Gint_inout* inout) {
{ /**
* @brief When in OpenMP, it points to a newly allocated memory,
*/
hamilt::HContainer<double> hRGint_thread(*hRGint_kernel);
std::vector<int> block_iw(max_size,0);
std::vector<int> block_index(max_size+1,0);
std::vector<int> block_size(max_size,0);
Expand Down Expand Up @@ -74,19 +73,8 @@ void Gint::gint_kernel_vlocal(Gint_inout* inout) {
this->cal_meshball_vlocal(
na_grid, LD_pool, block_size.data(), block_index.data(), grid_index,
cal_flag.get_ptr_2D(),psir_ylm.get_ptr_2D(), psir_vlbr3.get_ptr_2D(),
&hRGint_thread);
hRGint_kernel);
}

#pragma omp critical
{
BlasConnector::axpy(hRGint_thread.get_nnr(),
1.0,
hRGint_thread.get_wrapper(),
1,
hRGint_kernel->get_wrapper(),
1);
}

ModuleBase::TITLE("Gint_interface", "cal_gint_vlocal");
ModuleBase::timer::tick("Gint_interface", "cal_gint_vlocal");
}
Expand All @@ -112,9 +100,6 @@ void Gint::gint_kernel_dvlocal(Gint_inout* inout) {

#pragma omp parallel
{
hamilt::HContainer<double> pvdpRx_thread(pvdpRx_reduced[inout->ispin]);
hamilt::HContainer<double> pvdpRy_thread(pvdpRy_reduced[inout->ispin]);
hamilt::HContainer<double> pvdpRz_thread(pvdpRz_reduced[inout->ispin]);
std::vector<int> block_iw(max_size,0);
std::vector<int> block_index(max_size+1,0);
std::vector<int> block_size(max_size,0);
Expand Down Expand Up @@ -159,34 +144,13 @@ void Gint::gint_kernel_dvlocal(Gint_inout* inout) {
//and accumulates to the corresponding element in Hamiltonian
this->cal_meshball_vlocal(na_grid, LD_pool, block_size.data(), block_index.data(),
grid_index, cal_flag.get_ptr_2D(),psir_vlbr3.get_ptr_2D(),
dpsir_ylm_x.get_ptr_2D(), &pvdpRx_thread);
dpsir_ylm_x.get_ptr_2D(), &this->pvdpRx_reduced[inout->ispin]);
this->cal_meshball_vlocal(na_grid, LD_pool, block_size.data(), block_index.data(),
grid_index, cal_flag.get_ptr_2D(),psir_vlbr3.get_ptr_2D(),
dpsir_ylm_y.get_ptr_2D(), &pvdpRy_thread);
dpsir_ylm_y.get_ptr_2D(), &this->pvdpRy_reduced[inout->ispin]);
this->cal_meshball_vlocal(na_grid, LD_pool, block_size.data(), block_index.data(),
grid_index, cal_flag.get_ptr_2D(),psir_vlbr3.get_ptr_2D(),
dpsir_ylm_z.get_ptr_2D(), &pvdpRz_thread);
}
#pragma omp critical(gint_k)
{
BlasConnector::axpy(nnrg,
1.0,
pvdpRx_thread.get_wrapper(),
1,
this->pvdpRx_reduced[inout->ispin].get_wrapper(),
1);
BlasConnector::axpy(nnrg,
1.0,
pvdpRy_thread.get_wrapper(),
1,
this->pvdpRy_reduced[inout->ispin].get_wrapper(),
1);
BlasConnector::axpy(nnrg,
1.0,
pvdpRz_thread.get_wrapper(),
1,
this->pvdpRz_reduced[inout->ispin].get_wrapper(),
1);
dpsir_ylm_z.get_ptr_2D(), &this->pvdpRz_reduced[inout->ispin]);
}
}
ModuleBase::TITLE("Gint_interface", "cal_gint_dvlocal");
Expand All @@ -210,7 +174,6 @@ void Gint::gint_kernel_vlocal_meta(Gint_inout* inout) {
{
// define HContainer here to reference.
//Under the condition of gamma_only, hRGint will be instantiated.
hamilt::HContainer<double> hRGint_thread(*hRGint_kernel);
std::vector<int> block_iw(max_size,0);
std::vector<int> block_index(max_size+1,0);
std::vector<int> block_size(max_size,0);
Expand Down Expand Up @@ -282,28 +245,18 @@ void Gint::gint_kernel_vlocal_meta(Gint_inout* inout) {
//and accumulates to the corresponding element in Hamiltonian
this->cal_meshball_vlocal(
na_grid, LD_pool, block_size.data(), block_index.data(), grid_index, cal_flag.get_ptr_2D(),
psir_ylm.get_ptr_2D(), psir_vlbr3.get_ptr_2D(), &hRGint_thread);
psir_ylm.get_ptr_2D(), psir_vlbr3.get_ptr_2D(), hRGint_kernel);
//integrate (d/dx_i psi_mu*vk(r)*dv) * (d/dx_i psi_nu) on grid (x_i=x,y,z)
//and accumulates to the corresponding element in Hamiltonian
this->cal_meshball_vlocal(
na_grid, LD_pool, block_size.data(), block_index.data(), grid_index, cal_flag.get_ptr_2D(),
dpsir_ylm_x.get_ptr_2D(), dpsix_vlbr3.get_ptr_2D(), &hRGint_thread);
dpsir_ylm_x.get_ptr_2D(), dpsix_vlbr3.get_ptr_2D(), hRGint_kernel);
this->cal_meshball_vlocal(
na_grid, LD_pool, block_size.data(), block_index.data(), grid_index, cal_flag.get_ptr_2D(),
dpsir_ylm_y.get_ptr_2D(), dpsiy_vlbr3.get_ptr_2D(), &hRGint_thread);
dpsir_ylm_y.get_ptr_2D(), dpsiy_vlbr3.get_ptr_2D(), hRGint_kernel);
this->cal_meshball_vlocal(
na_grid, LD_pool, block_size.data(), block_index.data(), grid_index, cal_flag.get_ptr_2D(),
dpsir_ylm_z.get_ptr_2D(), dpsiz_vlbr3.get_ptr_2D(), &hRGint_thread);
}

#pragma omp critical
{
BlasConnector::axpy(nnrg,
1.0,
hRGint_thread.get_wrapper(),
1,
hRGint_kernel->get_wrapper(),
1);
dpsir_ylm_z.get_ptr_2D(), dpsiz_vlbr3.get_ptr_2D(), hRGint_kernel);
}
}

Expand Down
8 changes: 1 addition & 7 deletions source/module_hamilt_lcao/module_gint/temp_gint/gint_vl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ void Gint_vl::cal_hr_gint_()
PhiOperator phi_op;
std::vector<double> phi;
std::vector<double> phi_vldr3;
HContainer<double> hr_gint_local(*hr_gint_);
#pragma omp for schedule(dynamic)
for(const auto& biggrid: gint_info_->get_biggrids())
{
Expand All @@ -47,12 +46,7 @@ void Gint_vl::cal_hr_gint_()
phi_vldr3.resize(phi_len);
phi_op.set_phi(phi.data());
phi_op.phi_mul_vldr3(vr_eff_, dr3_, phi.data(), phi_vldr3.data());
phi_op.phi_mul_phi_vldr3(phi.data(), phi_vldr3.data(), &hr_gint_local);
}
#pragma omp critical
{
BlasConnector::axpy(hr_gint_local.get_nnr(), 1.0, hr_gint_local.get_wrapper(),
1, hr_gint_->get_wrapper(), 1);
phi_op.phi_mul_phi_vldr3(phi.data(), phi_vldr3.data(), *hr_gint_);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ void Gint_vl_metagga::cal_hr_gint_()
std::vector<double> dphi_x_vldr3;
std::vector<double> dphi_y_vldr3;
std::vector<double> dphi_z_vldr3;
HContainer<double> hr_gint_local(*hr_gint_);
#pragma omp for schedule(dynamic)
for(const auto& biggrid: gint_info_->get_biggrids())
{
Expand All @@ -62,15 +61,10 @@ void Gint_vl_metagga::cal_hr_gint_()
phi_op.phi_mul_vldr3(vofk_, dr3_, dphi_x.data(), dphi_x_vldr3.data());
phi_op.phi_mul_vldr3(vofk_, dr3_, dphi_y.data(), dphi_y_vldr3.data());
phi_op.phi_mul_vldr3(vofk_, dr3_, dphi_z.data(), dphi_z_vldr3.data());
phi_op.phi_mul_phi_vldr3(phi.data(), phi_vldr3.data(), &hr_gint_local);
phi_op.phi_mul_phi_vldr3(dphi_x.data(), dphi_x_vldr3.data(), &hr_gint_local);
phi_op.phi_mul_phi_vldr3(dphi_y.data(), dphi_y_vldr3.data(), &hr_gint_local);
phi_op.phi_mul_phi_vldr3(dphi_z.data(), dphi_z_vldr3.data(), &hr_gint_local);
}
#pragma omp critical
{
BlasConnector::axpy(hr_gint_local.get_nnr(), 1.0, hr_gint_local.get_wrapper(),
1, hr_gint_->get_wrapper(), 1);
phi_op.phi_mul_phi_vldr3(phi.data(), phi_vldr3.data(), *hr_gint_);
phi_op.phi_mul_phi_vldr3(dphi_x.data(), dphi_x_vldr3.data(), *hr_gint_);
phi_op.phi_mul_phi_vldr3(dphi_y.data(), dphi_y_vldr3.data(), *hr_gint_);
phi_op.phi_mul_phi_vldr3(dphi_z.data(), dphi_z_vldr3.data(), *hr_gint_);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ void Gint_vl_metagga_nspin4::cal_hr_gint_()
std::vector<double> dphi_x_vldr3;
std::vector<double> dphi_y_vldr3;
std::vector<double> dphi_z_vldr3;
std::vector<HContainer<double>> hr_gint_part_thread(nspin_, *hr_gint_part_[0]);
#pragma omp for schedule(dynamic)
for(const auto& biggrid: gint_info_->get_biggrids())
{
Expand All @@ -66,20 +65,10 @@ void Gint_vl_metagga_nspin4::cal_hr_gint_()
phi_op.phi_mul_vldr3(vofk_[is], dr3_, dphi_x.data(), dphi_x_vldr3.data());
phi_op.phi_mul_vldr3(vofk_[is], dr3_, dphi_y.data(), dphi_y_vldr3.data());
phi_op.phi_mul_vldr3(vofk_[is], dr3_, dphi_z.data(), dphi_z_vldr3.data());
phi_op.phi_mul_phi_vldr3(phi.data(), phi_vldr3.data(), &hr_gint_part_thread[is]);
phi_op.phi_mul_phi_vldr3(dphi_x.data(), dphi_x_vldr3.data(), &hr_gint_part_thread[is]);
phi_op.phi_mul_phi_vldr3(dphi_y.data(), dphi_y_vldr3.data(), &hr_gint_part_thread[is]);
phi_op.phi_mul_phi_vldr3(dphi_z.data(), dphi_z_vldr3.data(), &hr_gint_part_thread[is]);
}
}
#pragma omp critical
{
for(int is = 0; is < nspin_; is++)
{
{
BlasConnector::axpy(hr_gint_part_thread[is].get_nnr(), 1.0, hr_gint_part_thread[is].get_wrapper(),
1, hr_gint_part_[is]->get_wrapper(), 1);
}
phi_op.phi_mul_phi_vldr3(phi.data(), phi_vldr3.data(), *hr_gint_part_[is]);
phi_op.phi_mul_phi_vldr3(dphi_x.data(), dphi_x_vldr3.data(), *hr_gint_part_[is]);
phi_op.phi_mul_phi_vldr3(dphi_y.data(), dphi_y_vldr3.data(), *hr_gint_part_[is]);
phi_op.phi_mul_phi_vldr3(dphi_z.data(), dphi_z_vldr3.data(), *hr_gint_part_[is]);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ void Gint_vl_nspin4::cal_hr_gint_()
PhiOperator phi_op;
std::vector<double> phi;
std::vector<double> phi_vldr3;
std::vector<HContainer<double>> hr_gint_part_thread(nspin_, *hr_gint_part_[0]);
#pragma omp for schedule(dynamic)
for(const auto& biggrid: gint_info_->get_biggrids())
{
Expand All @@ -50,17 +49,7 @@ void Gint_vl_nspin4::cal_hr_gint_()
for(int is = 0; is < nspin_; is++)
{
phi_op.phi_mul_vldr3(vr_eff_[is], dr3_, phi.data(), phi_vldr3.data());
phi_op.phi_mul_phi_vldr3(phi.data(), phi_vldr3.data(), &hr_gint_part_thread[is]);
}
}
#pragma omp critical
{
for(int is = 0; is < nspin_; is++)
{
{
BlasConnector::axpy(hr_gint_part_thread[is].get_nnr(), 1.0, hr_gint_part_thread[is].get_wrapper(),
1, hr_gint_part_[is]->get_wrapper(), 1);
}
phi_op.phi_mul_phi_vldr3(phi.data(), phi_vldr3.data(), *hr_gint_part_[is]);
}
}
}
Expand Down
17 changes: 13 additions & 4 deletions source/module_hamilt_lcao/module_gint/temp_gint/phi_operator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,34 +148,41 @@ void PhiOperator::phi_mul_vldr3(const double* vl, const double dr3, const double
}
}

// this is a thread-safe function
void PhiOperator::phi_mul_phi_vldr3(
const double* phi,
const double* phi_vldr3,
HContainer<double>* hr) const
HContainer<double>& hr) const
{
const char transa='N', transb='T';
const double alpha=1, beta=1;

std::vector<double> tmp_hr;
for(int i = 0; i < biggrid_->get_atoms_num(); ++i)
{
const auto atom_i = biggrid_->get_atom(i);
const auto& r_i = atom_i->get_R();
const int iat_i = atom_i->get_iat();
const int m = atoms_phi_len_[i];

for(int j = 0; j < biggrid_->get_atoms_num(); ++j)
{
const auto atom_j = biggrid_->get_atom(j);
const auto& r_j = atom_j->get_R();
const int iat_j = atom_j->get_iat();
const int n = atoms_phi_len_[j];

// only calculate the upper triangle matrix
if(iat_i > iat_j)
{
continue;
}

tmp_hr.resize(m * n);
ModuleBase::GlobalFunc::ZEROS(tmp_hr.data(), m*n);

// FIXME may be r = r_j - r_i
const auto result = hr->find_matrix(iat_i, iat_j, r_i-r_j);
const auto result = hr.find_matrix(iat_i, iat_j, r_i-r_j);

if(result == nullptr)
{
Expand All @@ -191,8 +198,10 @@ void PhiOperator::phi_mul_phi_vldr3(
continue;
}

dgemm_(&transa, &transb, &atoms_phi_len_[j], &atoms_phi_len_[i], &len, &alpha, &phi_vldr3[start_idx * cols_ + atoms_startidx_[j]],
&cols_,&phi[start_idx * cols_ + atoms_startidx_[i]], &cols_, &beta, result->get_pointer(), &atoms_phi_len_[j]);
dgemm_(&transa, &transb, &n, &m, &len, &alpha, &phi_vldr3[start_idx * cols_ + atoms_startidx_[j]],
&cols_,&phi[start_idx * cols_ + atoms_startidx_[i]], &cols_, &beta, tmp_hr.data(), &n);

result->add_array_ts(tmp_hr.data());
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,11 @@ class PhiOperator
const double* phi,
double* result) const;

// this is a thread-safe function
void phi_mul_phi_vldr3(
const double* phi,
const double* phi_vldr3,
HContainer<double>* hr) const;
HContainer<double>& hr) const;

void phi_dot_phi_dm(
const double* phi,
Expand Down
12 changes: 12 additions & 0 deletions source/module_hamilt_lcao/module_hcontainer/base_matrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define BASE_MATRIX_H

#include <iostream>
#include <mutex>

namespace hamilt
{
Expand Down Expand Up @@ -107,6 +108,15 @@ class BaseMatrix
*/
void set_size(const int& col_size_in, const int& row_size_in);

void add_array_ts(T* array)
{
std::lock_guard<std::mutex> lock(mtx);
for (int i = 0; i < nrow_local * ncol_local; ++i)
{
value_begin[i] += array[i];
}
}

private:
bool allocated = false;

Expand All @@ -118,6 +128,8 @@ class BaseMatrix

// int current_multiple = 0;

// for thread safe
mutable std::mutex mtx;
// number of rows and columns
int nrow_local = 0;
int ncol_local = 0;
Expand Down
Loading