Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 136 additions & 0 deletions source/source_base/parallel_reduce.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
// Force recompilation
#include "parallel_reduce.h"

#include "parallel_comm.h"
Expand Down Expand Up @@ -99,6 +100,141 @@ void Parallel_Reduce::reduce_double_diag(double* object, const int n)
return;
}

// Element-wise sum of an int array over all ranks of POOL_WORLD.
// The reduction is done in place (MPI_IN_PLACE + MPI_SUM), so on return every
// rank holds the summed values in object[0..n-1].
// When the code is built without __MPI the array is left untouched.
//   object : buffer of n ints, overwritten with the reduced values
//   n      : number of elements in object
template <>
void Parallel_Reduce::reduce_pool<int>(int* object, const int n)
{
#ifdef __MPI
    MPI_Allreduce(MPI_IN_PLACE, object, n, MPI_INT, MPI_SUM, POOL_WORLD);
#else
    // Serial build: there is nothing to combine.
    (void)object;
    (void)n;
#endif
}

// Logical OR of a flag over all ranks of MPI_COMM_WORLD ("or" = MPI_LOR):
// after the call, object is true on every rank if it was true on at least one
// rank. The reduction is done in place. When the code is built without __MPI
// the flag is left unchanged.
//
// object is a C++ bool, so the matching MPI datatype is MPI_CXX_BOOL
// (available since MPI-3.0). MPI_C_BOOL describes the C type _Bool instead,
// which is not guaranteed to have the same representation as C++ bool.
//   object : flag to combine, overwritten with the reduced value
void Parallel_Reduce::reduce_or_all(bool& object)
{
#ifdef __MPI
    MPI_Allreduce(MPI_IN_PLACE, &object, 1, MPI_CXX_BOOL, MPI_LOR, MPI_COMM_WORLD);
#endif
    return;
}

// Maximum of a double over all ranks of MPI_COMM_WORLD.
// The reduction is done in place (MPI_IN_PLACE + MPI_MAX): on return object
// holds the largest value found on any rank.
// When the code is built without __MPI the value is left unchanged.
template <>
void Parallel_Reduce::reduce_max_all<double>(double& object)
{
#ifdef __MPI
    double* const value = &object;
    MPI_Allreduce(MPI_IN_PLACE, value, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
#else
    (void)object; // serial build: nothing to combine
#endif
}

// Maximum of a float over all ranks of MPI_COMM_WORLD.
// The reduction is done in place (MPI_IN_PLACE + MPI_MAX): on return object
// holds the largest value found on any rank.
// When the code is built without __MPI the value is left unchanged.
template <>
void Parallel_Reduce::reduce_max_all<float>(float& object)
{
#ifdef __MPI
    float* const value = &object;
    MPI_Allreduce(MPI_IN_PLACE, value, 1, MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD);
#else
    (void)object; // serial build: nothing to combine
#endif
}

// Maximum of an int over all ranks of MPI_COMM_WORLD.
// The reduction is done in place (MPI_IN_PLACE + MPI_MAX): on return object
// holds the largest value found on any rank.
// When the code is built without __MPI the value is left unchanged.
template <>
void Parallel_Reduce::reduce_max_all<int>(int& object)
{
#ifdef __MPI
    int* const value = &object;
    MPI_Allreduce(MPI_IN_PLACE, value, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
#else
    (void)object; // serial build: nothing to combine
#endif
}

// Minimum of a double over all ranks of MPI_COMM_WORLD.
// The reduction is done in place (MPI_IN_PLACE + MPI_MIN): on return object
// holds the smallest value found on any rank.
// When the code is built without __MPI the value is left unchanged.
template <>
void Parallel_Reduce::reduce_min_all<double>(double& object)
{
#ifdef __MPI
    double* const value = &object;
    MPI_Allreduce(MPI_IN_PLACE, value, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
#else
    (void)object; // serial build: nothing to combine
#endif
}

// Minimum of a float over all ranks of MPI_COMM_WORLD.
// The reduction is done in place (MPI_IN_PLACE + MPI_MIN): on return object
// holds the smallest value found on any rank.
// When the code is built without __MPI the value is left unchanged.
template <>
void Parallel_Reduce::reduce_min_all<float>(float& object)
{
#ifdef __MPI
    float* const value = &object;
    MPI_Allreduce(MPI_IN_PLACE, value, 1, MPI_FLOAT, MPI_MIN, MPI_COMM_WORLD);
#else
    (void)object; // serial build: nothing to combine
#endif
}

// Minimum of an int over all ranks of MPI_COMM_WORLD.
// The reduction is done in place (MPI_IN_PLACE + MPI_MIN): on return object
// holds the smallest value found on any rank.
// When the code is built without __MPI the value is left unchanged.
template <>
void Parallel_Reduce::reduce_min_all<int>(int& object)
{
#ifdef __MPI
    int* const value = &object;
    MPI_Allreduce(MPI_IN_PLACE, value, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
#else
    (void)object; // serial build: nothing to combine
#endif
}

// Element-wise maximum of an int array over all ranks of POOL_WORLD.
// The reduction is done in place (MPI_IN_PLACE + MPI_MAX). Note that the
// communicator is always the global POOL_WORLD; callers cannot pass their own.
// When the code is built without __MPI the array is left untouched.
//   object : buffer of n ints, overwritten with the per-element maxima
//   n      : number of elements in object
void Parallel_Reduce::reduce_max_pool(int* object, const int n)
{
#ifdef __MPI
    MPI_Allreduce(MPI_IN_PLACE, object, n, MPI_INT, MPI_MAX, POOL_WORLD);
#else
    // Serial build: there is nothing to combine.
    (void)object;
    (void)n;
#endif
}

// Minimum of a double over all ranks of POOL_WORLD.
// The reduction is done in place (MPI_IN_PLACE + MPI_MIN). Note that the
// communicator is always the global POOL_WORLD; callers cannot pass their own.
// When the code is built without __MPI the value is left unchanged.
void Parallel_Reduce::reduce_min_pool(double& object)
{
#ifdef __MPI
    double* const value = &object;
    MPI_Allreduce(MPI_IN_PLACE, value, 1, MPI_DOUBLE, MPI_MIN, POOL_WORLD);
#else
    (void)object; // serial build: nothing to combine
#endif
}

// Logical OR of a flag over all ranks of BP_WORLD. The "or" in the name is the
// reduction operator MPI_LOR: after the call, object is true on every rank if
// it was true on at least one rank. The reduction is done in place
// (MPI_IN_PLACE). When the code is built without __MPI the flag is unchanged.
// NOTE(review): MPI_C_BOOL is the MPI datatype for the C type _Bool, while
// MPI_CXX_BOOL is the one defined for C++ bool - confirm which is intended.
void Parallel_Reduce::reduce_or_bp(bool& object)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what's the meaning of 'or'?

{
#ifdef __MPI
MPI_Allreduce(MPI_IN_PLACE, &object, 1, MPI_C_BOOL, MPI_LOR, BP_WORLD);
#endif
return;
}

// Sum of a double over all ranks of INT_BGROUP.
// The reduction is done in place (MPI_IN_PLACE + MPI_SUM): on return object
// holds the total of the values contributed by every rank.
// When the code is built without __MPI the value is left unchanged.
void Parallel_Reduce::reduce_double_bgroup(double& object)
{
#ifdef __MPI
    double* const value = &object;
    MPI_Allreduce(MPI_IN_PLACE, value, 1, MPI_DOUBLE, MPI_SUM, INT_BGROUP);
#else
    (void)object; // serial build: nothing to combine
#endif
}

// Element-wise sum of a double array over all ranks of INT_BGROUP.
// The reduction is done in place (MPI_IN_PLACE + MPI_SUM).
// When the code is built without __MPI the array is left untouched.
//   object : buffer of n doubles, overwritten with the summed values
//   n      : number of elements in object
void Parallel_Reduce::reduce_double_bgroup(double* object, const int n)
{
#ifdef __MPI
    MPI_Allreduce(MPI_IN_PLACE, object, n, MPI_DOUBLE, MPI_SUM, INT_BGROUP);
#else
    // Serial build: there is nothing to combine.
    (void)object;
    (void)n;
#endif
}

// Element-wise sum of a double array over all ranks of KP_WORLD.
// The reduction is done in place (MPI_IN_PLACE + MPI_SUM).
// When the code is built without __MPI the array is left untouched.
//   object : buffer of n doubles, overwritten with the summed values
//   n      : number of elements in object
void Parallel_Reduce::reduce_double_kp(double* object, const int n)
{
#ifdef __MPI
    MPI_Allreduce(MPI_IN_PLACE, object, n, MPI_DOUBLE, MPI_SUM, KP_WORLD);
#else
    // Serial build: there is nothing to combine.
    (void)object;
    (void)n;
#endif
}

// Sum of a double over all ranks of BP_WORLD.
// The reduction is done in place (MPI_IN_PLACE + MPI_SUM): on return object
// holds the total of the values contributed by every rank.
// When the code is built without __MPI the value is left unchanged.
void Parallel_Reduce::reduce_double_bp(double& object)
{
#ifdef __MPI
    double* const value = &object;
    MPI_Allreduce(MPI_IN_PLACE, value, 1, MPI_DOUBLE, MPI_SUM, BP_WORLD);
#else
    (void)object; // serial build: nothing to combine
#endif
}

// Element-wise sum of a double array over all ranks of BP_WORLD.
// The reduction is done in place (MPI_IN_PLACE + MPI_SUM).
// When the code is built without __MPI the array is left untouched.
//   object : buffer of n doubles, overwritten with the summed values
//   n      : number of elements in object
void Parallel_Reduce::reduce_double_bp(double* object, const int n)
{
#ifdef __MPI
    MPI_Allreduce(MPI_IN_PLACE, object, n, MPI_DOUBLE, MPI_SUM, BP_WORLD);
#else
    // Serial build: there is nothing to combine.
    (void)object;
    (void)n;
#endif
}

template <>
void Parallel_Reduce::reduce_pool<float>(float& object)
{
Expand Down
19 changes: 19 additions & 0 deletions source/source_base/parallel_reduce.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,25 @@ void reduce_int_grid(int* object, const int n); // mohan add 2012-01-12
void reduce_double_grid(double* object, const int n);
void reduce_double_diag(double* object, const int n);

// Logical OR (MPI_LOR) of a flag over MPI_COMM_WORLD, in place.
void reduce_or_all(bool& object);
// Maximum (MPI_MAX) of a scalar over MPI_COMM_WORLD, in place.
// Specialized in parallel_reduce.cpp for double, float and int.
template <typename T>
void reduce_max_all(T& object);
// Minimum (MPI_MIN) of a scalar over MPI_COMM_WORLD, in place.
// Specialized in parallel_reduce.cpp for double, float and int.
template <typename T>
void reduce_min_all(T& object);

// Element-wise maximum of n ints over POOL_WORLD, in place.
void reduce_max_pool(int* object, const int n);
// Minimum of a double over POOL_WORLD, in place.
void reduce_min_pool(double& object);

// Logical OR (MPI_LOR) of a flag over BP_WORLD, in place.
void reduce_or_bp(bool& object);

// Sum (MPI_SUM) over INT_BGROUP, in place: scalar and n-element array forms.
void reduce_double_bgroup(double& object);
void reduce_double_bgroup(double* object, const int n);

// Sum (MPI_SUM) over BP_WORLD, in place: scalar and n-element array forms.
void reduce_double_bp(double& object);
void reduce_double_bp(double* object, const int n);

// Element-wise sum (MPI_SUM) of n doubles over KP_WORLD, in place.
void reduce_double_kp(double* object, const int n);

void reduce_double_allpool(const int& npool, const int& nproc_in_pool, double& object);
void reduce_double_allpool(const int& npool, const int& nproc_in_pool, double* object, const int n);

Expand Down
5 changes: 3 additions & 2 deletions source/source_basis/module_pw/pw_basis_big.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define PW_BASIS_BIG_H
#include "source_base/constants.h"
#include "source_base/global_function.h"
#include "source_base/parallel_reduce.h"
#ifdef __MPI
#include "mpi.h"
#endif
Expand Down Expand Up @@ -167,7 +168,7 @@ class PW_Basis_Big : public PW_Basis_Sup
ibox[1] = 2*n2+1;
ibox[2] = 2*n3+1;
#ifdef __MPI
MPI_Allreduce(MPI_IN_PLACE, ibox, 3, MPI_INT, MPI_MAX , this->pool_world);
Parallel_Reduce::reduce_max_pool(ibox, 3);
#endif
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

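Can the surrounding `#ifdef __MPI` / `#endif` be deleted here? `Parallel_Reduce::reduce_max_pool` already guards its MPI call with `#ifdef __MPI` internally.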


// Find the minimal FFT box size that factors into the primes (2,3,5,7).
Expand Down Expand Up @@ -350,7 +351,7 @@ class PW_Basis_Big : public PW_Basis_Sup
}
}
#ifdef __MPI
MPI_Allreduce(MPI_IN_PLACE, &this->gridecut_lat, 1, MPI_DOUBLE, MPI_MIN , this->pool_world);
Parallel_Reduce::reduce_min_pool(this->gridecut_lat);
#endif
this->gridecut_lat -= 1e-6;

Expand Down
5 changes: 3 additions & 2 deletions source/source_basis/module_pw/pw_init.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "pw_basis.h"
#include "source_base/constants.h"
#include "source_base/parallel_reduce.h"

namespace ModulePW
{
Expand Down Expand Up @@ -86,7 +87,7 @@ void PW_Basis:: initgrids(
ibox[1] = 2*n2+1;
ibox[2] = 2*n3+1;
#ifdef __MPI
MPI_Allreduce(MPI_IN_PLACE, ibox, 3, MPI_INT, MPI_MAX , this->pool_world);
Parallel_Reduce::reduce_max_pool(ibox, 3);
#endif

// Find the minimal FFT box size that factors into the primes (2,3,5,7).
Expand Down Expand Up @@ -200,7 +201,7 @@ void PW_Basis:: initgrids(
}
}
#ifdef __MPI
MPI_Allreduce(MPI_IN_PLACE, &this->gridecut_lat, 1, MPI_DOUBLE, MPI_MIN , this->pool_world);
Parallel_Reduce::reduce_min_pool(this->gridecut_lat);
#endif
this->gridecut_lat -= 1e-6;

Expand Down
80 changes: 75 additions & 5 deletions source/source_basis/module_pw/test/depend_mock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include "mpi.h"
#endif
#include "depend_mock.h"
#include <complex>

namespace GlobalV
{
Expand All @@ -11,14 +12,83 @@ namespace GlobalV
MPI_Comm POOL_WORLD;
namespace Parallel_Reduce
{
template<typename T> void reduce_all(T& object) { return; };
template<typename T> void reduce_pool(T& object) { return; };
template<typename T> void reduce_all(T& object);
template<typename T> void reduce_all(T* object, const int n);
template<typename T> void reduce_pool(T& object);
template<typename T> void reduce_pool(T* object, const int n);

template<>
void reduce_all<double>(double& object) { return; };
void reduce_all<int>(int& object) { MPI_Allreduce(MPI_IN_PLACE, &object, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); }
template<>
void reduce_pool<double>(double& object) { return; };
void reduce_all<long long>(long long& object) { MPI_Allreduce(MPI_IN_PLACE, &object, 1, MPI_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); }
template<>
void reduce_pool<float>(float& object) { return; };
void reduce_all<double>(double& object) { MPI_Allreduce(MPI_IN_PLACE, &object, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); }
template<>
void reduce_all<float>(float& object) { MPI_Allreduce(MPI_IN_PLACE, &object, 1, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD); }
template<>
void reduce_all<std::complex<double>>(std::complex<double>& object) { MPI_Allreduce(MPI_IN_PLACE, &object, 1, MPI_DOUBLE_COMPLEX, MPI_SUM, MPI_COMM_WORLD); }
template<>
void reduce_all<std::complex<float>>(std::complex<float>& object) { MPI_Allreduce(MPI_IN_PLACE, &object, 1, MPI_C_FLOAT_COMPLEX, MPI_SUM, MPI_COMM_WORLD); }

template<>
void reduce_all<int>(int* object, const int n) { MPI_Allreduce(MPI_IN_PLACE, object, n, MPI_INT, MPI_SUM, MPI_COMM_WORLD); }
template<>
void reduce_all<long long>(long long* object, const int n) { MPI_Allreduce(MPI_IN_PLACE, object, n, MPI_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); }
template<>
void reduce_all<double>(double* object, const int n) { MPI_Allreduce(MPI_IN_PLACE, object, n, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); }
template<>
void reduce_all<std::complex<double>>(std::complex<double>* object, const int n) { MPI_Allreduce(MPI_IN_PLACE, object, n, MPI_DOUBLE_COMPLEX, MPI_SUM, MPI_COMM_WORLD); }
template<>
void reduce_all<std::complex<float>>(std::complex<float>* object, const int n) { MPI_Allreduce(MPI_IN_PLACE, object, n, MPI_C_FLOAT_COMPLEX, MPI_SUM, MPI_COMM_WORLD); }

template<>
void reduce_pool<float>(float& object) { MPI_Allreduce(MPI_IN_PLACE, &object, 1, MPI_FLOAT, MPI_SUM, POOL_WORLD); }
template<>
void reduce_pool<double>(double& object) { MPI_Allreduce(MPI_IN_PLACE, &object, 1, MPI_DOUBLE, MPI_SUM, POOL_WORLD); }
template<>
void reduce_pool<std::complex<double>>(std::complex<double>& object) { MPI_Allreduce(MPI_IN_PLACE, &object, 1, MPI_DOUBLE_COMPLEX, MPI_SUM, POOL_WORLD); }

template<>
void reduce_pool<int>(int* object, const int n) { MPI_Allreduce(MPI_IN_PLACE, object, n, MPI_INT, MPI_SUM, POOL_WORLD); }
template<>
void reduce_pool<double>(double* object, const int n) { MPI_Allreduce(MPI_IN_PLACE, object, n, MPI_DOUBLE, MPI_SUM, POOL_WORLD); }

// Test mock: element-wise maximum of n ints over POOL_WORLD, in place.
void reduce_max_pool(int* object, const int n)
{
    MPI_Allreduce(MPI_IN_PLACE, object, n, MPI_INT, MPI_MAX, POOL_WORLD);
}
// Test mock: minimum of a double over POOL_WORLD, in place.
void reduce_min_pool(double& object)
{
    double* const value = &object;
    MPI_Allreduce(MPI_IN_PLACE, value, 1, MPI_DOUBLE, MPI_MIN, POOL_WORLD);
}

// Other stubs can remain as is if not used or if they don't break logic
void reduce_or_all(bool& object) { MPI_Allreduce(MPI_IN_PLACE, &object, 1, MPI_C_BOOL, MPI_LOR, MPI_COMM_WORLD); }

// Test mock: maximum (MPI_MAX) of a scalar over MPI_COMM_WORLD, in place.
// Only the double, float and int specializations below are defined.
template <typename T>
void reduce_max_all(T& object);

template <>
void reduce_max_all<double>(double& object)
{
    MPI_Allreduce(MPI_IN_PLACE, &object, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
}

template <>
void reduce_max_all<float>(float& object)
{
    MPI_Allreduce(MPI_IN_PLACE, &object, 1, MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD);
}

template <>
void reduce_max_all<int>(int& object)
{
    MPI_Allreduce(MPI_IN_PLACE, &object, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
}

// Test mock: minimum (MPI_MIN) of a scalar over MPI_COMM_WORLD, in place.
// Only the double, float and int specializations below are defined.
template <typename T>
void reduce_min_all(T& object);

template <>
void reduce_min_all<double>(double& object)
{
    MPI_Allreduce(MPI_IN_PLACE, &object, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
}

template <>
void reduce_min_all<float>(float& object)
{
    MPI_Allreduce(MPI_IN_PLACE, &object, 1, MPI_FLOAT, MPI_MIN, MPI_COMM_WORLD);
}

template <>
void reduce_min_all<int>(int& object)
{
    MPI_Allreduce(MPI_IN_PLACE, &object, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
}

// Test stub: performs no reduction and leaves the flag as it is.
void reduce_or_bp(bool& object)
{
    (void)object;
}

// Test stubs: perform no reduction and leave their arguments as they are.
void reduce_double_bgroup(double& object)
{
    (void)object;
}

void reduce_double_bgroup(double* object, const int n)
{
    (void)object;
    (void)n;
}

// Test stubs: perform no reduction and leave their arguments as they are.
void reduce_double_bp(double& object)
{
    (void)object;
}

void reduce_double_bp(double* object, const int n)
{
    (void)object;
    (void)n;
}

// Test stub: performs no reduction and leaves the array as it is.
void reduce_double_kp(double* object, const int n)
{
    (void)object;
    (void)n;
}

// Test stubs: perform no reduction and leave their arguments as they are.
void reduce_double_allpool(const int& npool, const int& nproc_in_pool, double& object)
{
    (void)npool;
    (void)nproc_in_pool;
    (void)object;
}

void reduce_double_allpool(const int& npool, const int& nproc_in_pool, double* object, const int n)
{
    (void)npool;
    (void)nproc_in_pool;
    (void)object;
    (void)n;
}

// Test stubs for the gather_* helpers: none of them communicates, and each
// leaves its arguments as they are.
void gather_min_int_all(const int& nproc, int& v)
{
    (void)nproc;
    (void)v;
}

void gather_max_double_all(const int& nproc, double& v)
{
    (void)nproc;
    (void)v;
}

void gather_min_double_all(const int& nproc, double& v)
{
    (void)nproc;
    (void)v;
}

void gather_max_double_pool(const int& nproc_in_pool, double& v)
{
    (void)nproc_in_pool;
    (void)v;
}

void gather_min_double_pool(const int& nproc_in_pool, double& v)
{
    (void)nproc_in_pool;
    (void)v;
}

void gather_int_all(int& v, int* all)
{
    (void)v;
    (void)all;
}
}
#endif
4 changes: 2 additions & 2 deletions source/source_cell/parallel_kpoints.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include "source_base/parallel_common.h"
#include "source_base/parallel_global.h"
#include "source_base/parallel_reduce.h"

// the kpoints here are reduced after symmetry applied.
void Parallel_Kpoints::kinfo(int& nkstot_in,
Expand Down Expand Up @@ -123,8 +124,7 @@ void Parallel_Kpoints::gatherkvec(const std::vector<ModuleBase::Vector3<double>>
vec_global[i + startk_pool[this->my_pool]] = vec_local[i];
}
}

MPI_Allreduce(MPI_IN_PLACE, &vec_global[0], 3 * this->nkstot_np, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
Parallel_Reduce::reduce_all(reinterpret_cast<double*>(vec_global.data()), 3 * this->nkstot_np);
return;
}
#endif
Expand Down
4 changes: 2 additions & 2 deletions source/source_estate/module_charge/charge_mpi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ void Charge::reduce_diff_pools(double* array_rho) const
ModuleBase::timer::tick("Charge", "reduce_diff_pools");
if (KP_WORLD != MPI_COMM_NULL)
{
MPI_Allreduce(MPI_IN_PLACE, array_rho, this->nrxx, MPI_DOUBLE, MPI_SUM, KP_WORLD);
Parallel_Reduce::reduce_double_kp(array_rho, this->nrxx);
}
else
{
Expand Down Expand Up @@ -111,7 +111,7 @@ void Charge::reduce_diff_pools(double* array_rho) const
}
if(PARAM.globalv.all_ks_run && PARAM.inp.bndpar > 1)
{
MPI_Allreduce(MPI_IN_PLACE, array_rho, this->nrxx, MPI_DOUBLE, MPI_SUM, BP_WORLD);
Parallel_Reduce::reduce_double_bp(array_rho, this->nrxx);
}
ModuleBase::timer::tick("Charge", "reduce_diff_pools");
}
Expand Down
3 changes: 2 additions & 1 deletion source/source_estate/module_charge/symmetry_rhog.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "symmetry_rho.h"
#include "source_pw/module_pwdft/global.h"
#include "source_base/parallel_global.h"
#include "source_base/parallel_reduce.h"
#include "source_hamilt/module_xc/xc_functional.h"


Expand All @@ -10,7 +11,7 @@ void Symmetry_rho::psymmg(std::complex<double>* rhog_part, const ModulePW::PW_Ba
int * fftixy2is = new int [rho_basis->fftnxy];
rho_basis->getfftixy2is(fftixy2is); //current proc
#ifdef __MPI
MPI_Allreduce(MPI_IN_PLACE, fftixy2is, rho_basis->fftnxy, MPI_INT, MPI_SUM, POOL_WORLD);
Parallel_Reduce::reduce_pool(fftixy2is, rho_basis->fftnxy);
if(rho_basis->poolnproc>1)
for (int i=0;i<rho_basis->fftnxy;++i)
fftixy2is[i]+=rho_basis->poolnproc-1;
Expand Down
3 changes: 2 additions & 1 deletion source/source_hsolver/diago_bpcg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "source_base/global_function.h"
#include "source_base/kernels/math_kernel_op.h"
#include "source_base/parallel_comm.h" // different MPI worlds
#include "source_base/parallel_reduce.h"
#include "source_hsolver/kernels/bpcg_kernel_op.h"
#include "para_linear_transform.h"

Expand Down Expand Up @@ -86,7 +87,7 @@ bool DiagoBPCG<T, Device>::test_error(const ct::Tensor& err_in, const std::vecto
}
}
#ifdef __MPI
MPI_Allreduce(MPI_IN_PLACE, &not_conv, 1, MPI_C_BOOL, MPI_LOR, BP_WORLD);
Parallel_Reduce::reduce_or_bp(not_conv);
#endif
return not_conv;
}
Expand Down
Loading
Loading