Skip to content

Commit 74284f9

Browse files
Feature: Make BPCG support band parallelism (#5873)
* move kpar into read_input_item
* add para_linear_transform_op
* arrange the order in read_input
* change name
* fix compile
* make bpcg support bndpar > 1
* fix BPCG
* fix bug in sDFT-BPCG
* make sdft+bpcg support GPU
* update results
* fix bug in BPCG
* fix tests
* fix test
* update results
* update results
* [pre-commit.ci lite] apply automatic fixes
* update

---------

Co-authored-by: pre-commit-ci-lite[bot] <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com>
1 parent a09497a commit 74284f9

File tree

100 files changed

+1876
-865
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

100 files changed

+1876
-865
lines changed

source/Makefile.Objects

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,7 @@ OBJS_HSOLVER=diago_cg.o\
333333
diago_david.o\
334334
diago_dav_subspace.o\
335335
diago_bpcg.o\
336+
para_linear_transform.o\
336337
hsolver.o\
337338
hsolver_pw.o\
338339
hsolver_lcaopw.o\

source/driver.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -152,9 +152,9 @@ void Driver::reading()
152152
GlobalV::MY_RANK,
153153
PARAM.inp.bndpar,
154154
GlobalV::KPAR,
155-
GlobalV::NPROC_IN_STOGROUP,
156-
GlobalV::RANK_IN_STOGROUP,
157-
GlobalV::MY_STOGROUP,
155+
GlobalV::NPROC_IN_BNDGROUP,
156+
GlobalV::RANK_IN_BPGROUP,
157+
GlobalV::MY_BNDGROUP,
158158
GlobalV::NPROC_IN_POOL,
159159
GlobalV::RANK_IN_POOL,
160160
GlobalV::MY_POOL);

source/module_base/global_file.cpp

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -153,36 +153,32 @@ void ModuleBase::Global_File::make_dir_out(
153153
#endif
154154
}
155155

156-
std::stringstream ss,ss1;
157-
158156
// mohan add 2010-09-12
159157
if(out_alllog)
160158
{
161-
ss << "running_" << calculation << "_" << rank + 1;
162-
open_log(GlobalV::ofs_running, ss.str(), calculation, restart);
159+
open_log(GlobalV::ofs_running, PARAM.globalv.log_file, calculation, restart);
163160
#if defined(__CUDA) || defined(__ROCM)
164-
open_log(GlobalV::ofs_device, "device" + std::to_string(rank), calculation, restart);
161+
open_log(GlobalV::ofs_device, "device" + std::to_string(rank) + ".log", calculation, restart);
165162
#endif
166163
}
167164
else
168165
{
169166
if(rank==0)
170167
{
171-
ss << "running_" << calculation;
172-
open_log(GlobalV::ofs_running, ss.str(), calculation, restart);
168+
open_log(GlobalV::ofs_running, PARAM.globalv.log_file, calculation, restart);
173169
#if defined(__CUDA) || defined(__ROCM)
174-
open_log(GlobalV::ofs_device, "device", calculation, restart);
170+
open_log(GlobalV::ofs_device, "device.log", calculation, restart);
175171
#endif
176172
}
177173
}
178174

179175
if(rank==0)
180176
{
181-
open_log(GlobalV::ofs_warning, "warning", calculation, restart);
177+
open_log(GlobalV::ofs_warning, "warning.log", calculation, restart);
182178
}
183179

184180
#ifdef GATHER_INFO
185-
open_log(GlobalV::ofs_info, "math_info_" + std::to_string(rank), calculation, restart);
181+
open_log(GlobalV::ofs_info, "math_info_" + std::to_string(rank) + ".log", calculation, restart);
186182
#endif
187183

188184
return;
@@ -206,7 +202,7 @@ void ModuleBase::Global_File::open_log(std::ofstream &ofs, const std::string &fn
206202
// PARAM.globalv.global_out_dir : (default dir to store "*.log" file)
207203
//----------------------------------------------------------
208204
std::stringstream ss;
209-
ss << PARAM.globalv.global_out_dir << fn << ".log";
205+
ss << PARAM.globalv.global_out_dir << fn;
210206

211207
if(calculation == "md" && restart)
212208
{

source/module_base/global_variable.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,13 @@ namespace GlobalV
1818

1919
int NPROC = 1; ///< global number of processes
2020
int KPAR = 1; ///< global number of pools
21-
int KPAR_LCAO = 1; ///< global number of pools for LCAO diagonalization only
2221
int MY_RANK = 0; ///< global index of process
2322
int MY_POOL = 0; ///< global index of pool (count in pool)
24-
int MY_STOGROUP = 0;
23+
int MY_BNDGROUP = 0;
2524
int NPROC_IN_POOL = 1; ///< local number of processes in a pool
26-
int NPROC_IN_STOGROUP = 1;
25+
int NPROC_IN_BNDGROUP = 1;
2726
int RANK_IN_POOL = 0; ///< global index of pool (count in process), my_rank in each pool
28-
int RANK_IN_STOGROUP = 0;
27+
int RANK_IN_BPGROUP = 0;
2928
int DRANK = -1; ///< mohan add 2012-01-13, must be -1, so we can recognize who
3029
///< isn't in DIAG_WORLD
3130
int DSIZE = KPAR;

source/module_base/global_variable.h

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,23 +28,21 @@ namespace GlobalV
2828
// NAME : DCOLOR( color of each group)
2929
// NAME : GRANK( index of grid world)
3030
// NAME : GSIZE( number of processors in each grid world)
31-
// NAME : KPAR_LCAO ( global number of pools for LCAO diagonalization only)
3231
//========================================================================
3332
extern int NPROC;
3433
extern int KPAR;
3534
extern int MY_RANK;
3635
extern int MY_POOL;
37-
extern int MY_STOGROUP;
36+
extern int MY_BNDGROUP;
3837
extern int NPROC_IN_POOL;
39-
extern int NPROC_IN_STOGROUP;
38+
extern int NPROC_IN_BNDGROUP;
4039
extern int RANK_IN_POOL;
41-
extern int RANK_IN_STOGROUP;
40+
extern int RANK_IN_BPGROUP;
4241
extern int DRANK;
4342
extern int DSIZE;
4443
extern int DCOLOR;
4544
extern int GRANK;
4645
extern int GSIZE;
47-
extern int KPAR_LCAO;
4846

4947
//==========================================================
5048
// NAME : ofs_running( contains information during running)

source/module_base/kernels/math_kernel_op.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -382,11 +382,13 @@ template struct line_minimize_with_block_op<std::complex<float>, base_device::DE
382382

383383
template struct scal_op<double, base_device::DEVICE_CPU>;
384384
template struct axpy_op<std::complex<double>, base_device::DEVICE_CPU>;
385+
template struct axpy_op<double, base_device::DEVICE_CPU>;
385386
template struct gemv_op<std::complex<double>, base_device::DEVICE_CPU>;
386387
template struct gemv_op<double, base_device::DEVICE_CPU>;
387388
template struct gemm_op<std::complex<double>, base_device::DEVICE_CPU>;
388389
template struct gemm_op<double, base_device::DEVICE_CPU>;
389390
template struct dot_real_op<std::complex<double>, base_device::DEVICE_CPU>;
391+
template struct dot_real_op<double, base_device::DEVICE_CPU>;
390392
template struct vector_div_constant_op<std::complex<double>, base_device::DEVICE_CPU>;
391393
template struct vector_mul_vector_op<std::complex<double>, base_device::DEVICE_CPU>;
392394
template struct vector_div_vector_op<std::complex<double>, base_device::DEVICE_CPU>;
@@ -397,8 +399,6 @@ template struct calc_grad_with_block_op<std::complex<double>, base_device::DEVIC
397399
template struct line_minimize_with_block_op<std::complex<double>, base_device::DEVICE_CPU>;
398400

399401
#ifdef __LCAO
400-
template struct axpy_op<double, base_device::DEVICE_CPU>;
401-
template struct dot_real_op<double, base_device::DEVICE_CPU>;
402402
template struct vector_mul_vector_op<double, base_device::DEVICE_CPU>;
403403
template struct vector_div_constant_op<double, base_device::DEVICE_CPU>;
404404
template struct vector_div_vector_op<double, base_device::DEVICE_CPU>;

0 commit comments

Comments
 (0)