Skip to content

Commit 6114817

Browse files
committed
Add plane-wave parallel support for inner-product-like gemm_op in bpcg
1 parent 31baa6b commit 6114817

File tree

1 file changed

+5
-3
lines changed

1 file changed

+5
-3
lines changed

source/module_hsolver/diago_bpcg.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ void DiagoBPCG<T, Device>::orth_cholesky(
115115
hsub_out.data<T>(),
116116
this->n_band); //ldc
117117

118-
// Parallel_Reduce::reduce_pool(hsub_out.data<T>(), this->n_band * this->n_band);
118+
Parallel_Reduce::reduce_pool(hsub_out.data<T>(), this->n_band * this->n_band);
119119

120120
// set hsub matrix to lower format;
121121
ct::kernels::set_matrix<T, ct_Device>()(
@@ -186,7 +186,7 @@ void DiagoBPCG<T, Device>::orth_projection(
186186
hsub_in.data<T>(),
187187
this->n_band); //ldc
188188

189-
// Parallel_Reduce::reduce_pool(hsub_in.data<T>(), this->n_band * this->n_band);
189+
Parallel_Reduce::reduce_pool(hsub_in.data<T>(), this->n_band * this->n_band);
190190

191191
// set_matrix_op()('L', hsub_in->data<T>(), this->n_band);
192192
option = ct::EinsumOption(
@@ -209,6 +209,7 @@ void DiagoBPCG<T, Device>::orth_projection(
209209
grad_out.data<T>(),
210210
this->n_basis); //ldc
211211

212+
// * This kind of non-inner-product-like operation does not need a reduce!
212213
// Parallel_Reduce::reduce_pool(grad_out.data<T>(), this->n_basis * this->n_band);
213214

214215
return;
@@ -242,6 +243,7 @@ void DiagoBPCG<T, Device>::rotate_wf(
242243
workspace_in.data<T>(),
243244
this->n_basis); //ldc
244245

246+
// * This kind of non-inner-product-like operation does not need a reduce!
245247
// Parallel_Reduce::reduce_pool(workspace_in.data<T>(), this->n_basis * this->n_band);
246248

247249
syncmem_complex_op()(psi_out.template data<T>(), workspace_in.template data<T>(), this->n_band * this->n_basis);
@@ -289,7 +291,7 @@ void DiagoBPCG<T, Device>::diag_hsub(
289291
hsub_out.data<T>(),
290292
this->n_band); //ldc
291293

292-
// Parallel_Reduce::reduce_pool(hsub_out.data<T>(), this->n_band * this->n_band);
294+
Parallel_Reduce::reduce_pool(hsub_out.data<T>(), this->n_band * this->n_band);
293295

294296
ct::kernels::lapack_dnevd<T, ct_Device>()('V', 'U', hsub_out.data<T>(), this->n_band, eigenvalue_out.data<Real>());
295297

0 commit comments

Comments
 (0)