Skip to content

Commit b025b62

Browse files
committed
Add reduce for the manual (for-loop) inner-product ops used in bpcg
1 parent 91f62f6 commit b025b62

File tree

1 file changed

+6
-0
lines changed

1 file changed

+6
-0
lines changed

source/module_hsolver/kernels/math_kernel_op.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ struct line_minimize_with_block_op<T, base_device::DEVICE_CPU>
 epsilo_1 += std::real(grad_out[item] * std::conj(hpsi_out[item]));
 epsilo_2 += std::real(grad_out[item] * std::conj(hgrad_out[item]));
 }
+Parallel_Reduce::reduce_pool(epsilo_0);
+Parallel_Reduce::reduce_pool(epsilo_1);
+Parallel_Reduce::reduce_pool(epsilo_2);
 theta = 0.5 * std::abs(std::atan(2 * epsilo_1 / (epsilo_0 - epsilo_2)));
 cos_theta = std::cos(theta);
 sin_theta = std::sin(theta);
@@ -81,6 +84,7 @@ struct calc_grad_with_block_op<T, base_device::DEVICE_CPU>
 hpsi_out[item] *= norm;
 epsilo += std::real(hpsi_out[item] * std::conj(psi_out[item]));
 }
+Parallel_Reduce::reduce_pool(epsilo);
 for (int basis_idx = 0; basis_idx < n_basis; basis_idx++)
 {
 auto item = band_idx * n_basis_max + basis_idx;
@@ -89,6 +93,8 @@ struct calc_grad_with_block_op<T, base_device::DEVICE_CPU>
 err += grad_2;
 beta += grad_2 / prec_in[basis_idx]; /// Mark here as we should div the prec?
 }
+Parallel_Reduce::reduce_pool(err);
+Parallel_Reduce::reduce_pool(beta);
 for (int basis_idx = 0; basis_idx < n_basis; basis_idx++)
 {
 auto item = band_idx * n_basis_max + basis_idx;

0 commit comments

Comments
 (0)