Skip to content

Commit 48e91db

Browse files
committed
fix vloc computation in cal_force_loc_sincos_op
1 parent 06b2e25 commit 48e91db

File tree

5 files changed

+10
-28
lines changed

5 files changed

+10
-28
lines changed

source/module_hamilt_pw/hamilt_pwdft/forces.cpp

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -537,7 +537,6 @@ void Forces<FPTYPE, Device>::cal_force_loc(const UnitCell& ucell,
537537

538538
// 准备原子相关数据:按照全局原子索引iat顺序
539539
std::vector<FPTYPE> tau_flat(this->nat * 3);
540-
std::vector<int> iat2it_host(this->nat);
541540
std::vector<FPTYPE> gcar_flat(rho_basis->npw * 3);
542541

543542
// 按照原始代码逻辑:遍历全局原子索引iat,通过查表获取(it,ia)
@@ -548,7 +547,6 @@ void Forces<FPTYPE, Device>::cal_force_loc(const UnitCell& ucell,
548547
tau_flat[iat * 3 + 0] = static_cast<FPTYPE>(ucell.atoms[it].tau[ia][0]);
549548
tau_flat[iat * 3 + 1] = static_cast<FPTYPE>(ucell.atoms[it].tau[ia][1]);
550549
tau_flat[iat * 3 + 2] = static_cast<FPTYPE>(ucell.atoms[it].tau[ia][2]);
551-
iat2it_host[iat] = it;
552550
}
553551

554552
for (int ig = 0; ig < rho_basis->npw; ig++) {
@@ -559,9 +557,10 @@ void Forces<FPTYPE, Device>::cal_force_loc(const UnitCell& ucell,
559557

560558
// 重新计算vloc_factors考虑所有原子类型
561559
std::vector<FPTYPE> vloc_per_type_host(ucell.ntype * rho_basis->npw);
562-
for (int it = 0; it < ucell.ntype; it++) {
560+
for (int iat = 0; iat < this->nat; iat++) {
561+
int it = ucell.iat2it[iat];
563562
for (int ig = 0; ig < rho_basis->npw; ig++) {
564-
vloc_per_type_host[it * rho_basis->npw + ig] = static_cast<FPTYPE>(vloc(it, rho_basis->ig2igg[ig]));
563+
vloc_per_type_host[iat * rho_basis->npw + ig] = static_cast<FPTYPE>(vloc(it, rho_basis->ig2igg[ig]));
565564
}
566565
}
567566

@@ -574,7 +573,6 @@ void Forces<FPTYPE, Device>::cal_force_loc(const UnitCell& ucell,
574573
// 设备端内存和指针设置(根据设备类型分支)
575574
FPTYPE* d_gcar = gcar_flat.data();
576575
FPTYPE* d_tau = tau_flat.data();
577-
int* d_iat2it = iat2it_host.data();
578576
FPTYPE* d_vloc_per_type = vloc_per_type_host.data();
579577
std::complex<FPTYPE>* d_aux = aux_fptype.data();
580578
FPTYPE* d_force = nullptr;
@@ -584,21 +582,18 @@ void Forces<FPTYPE, Device>::cal_force_loc(const UnitCell& ucell,
584582
{
585583
d_gcar = nullptr;
586584
d_tau = nullptr;
587-
d_iat2it = nullptr;
588585
d_vloc_per_type = nullptr;
589586
d_aux = nullptr;
590587

591588
resmem_var_op()(this->ctx, d_gcar, rho_basis->npw * 3);
592589
resmem_var_op()(this->ctx, d_tau, this->nat * 3);
593-
resmem_int_op()(this->ctx, d_iat2it, this->nat);
594590
resmem_var_op()(this->ctx, d_vloc_per_type, ucell.ntype * rho_basis->npw);
595591
resmem_complex_op()(this->ctx, d_aux, rho_basis->npw);
596592
resmem_var_op()(this->ctx, d_force, this->nat * 3);
597593

598594
// 数据传输到设备
599595
syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, d_gcar, gcar_flat.data(), rho_basis->npw * 3);
600596
syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, d_tau, tau_flat.data(), this->nat * 3);
601-
syncmem_int_h2d_op()(this->ctx, this->cpu_ctx, d_iat2it, iat2it_host.data(), this->nat);
602597
syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, d_vloc_per_type, vloc_per_type_host.data(), ucell.ntype * rho_basis->npw);
603598
syncmem_complex_h2d_op()(this->ctx, this->cpu_ctx, d_aux, aux_fptype.data(), rho_basis->npw);
604599

@@ -623,7 +618,6 @@ void Forces<FPTYPE, Device>::cal_force_loc(const UnitCell& ucell,
623618
ucell.ntype,
624619
d_gcar,
625620
d_tau,
626-
d_iat2it,
627621
d_vloc_per_type,
628622
d_aux,
629623
scale_factor,
@@ -639,7 +633,6 @@ void Forces<FPTYPE, Device>::cal_force_loc(const UnitCell& ucell,
639633
// 清理设备内存
640634
delmem_var_op()(this->ctx, d_gcar);
641635
delmem_var_op()(this->ctx, d_tau);
642-
delmem_int_op()(this->ctx, d_iat2it);
643636
delmem_var_op()(this->ctx, d_vloc_per_type);
644637
delmem_complex_op()(this->ctx, d_aux);
645638
delmem_var_op()(this->ctx, d_force);
@@ -652,7 +645,7 @@ void Forces<FPTYPE, Device>::cal_force_loc(const UnitCell& ucell,
652645
forcelc(iat, 2) = static_cast<double>(force_host[iat * 3 + 2]);
653646
}
654647

655-
// this->print(GlobalV::ofs_running, "local forces", forcelc);
648+
// this->print(GlobalV: :ofs_running, "local forces", forcelc);
656649
Parallel_Reduce::reduce_pool(forcelc.c, forcelc.nr * forcelc.nc);
657650
delete[] aux;
658651
ModuleBase::timer::tick("Forces", "cal_force_loc");

source/module_hamilt_pw/hamilt_pwdft/kernels/cuda/force_op.cu

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ __global__ void cal_force_loc_sincos_kernel(
2121
const int ntype,
2222
const FPTYPE* gcar,
2323
const FPTYPE* tau,
24-
const int* iat2it,
2524
const FPTYPE* vloc_per_type,
2625
const thrust::complex<FPTYPE>* aux,
2726
const FPTYPE scale_factor,
@@ -36,7 +35,6 @@ __global__ void cal_force_loc_sincos_kernel(
3635
if (iat >= nat) return;
3736

3837
// Load atom information to registers
39-
const int it = iat2it[iat];
4038
const FPTYPE tau_x = tau[iat * 3 + 0];
4139
const FPTYPE tau_y = tau[iat * 3 + 1];
4240
const FPTYPE tau_z = tau[iat * 3 + 2];
@@ -58,7 +56,7 @@ __global__ void cal_force_loc_sincos_kernel(
5856
sincos(phase, &sinp, &cosp);
5957

6058
// Calculate force factor
61-
const FPTYPE vloc_factor = vloc_per_type[it * npw + ig];
59+
const FPTYPE vloc_factor = vloc_per_type[iat * npw + ig];
6260
const FPTYPE factor = vloc_factor * (cosp * aux[ig].imag() + sinp * aux[ig].real());
6361

6462
// Accumulate force contributions
@@ -316,7 +314,6 @@ void cal_force_loc_sincos_op<FPTYPE, base_device::DEVICE_GPU>::operator()(
316314
const int& ntype,
317315
const FPTYPE* gcar,
318316
const FPTYPE* tau,
319-
const int* iat2it,
320317
const FPTYPE* vloc_per_type,
321318
const std::complex<FPTYPE>* aux,
322319
const FPTYPE& scale_factor,
@@ -331,7 +328,7 @@ void cal_force_loc_sincos_op<FPTYPE, base_device::DEVICE_GPU>::operator()(
331328
dim3 block(threads_per_block);
332329

333330
cal_force_loc_sincos_kernel<FPTYPE><<<grid, block>>>(
334-
nat, npw, ntype, gcar, tau, iat2it, vloc_per_type,
331+
nat, npw, ntype, gcar, tau, vloc_per_type,
335332
reinterpret_cast<const thrust::complex<FPTYPE>*>(aux),
336333
scale_factor, force
337334
);

source/module_hamilt_pw/hamilt_pwdft/kernels/force_op.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -438,7 +438,6 @@ struct cal_force_loc_sincos_op<FPTYPE, base_device::DEVICE_CPU>
438438
const int& ntype,
439439
const FPTYPE* gcar,
440440
const FPTYPE* tau,
441-
const int* iat2it,
442441
const FPTYPE* vloc_per_type,
443442
const std::complex<FPTYPE>* aux,
444443
const FPTYPE& scale_factor,
@@ -451,7 +450,6 @@ struct cal_force_loc_sincos_op<FPTYPE, base_device::DEVICE_CPU>
451450
#endif
452451
for (int iat = 0; iat < nat; ++iat)
453452
{
454-
const int it = iat2it[iat];
455453
const FPTYPE tau_x = tau[iat * 3 + 0];
456454
const FPTYPE tau_y = tau[iat * 3 + 1];
457455
const FPTYPE tau_z = tau[iat * 3 + 2];
@@ -466,7 +464,7 @@ struct cal_force_loc_sincos_op<FPTYPE, base_device::DEVICE_CPU>
466464
FPTYPE sinp, cosp;
467465
ModuleBase::libm::sincos(phase, &sinp, &cosp);
468466

469-
const FPTYPE vloc_factor = vloc_per_type[it * npw + ig];
467+
const FPTYPE vloc_factor = vloc_per_type[iat * npw + ig];
470468
const FPTYPE factor = vloc_factor * (cosp * aux[ig].imag() + sinp * aux[ig].real()) * scale_factor;
471469

472470
local_force[0] += gcar[ig * 3 + 0] * factor;

source/module_hamilt_pw/hamilt_pwdft/kernels/force_op.h

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -161,8 +161,7 @@ struct cal_force_loc_sincos_op
161161
/// @param ntype - number of atom types
162162
/// @param gcar - G-vector Cartesian coordinates [npw * 3]
163163
/// @param tau - atomic positions [nat * 3]
164-
/// @param iat2it - atom to type mapping [nat]
165-
/// @param vloc_per_type - precomputed vloc factors per type [ntype * npw]
164+
/// @param vloc_per_type - precomputed vloc factors per atom [nat * npw]
166165
/// @param aux - charge density in G-space [npw]
167166
/// @param scale_factor - tpiba * omega
168167
///
@@ -174,7 +173,6 @@ struct cal_force_loc_sincos_op
174173
const int& ntype,
175174
const FPTYPE* gcar,
176175
const FPTYPE* tau,
177-
const int* iat2it,
178176
const FPTYPE* vloc_per_type,
179177
const std::complex<FPTYPE>* aux,
180178
const FPTYPE& scale_factor,
@@ -319,7 +317,6 @@ struct cal_force_loc_sincos_op<FPTYPE, base_device::DEVICE_GPU>
319317
const int& ntype,
320318
const FPTYPE* gcar,
321319
const FPTYPE* tau,
322-
const int* iat2it,
323320
const FPTYPE* vloc_per_type,
324321
const std::complex<FPTYPE>* aux,
325322
const FPTYPE& scale_factor,

source/module_hamilt_pw/hamilt_pwdft/kernels/rocm/force_op.hip.cu

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -626,7 +626,6 @@ __global__ void cal_force_loc_sincos_kernel(
626626
const int ntype,
627627
const FPTYPE* gcar,
628628
const FPTYPE* tau,
629-
const int* iat2it,
630629
const FPTYPE* vloc_per_type,
631630
const thrust::complex<FPTYPE>* aux,
632631
const FPTYPE scale_factor,
@@ -641,7 +640,6 @@ __global__ void cal_force_loc_sincos_kernel(
641640
if (iat >= nat) return;
642641

643642
// Load atom information to registers
644-
const int it = iat2it[iat];
645643
const FPTYPE tau_x = tau[iat * 3 + 0];
646644
const FPTYPE tau_y = tau[iat * 3 + 1];
647645
const FPTYPE tau_z = tau[iat * 3 + 2];
@@ -663,7 +661,7 @@ __global__ void cal_force_loc_sincos_kernel(
663661
sincos(phase, &sinp, &cosp);
664662

665663
// Calculate force factor
666-
const FPTYPE vloc_factor = vloc_per_type[it * npw + ig];
664+
const FPTYPE vloc_factor = vloc_per_type[iat * npw + ig];
667665
const FPTYPE factor = vloc_factor * (cosp * aux[ig].imag() + sinp * aux[ig].real());
668666

669667
// Accumulate force contributions
@@ -747,7 +745,6 @@ void cal_force_loc_sincos_op<FPTYPE, base_device::DEVICE_GPU>::operator()(
747745
const int& ntype,
748746
const FPTYPE* gcar,
749747
const FPTYPE* tau,
750-
const int* iat2it,
751748
const FPTYPE* vloc_per_type,
752749
const std::complex<FPTYPE>* aux,
753750
const FPTYPE& scale_factor,
@@ -763,7 +760,7 @@ void cal_force_loc_sincos_op<FPTYPE, base_device::DEVICE_GPU>::operator()(
763760

764761
hipLaunchKernelGGL(cal_force_loc_sincos_kernel<FPTYPE>,
765762
grid, block, 0, 0,
766-
nat, npw, ntype, gcar, tau, iat2it, vloc_per_type,
763+
nat, npw, ntype, gcar, tau, vloc_per_type,
767764
reinterpret_cast<const thrust::complex<FPTYPE>*>(aux),
768765
scale_factor, force);
769766

0 commit comments

Comments
 (0)