Skip to content

Commit bbdb7bf

Browse files
authored
Merge branch 'develop' into pot_dfpt
2 parents 239c306 + 5a83d6a commit bbdb7bf

File tree

154 files changed

+904
-684
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

154 files changed

+904
-684
lines changed

.github/workflows/cuda.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ jobs:
3030
sudo apt-get update
3131
sudo apt-get install -y ccache
3232
33-
- name: Build
33+
- name: Configure & Build
3434
run: |
3535
nvidia-smi
3636
rm -rf build

CMakeLists.txt

Lines changed: 65 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -129,16 +129,63 @@ if(NOT ENABLE_MPI)
129129
endif()
130130

131131
# Different exe files of ABACUS
132-
if(ENABLE_LCAO AND ENABLE_MPI)
133-
set(ABACUS_BIN_NAME abacus)
134-
elseif(NOT ENABLE_LCAO AND ENABLE_MPI)
135-
set(ABACUS_BIN_NAME abacus_pw)
136-
elseif(NOT ENABLE_LCAO AND NOT ENABLE_MPI)
137-
set(ABACUS_BIN_NAME abacus_pw_serial)
138-
elseif(ENABLE_LCAO AND NOT ENABLE_MPI)
139-
set(ABACUS_BIN_NAME abacus_serial)
132+
unset(ABACUS_BIN_NAME CACHE)

# The executable is named abacus_<level><suffix>.
#
# <level> encodes the enabled feature set:
#   1  PW only                  (ENABLE_LCAO off; LibRI/MLALGO are ignored)
#   2  LCAO
#   3  LCAO + ENABLE_LIBRI
#   4  LCAO + ENABLE_MLALGO
#   5  LCAO + ENABLE_LIBRI + ENABLE_MLALGO
#
# <suffix> encodes the execution model:
#   g  USE_CUDA with ENABLE_MPI (GPU)
#   p  CPU with ENABLE_MPI      (parallel)
#   s  CPU without ENABLE_MPI   (serial)
if(NOT ENABLE_LCAO)
  set(abacus_bin_level 1)
elseif(ENABLE_LIBRI AND ENABLE_MLALGO)
  set(abacus_bin_level 5)
elseif(ENABLE_MLALGO)
  set(abacus_bin_level 4)
elseif(ENABLE_LIBRI)
  set(abacus_bin_level 3)
else()
  set(abacus_bin_level 2)
endif()

if(USE_CUDA AND ENABLE_MPI)
  set(abacus_bin_suffix g)
elseif(USE_CUDA)
  # No executable name exists for a CUDA build without MPI. Stop here with a
  # clear message instead of leaving ABACUS_BIN_NAME undefined, which would
  # only surface later as an obscure error in target_link_libraries().
  message(FATAL_ERROR
    "USE_CUDA=ON requires ENABLE_MPI=ON: no ABACUS executable name is "
    "defined for a CUDA build without MPI.")
elseif(ENABLE_MPI)
  set(abacus_bin_suffix p)
else()
  set(abacus_bin_suffix s)
endif()

set(ABACUS_BIN_NAME "abacus_${abacus_bin_level}${abacus_bin_suffix}")
141187

188+
142189
# Use DSP hardware
143190
if (USE_DSP)
144191
set(USE_ELPA OFF)
@@ -488,12 +535,12 @@ elseif(NOT USE_SW)
488535
find_package(Lapack REQUIRED)
489536
include_directories(${FFTW3_INCLUDE_DIRS})
490537
list(APPEND math_libs FFTW3::FFTW3 LAPACK::LAPACK BLAS::BLAS)
491-
if(USE_DSP)
492-
target_link_libraries(${ABACUS_BIN_NAME} ${SCALAPACK_LIBRARY_DIR})
493-
else()
494-
find_package(ScaLAPACK REQUIRED)
495-
list(APPEND math_libs ScaLAPACK::ScaLAPACK)
496-
endif()
538+
if(USE_DSP)
539+
target_link_libraries(${ABACUS_BIN_NAME} ${SCALAPACK_LIBRARY_DIR})
540+
else()
541+
find_package(ScaLAPACK REQUIRED)
542+
list(APPEND math_libs ScaLAPACK::ScaLAPACK)
543+
endif()
497544
if(USE_OPENMP)
498545
list(APPEND math_libs FFTW3::FFTW3_OMP)
499546
endif()
@@ -735,7 +782,7 @@ if(ENABLE_LCAO)
735782
target_link_libraries(${ABACUS_BIN_NAME} genelpa)
736783
endif()
737784
if(USE_CUDA)
738-
target_link_libraries(diag_cusolver)
785+
target_link_libraries(${ABACUS_BIN_NAME} diag_cusolver)
739786
endif()
740787
endif()
741788
if(ENABLE_RAPIDJSON)
@@ -758,6 +805,9 @@ install(PROGRAMS ${ABACUS_BIN_PATH}
758805
# DESTINATION ${CMAKE_INSTALL_BINDIR}
759806
)
760807

808+
# Create a symbolic link 'abacus' pointing to the actual executable.
#
# The install prefix is escaped so that it is resolved when the install script
# runs, not when the project is configured. This honours DESTDIR and
# `cmake --install <dir> --prefix <path>`. The link target is the bare
# executable name, so the link is relative and stays valid if the install tree
# is relocated.
install(CODE "
  set(abacus_install_bindir \"\$ENV{DESTDIR}\${CMAKE_INSTALL_PREFIX}/bin\")
  file(MAKE_DIRECTORY \"\${abacus_install_bindir}\")
  execute_process(
    COMMAND \"${CMAKE_COMMAND}\" -E create_symlink
            \"${ABACUS_BIN_NAME}\" \"\${abacus_install_bindir}/abacus\")
")
810+
761811
if(ENABLE_COVERAGE)
762812
coverage_evaluate()
763813
endif()

docs/advanced/acceleration/cuda.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@ The ABACUS program will automatically determine whether the current ELPA support
3737

3838
In `INPUT` file we need to set the input parameter [device](../input_files/input-main.md#device) to `gpu`. If this parameter is not set, ABACUS will try to determine if there are available GPUs.
3939
- Set `ks_solver`: For the PW basis, CG, BPCG and Davidson methods are supported on GPU; set the input parameter [ks_solver](../input_files/input-main.md#ks_solver) to `cg`, `bpcg` or `dav`. For the LCAO basis, `cusolver`, `cusolvermp` and `elpa` are supported on GPU.
40-
- **multi-card**: ABACUS allows for multi-GPU acceleration. If you have multiple GPU cards, you can run ABACUS with several MPI processes, and each process will utilize one GPU card. For example, the command `mpirun -n 2 abacus` will by default launch two GPUs for computation. If you only have one card, this command will only start one GPU.
40+
- **single-card**: ABACUS supports single-GPU acceleration. You can run ABACUS without MPI using the command `abacus`; `ks_solver cusolver` is recommended for the LCAO basis. *Note: avoid using `mpirun -n 1 abacus`.*
41+
- **multi-card**: ABACUS supports multi-GPU acceleration. If you have multiple GPU cards, you can run ABACUS with several MPI processes, and each process will use one GPU card. For example, the command `mpirun -n 2 abacus` will by default use two GPUs for the computation. If you only have one card, this command will only use one GPU. *Note: the number of MPI processes should equal the number of GPU cards, unless you are using MPS on your machine.*
4142

4243
## Examples
4344
We provide [examples](https://github.com/deepmodeling/abacus-develop/tree/develop/examples/gpu) of GPU calculations.

examples/36_gpu/si16_lcao/INPUT

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,8 @@ INPUT_PARAMETERS
33
suffix autotest
44
calculation scf
55
device gpu
6-
gamma_only 1 # GPU acceleration currently only support gamma_only set to 1. ### Abacus will generate/overwrite a KPT file when gamma_only is set to 1.
76
ks_solver cusolver # if not set, the default ks_solver is cusolver,
8-
# you can also choose genelpa or scalapack_gvx.
7+
# you can also choose cusolvermp or elpa if compiled.
98

109
#nbands 8
1110
symmetry 1
@@ -26,7 +25,7 @@ smearing_sigma 0.002
2625

2726
#Parameters (5.Mixing)
2827
mixing_type broyden
29-
mixing_beta 0.3
28+
mixing_beta 0.4
3029

3130

3231
### [1] Energy cutoff determines the quality of numerical quadratures in your calculations.

examples/36_gpu/si16_lcao/KPT

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
K_POINTS
22
0
33
Gamma
4-
1 1 1 0 0 0
5-
###This file will be overwritten by Abacus because either kspacing is used or gamma_only is set to 1
4+
5 5 5 0 0 0
5+
### If you are running an energy calculation, please make sure your final energy is
6+
### converged with respect to the k-point settings, unless you set a loose k-point
7+
### mesh on purpose.

examples/36_gpu/si16_pw/INPUT

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ smearing_sigma 0.002
2121

2222
#Parameters (5.Mixing)
2323
mixing_type broyden
24-
mixing_beta 0.3
24+
mixing_beta 0.4
2525

2626

2727
### [1] Energy cutoff determines the quality of numerical quadratures in your calculations.
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
#ifndef MODULE_BASE_TRUNCATED_FUNC_H
2+
#define MODULE_BASE_TRUNCATED_FUNC_H
3+
4+
#include "source_base/libm/libm.h"
5+
#include <cstdint>
6+
#include <cstring>
7+
#include <complex>
8+
9+
namespace ModuleBase
10+
{
11+
12+
/**
13+
* @brief Truncated exponential function to avoid underflow.
14+
*
15+
* This function returns 0 if the real part of the input is less than -230.0,
16+
* otherwise it calls ModuleBase::libm::exp(x).
17+
*
18+
* @tparam FPTYPE The floating point type (float, double, or complex).
19+
* @param x The input value.
20+
* @return FPTYPE The result of the exponential function.
21+
*/
22+
template <typename FPTYPE>
23+
inline FPTYPE truncated_exp(FPTYPE x)
24+
{
25+
if (std::real(x) < -230.0)
26+
{
27+
return static_cast<FPTYPE>(0.0);
28+
}
29+
return ModuleBase::libm::exp(x);
30+
}
31+
32+
/**
33+
* @brief Truncated complementary error function to avoid underflow for large arguments.
34+
*
35+
* This function returns 0 if the real part of the input is greater than 20.0,
36+
* otherwise it calls std::erfc(x).
37+
*
38+
* @tparam FPTYPE The floating point type (float, double, or complex).
39+
* @param x The input value.
40+
* @return FPTYPE The result of the erfc function.
41+
*/
42+
template <typename FPTYPE>
43+
inline FPTYPE truncated_erfc(FPTYPE x)
44+
{
45+
if (std::real(x) > 20.0)
46+
{
47+
return static_cast<FPTYPE>(0.0);
48+
}
49+
return std::erfc(x);
50+
}
51+
52+
/**
 * @brief Flush a tiny value to zero, in place, to avoid underflow.
 *
 * Generic fallback used for types without a dedicated overload below:
 * sets @p x to zero when std::abs(x) < 1.0e-30 and leaves it unchanged
 * otherwise.
 *
 * @tparam FPTYPE The numeric type; it must be accepted by std::abs.
 * @param x The value to check; overwritten with zero if it is too small.
 */
template <typename FPTYPE>
inline void truncated_underflow(FPTYPE& x)
{
    if (std::abs(x) < 1.0e-30)
    {
        x = static_cast<FPTYPE>(0.0);
    }
}

/**
 * @brief Specialization for double that tests the exponent bits directly.
 *
 * The biased exponent occupies bits 52-62 (11 bits, bias 1023). The value is
 * set to +0.0 when the biased exponent is <= 923 (= 1023 - 100), i.e. when
 * |x| < 2^-99 (about 1.58e-30). This is slightly looser than the 1.0e-30
 * cutoff of the generic version. Zeros and subnormals (exponent field 0) are
 * covered by the same test; infinities and NaNs (exponent field 0x7FF) are
 * left unchanged.
 *
 * @param x The value to check; overwritten with 0.0 if it is too small.
 */
template <>
inline void truncated_underflow(double& x)
{
    static_assert(sizeof(double) == sizeof(std::uint64_t), "double is expected to be 64 bits wide");
    // Copy the bytes out instead of casting the pointer: reading a double
    // through a uint64_t* violates the strict-aliasing rule.
    std::uint64_t bits = 0;
    std::memcpy(&bits, &x, sizeof(bits));
    if (((bits >> 52) & 0x7FF) <= 923)
    {
        x = 0.0;
    }
}

/**
 * @brief Specialization for float that tests the exponent bits directly.
 *
 * The biased exponent occupies bits 23-30 (8 bits, bias 127). The value is
 * set to +0.0f when the biased exponent is <= 27 (= 127 - 100), i.e. when
 * |x| < 2^-99 (about 1.58e-30). Zeros and subnormals are covered by the same
 * test; infinities and NaNs (exponent field 0xFF) are left unchanged.
 *
 * @param x The value to check; overwritten with 0.0f if it is too small.
 */
template <>
inline void truncated_underflow(float& x)
{
    static_assert(sizeof(float) == sizeof(std::uint32_t), "float is expected to be 32 bits wide");
    // Same aliasing-safe bit copy as in the double specialization.
    std::uint32_t bits = 0;
    std::memcpy(&bits, &x, sizeof(bits));
    if (((bits >> 23) & 0xFF) <= 27)
    {
        x = 0.0f;
    }
}

/**
 * @brief Overload for complex numbers: flushes the real and imaginary parts
 *        independently.
 *
 * @tparam T The component type of the complex number.
 * @param x The complex value; each component is passed through
 *          truncated_underflow for type T.
 */
template <typename T>
inline void truncated_underflow(std::complex<T>& x)
{
    T re = x.real();
    T im = x.imag();
    truncated_underflow(re);
    truncated_underflow(im);
    x = std::complex<T>(re, im);
}
112+
113+
114+
} // namespace ModuleBase
115+
116+
#endif // MODULE_BASE_TRUNCATED_FUNC_H

source/source_basis/module_nao/test/CMakeLists.txt

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ AddTest(
1616
../numerical_radial.cpp
1717
../../module_ao/ORB_atomic_lm.cpp
1818
../../module_ao/ORB_atomic.cpp
19-
../../../source_io/orb_io.cpp
19+
../../../source_io/module_output/orb_io.cpp
2020
LIBS parameter ${math_libs} device base
2121
)
2222

@@ -29,7 +29,7 @@ AddTest(
2929
../numerical_radial.cpp
3030
../../module_ao/ORB_atomic_lm.cpp
3131
../../module_ao/ORB_atomic.cpp
32-
../../../source_io/orb_io.cpp
32+
../../../source_io/module_output/orb_io.cpp
3333
LIBS parameter ${math_libs} device base
3434
)
3535

@@ -42,7 +42,7 @@ AddTest(
4242
../numerical_radial.cpp
4343
../../module_ao/ORB_atomic_lm.cpp
4444
../../module_ao/ORB_atomic.cpp
45-
../../../source_io/orb_io.cpp
45+
../../../source_io/module_output/orb_io.cpp
4646
LIBS parameter ${math_libs} device base
4747
)
4848

@@ -55,7 +55,7 @@ AddTest(
5555
../numerical_radial.cpp
5656
../../module_ao/ORB_atomic_lm.cpp
5757
../../module_ao/ORB_atomic.cpp
58-
../../../source_io/orb_io.cpp
58+
../../../source_io/module_output/orb_io.cpp
5959
LIBS parameter ${math_libs} device base
6060
)
6161

@@ -68,7 +68,7 @@ AddTest(
6868
../numerical_radial.cpp
6969
../../module_ao/ORB_atomic_lm.cpp
7070
../../module_ao/ORB_atomic.cpp
71-
../../../source_io/orb_io.cpp
71+
../../../source_io/module_output/orb_io.cpp
7272
LIBS parameter ${math_libs} device base
7373
)
7474

@@ -86,7 +86,7 @@ AddTest(
8686
../sphbes_radials.cpp
8787
../../module_ao/ORB_atomic_lm.cpp
8888
../../module_ao/ORB_atomic.cpp
89-
../../../source_io/orb_io.cpp
89+
../../../source_io/module_output/orb_io.cpp
9090
LIBS parameter ${math_libs} device base
9191
)
9292

@@ -106,7 +106,7 @@ AddTest(
106106
../two_center_bundle.cpp
107107
../two_center_integrator.cpp
108108
../real_gaunt_table.cpp
109-
../../../source_io/orb_io.cpp
109+
../../../source_io/module_output/orb_io.cpp
110110
LIBS parameter ${math_libs} device base container orb
111111
)
112112

@@ -135,7 +135,7 @@ AddTest(
135135
../radial_set.cpp
136136
../numerical_radial.cpp
137137
../two_center_bundle.cpp
138-
../../../source_io/orb_io.cpp
138+
../../../source_io/module_output/orb_io.cpp
139139
LIBS parameter ${math_libs} device base container orb
140140
)
141141

@@ -155,7 +155,7 @@ AddTest(
155155
../sphbes_radials.cpp
156156
../radial_set.cpp
157157
../numerical_radial.cpp
158-
../../../source_io/orb_io.cpp
158+
../../../source_io/module_output/orb_io.cpp
159159
LIBS parameter ${math_libs} device base container orb
160160
)
161161

@@ -175,7 +175,7 @@ AddTest(
175175
../sphbes_radials.cpp
176176
../radial_set.cpp
177177
../numerical_radial.cpp
178-
../../../source_io/orb_io.cpp
178+
../../../source_io/module_output/orb_io.cpp
179179
LIBS parameter ${math_libs} device base container orb
180180
)
181181

source/source_esolver/esolver_dm2rho.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
#include "source_lcao/LCAO_domain.h"
88
#include "source_lcao/hamilt_lcao.h"
99
#include "source_lcao/module_operator_lcao/operator_lcao.h"
10-
#include "source_io/cube_io.h"
11-
#include "../source_io/module_ml/io_npz.h"
10+
#include "source_io/module_output/cube_io.h"
11+
#include "source_io/module_ml/io_npz.h"
1212
#include "source_io/module_output/print_info.h"
1313
#include "source_lcao/rho_tau_lcao.h" // mohan add 2025-10-24
1414

source/source_esolver/esolver_dp.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
#include "source_base/parallel_common.h"
2323
#include "source_base/timer.h"
2424
#include "source_io/module_output/output_log.h"
25-
#include "source_io/cif_io.h"
25+
#include "source_io/module_output/cif_io.h"
2626

2727
#include <iomanip>
2828
#include <sstream>

0 commit comments

Comments
 (0)