Skip to content

Commit 8d02233

Browse files
committed
Merge branch 'cusolver-kpar' of https://github.com/dzzz2001/abacus-develop into cusolver-kpar
2 parents 45089fe + 60a9ffc commit 8d02233

File tree

162 files changed

+5343
-2622
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

162 files changed

+5343
-2622
lines changed

CMakeLists.txt

Lines changed: 33 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -10,39 +10,45 @@ project(
1010
HOMEPAGE_URL "https://github.com/deepmodeling/abacus-develop"
1111
LANGUAGES CXX)
1212

13-
option(ENABLE_LCAO "Enable LCAO calculation." ON)
14-
option(ENABLE_DEEPKS "Enable DeePKS functionality" OFF)
15-
option(ENABLE_MLKEDF "Enable Machine Learning based KEDF for OFDFT" OFF)
16-
option(ENABLE_LIBXC "Enable LibXC functionality" OFF)
17-
option(USE_CUDA "Enable support to CUDA for ABACUS." OFF)
18-
option(ENABLE_FLOAT_FFTW "Enable support to single precision FFTW library." OFF)
19-
option(USE_ROCM "Enable support to ROCm." OFF)
20-
option(USE_OPENMP "Enable OpenMP in ABACUS." ON)
13+
option(ENABLE_MPI "Enable MPI" ON)
14+
option(USE_OPENMP "Enable OpenMP" ON)
15+
option(USE_CUDA "Enable CUDA" OFF)
16+
option(USE_CUDA_MPI "Enable CUDA-aware MPI" OFF)
17+
option(USE_CUDA_ON_DCU "Enable CUDA on DCU" OFF)
18+
option(USE_ROCM "Enable ROCm" OFF)
19+
option(USE_DSP "Enable DSP" OFF)
20+
21+
option(USE_ABACUS_LIBM "Build libmath from source to speed up" OFF)
22+
option(ENABLE_LIBXC "Enable using the LibXC package" OFF)
23+
option(ENABLE_FLOAT_FFTW "Enable using single-precision FFTW library." OFF)
24+
option(ENABLE_DEEPKS "Enable the DeePKS algorithm" OFF)
25+
option(ENABLE_MLKEDF "Enable the Machine-Learning-based KEDF for OFDFT" OFF)
26+
27+
option(ENABLE_LCAO "Enable LCAO algorithm" ON)
28+
option(USE_ELPA "Enable ELPA for LCAO" ON)
29+
option(ENABLE_LIBRI "Enable LibRI for hybrid functional" OFF)
30+
option(ENABLE_LIBCOMM "Enable LibComm" OFF)
31+
option(ENABLE_PEXSI "Enable PEXSI for LCAO" OFF)
32+
33+
option(BUILD_TESTING "Build unittests" OFF)
34+
option(DEBUG_INFO "Print message to debug" OFF)
2135
option(ENABLE_ASAN "Enable AddressSanitizer" OFF)
22-
option(BUILD_TESTING "Build ABACUS unit tests" OFF)
23-
option(INFO "Enable gathering of math library information" OFF)
24-
option(ENABLE_COVERAGE "Enable coverage build." OFF)
25-
option(ENABLE_LIBRI "Enable EXX with LibRI." OFF)
26-
option(ENABLE_LIBCOMM "Enable communicate with LibComm." OFF)
27-
option(ENABLE_PAW "Enable PAW calculation" OFF)
28-
option(ENABLE_MPI "Enable compilation with or without MPI." ON)
29-
option(USE_ELPA "Enable ELPA" ON)
30-
option(USE_ABACUS_LIBM "Build libmath from source to speed up." OFF)
36+
option(INFO "Enable gathering math library information" OFF)
37+
option(ENABLE_COVERAGE "Enable coverage build" OFF)
3138
option(GIT_SUBMODULE "Check submodules during build" ON)
32-
option(DEBUG_INFO "Print message for developers to debug." OFF)
39+
40+
option(ENABLE_PAW "Enable PAW method" OFF)
41+
3342
# Do not enable it if generated code will run on different CPUs
3443
option(ENABLE_NATIVE_OPTIMIZATION
3544
"Enable compilation optimization for the native machine's CPU type" OFF)
45+
3646
option(COMMIT_INFO "Print commit information in log" ON)
37-
option(ENABLE_FFT_TWO_CENTER "Enable FFT-based two-center integral method." ON)
38-
option(ENABLE_GOOGLEBENCH "Enable GOOGLE-benchmark usage." OFF)
39-
option(ENABLE_RAPIDJSON "Enable rapid-json usage." OFF)
40-
option(ENABLE_CNPY "Enable cnpy usage." OFF)
41-
option(ENABLE_PEXSI "Enable support for PEXSI." OFF)
42-
option(ENABLE_CUSOLVERMP "Enable cusolvermp." OFF)
43-
option(USE_DSP "Enable DSP usage." OFF)
44-
option(USE_CUDA_ON_DCU "Enable CUDA on DCU" OFF)
45-
option(USE_CUDA_MPI "Enable CUDA-aware MPI" OFF)
47+
option(ENABLE_FFT_TWO_CENTER "Enable FFT-based two-center integral method" ON)
48+
option(ENABLE_GOOGLEBENCH "Enable GOOGLE-benchmark usage" OFF)
49+
option(ENABLE_RAPIDJSON "Enable rapid-json usage" OFF)
50+
option(ENABLE_CNPY "Enable cnpy usage" OFF)
51+
option(ENABLE_CUSOLVERMP "Enable cusolvermp" OFF)
4652

4753
# enable json support
4854
if(ENABLE_RAPIDJSON)

docs/advanced/acceleration/cuda.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ We provides [examples](https://github.com/deepmodeling/abacus-develop/tree/devel
4646
PW basis:
4747
- Only k point parallelization is supported, so the input keyword `kpar` will be set to match the number of MPI tasks automatically.
4848
- By default, CUDA architectures 60, 70, 75, 80, 86, and 89 are compiled (if supported). This can be overridden using the CMake variable [`CMAKE_CUDA_ARCHITECTURES`](https://cmake.org/cmake/help/latest/variable/CMAKE_CUDA_ARCHITECTURES.html) or the environment variable [`CUDAARCHS`](https://cmake.org/cmake/help/latest/envvar/CUDAARCHS.html).
49+
4950
LCAO basis:
5051
- Unless there is a specific reason, avoid using multiple GPUs, as it can be slower than using a single GPU. This is because solving the generalized eigenvalue problem in the LCAO basis incurs additional communication overhead when it is distributed over multiple cards. When the memory of a single GPU card is insufficient to complete the task, it is recommended to use multiple cards for the calculation.
5152
- When using ELPA on GPUs, some ELPA internal logs will be output.

docs/advanced/input_files/input-main.md

Lines changed: 37 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@
140140
- [out\_wfc\_r](#out_wfc_r)
141141
- [out\_wfc\_lcao](#out_wfc_lcao)
142142
- [out\_dos](#out_dos)
143+
- [out\_ldos](#out_ldos)
143144
- [out\_band](#out_band)
144145
- [out\_proj\_band](#out_proj_band)
145146
- [out\_stru](#out_stru)
@@ -154,6 +155,7 @@
154155
- [out\_mat\_dh](#out_mat_dh)
155156
- [out\_mat\_xc](#out_mat_xc)
156157
- [out\_mat\_xc2](#out_mat_xc2)
158+
- [out\_mat\_l](#out_mat_l)
157159
- [out\_eband\_terms](#out_eband_terms)
158160
- [out\_hr\_npz/out\_dm\_npz](#out_hr_npzout_dm_npz)
159161
- [dm\_to\_rho](#dm_to_rho)
@@ -174,6 +176,7 @@
174176
- [dos\_emin\_ev](#dos_emin_ev)
175177
- [dos\_emax\_ev](#dos_emax_ev)
176178
- [dos\_nche](#dos_nche)
179+
- [stm\_bias](#stm_bias)
177180
- [NAOs](#naos)
178181
- [bessel\_nao\_ecut](#bessel_nao_ecut)
179182
- [bessel\_nao\_tolerence](#bessel_nao_tolerence)
@@ -1702,10 +1705,15 @@ These variables are used to control the output of properties.
17021705
- **Description**: Whether to output the density of states (DOS). For more information, refer to the [dos.md](../elec_properties/dos.md).
17031706
- 0: no output
17041707
- 1: output the density of states (DOS)
1705-
- 2:
1706-
- lcao-only: output the density of states (DOS) and the projected density of states (PDOS)
1708+
- 2: (lcao-only) output the density of states (DOS) and the projected density of states (PDOS)
17071709
- **Default**: 0
17081710

1711+
### out_ldos
1712+
1713+
- **Type**: Boolean
1714+
- **Description**: Whether to output the local density of states for the given bias in cube file format. The bias is controlled by [stm_bias](#stm_bias).
1715+
- **Default**: False
1716+
17091717
### out_band
17101718

17111719
- **Type**: Boolean \[Integer\](optional)
@@ -1807,6 +1815,13 @@ The band (KS orbital) energy for each (k-point, spin, band) will be printed in t
18071815
- **Description**: Whether to print the exchange-correlation matrices in **numerical orbital representation** (unit: Ry): $\braket{\phi_i|V_\text{xc}^\text{(semi-)local}+V_\text{exx}+V_\text{DFTU}|\phi_j}(\mathbf{R})$ in CSR format (the same format as [out_mat_hs2](../elec_properties/hs_matrix.md#out_mat_hs2)) in the directory `OUT.${suffix}`. (Note that currently DeePKS term is not included. ) The files are named `Vxc_R_spin$s`.
18081816
- **Default**: False
18091817

1818+
### out_mat_l
1819+
1820+
- **Type**: Boolean \[Integer\](optional)
1821+
- **Availability**: Numerical atomic orbital (NAO) basis
1822+
- **Description**: Whether to print the expectation values of the angular momentum operators $\hat{L}_x$, $\hat{L}_y$, and $\hat{L}_z$ in the basis of the localized atomic orbitals. The files are named `OUT.${suffix}/${suffix}_Lx.dat`, `OUT.${suffix}/${suffix}_Ly.dat`, and `OUT.${suffix}/${suffix}_Lz.dat`. The optional second parameter (an integer) controls the precision of the output.
1823+
- **Default**: False 8
1824+
18101825
### out_eband_terms
18111826

18121827
- **Type**: Boolean
@@ -1955,9 +1970,20 @@ These variables are used to control the calculation of DOS. [Detailed introducti
19551970
### dos_nche
19561971

19571972
- **Type**: Integer
1958-
The order of Chebyshev expansions when using Stochastic Density Functional Theory (SDFT) to calculate DOS.
1973+
- **Description**: The order of Chebyshev expansions when using Stochastic Density Functional Theory (SDFT) to calculate DOS.
19591974
- **Default**: 100
19601975

1976+
### stm_bias
1977+
1978+
- **Type**: Real Real(optional) Integer(optional)
1979+
- **Description**: The bias voltage used to calculate the local density of states for simulating a scanning tunneling microscope; see [out_ldos](#out_ldos) for details. When three parameters are given:
1980+
1981+
- The first parameter specifies the initial bias voltage value.
1982+
- The second parameter defines the voltage increment (step size between consecutive bias values).
1983+
- The third parameter determines the total number of voltage points.
1984+
- **Default**: 1.0
1985+
- **Unit**: V
1986+
19611987
[back to top](#full-list-of-input-keywords)
19621988

19631989
## NAOs
@@ -2004,10 +2030,13 @@ Warning: this function is not robust enough for the current version. Please try
20042030

20052031
### deepks_out_labels
20062032

2007-
- **Type**: Boolean
2033+
- **Type**: Integer
20082034
- **Availability**: numerical atomic orbital basis
2009-
- **Description**: Print labels and descriptors for DeePKS training in OUT.${suffix}. The names of these files start with "deepks".
2010-
- **Note**: In `LCAO` calculation, the path of a numerical descriptor (an `orb` file) is needed to be specified under the `NUMERICAL_DESCRIPTOR` tag in the `STRU` file. For example:
2035+
- **Description**: Print labels and descriptors for DeePKS in OUT.${suffix}. The names of these files start with "deepks".
2036+
- 0 : No output.
2037+
- 1 : Output intermediate files needed during DeePKS training.
2038+
- 2 : Output target labels for label preparation. The label files are named as `deepks_<property>.npy`, where the units and formats are the same as label files `<property>.npy` required for training, except that the first dimension (`nframes`) is excluded. System structure files are also given in `deepks_atom.npy` and `deepks_box.npy` in the unit of *Bohr*, which means `lattice_constant` should be set to 1 when training.
2039+
- **Note**: When `deepks_out_labels` equals **1**, the path of a numerical descriptor (an `orb` file) needs to be specified under the `NUMERICAL_DESCRIPTOR` tag in the `STRU` file. For example:
20112040

20122041
```text
20132042
NUMERICAL_ORBITAL
@@ -2017,8 +2046,8 @@ Warning: this function is not robust enough for the current version. Please try
20172046
NUMERICAL_DESCRIPTOR
20182047
jle.orb
20192048
```
2020-
2021-
- **Default**: False
2049+
This is not needed when `deepks_out_labels` equals 2.
2050+
- **Default**: 0
20222051

20232052
### deepks_scf
20242053

@@ -3858,12 +3887,6 @@ These variables are used to control berry phase and wannier90 interface paramete
38583887
- 1: Yes.
38593888
- **Default**: 0
38603889

3861-
- **Type**: Boolean
3862-
- **Description**: Specify whether to set the colorful output in terminal.
3863-
- 0: No.
3864-
- 1: Yes.
3865-
- **Default**: 0
3866-
38673890
### test_skip_ewald
38683891

38693892
- **Type**: Boolean

source/Makefile.Objects

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,9 @@ OBJS_IO=input_conv.o\
497497
write_dos_pw.o\
498498
nscf_band.o\
499499
cal_dos.o\
500+
cal_pdos_gamma.o\
501+
cal_pdos_multik.o\
502+
cal_ldos.o\
500503
cif_io.o\
501504
dos_nao.o\
502505
numerical_descriptor.o\
@@ -554,6 +557,7 @@ OBJS_IO=input_conv.o\
554557
read_input_item_output.o\
555558
read_set_globalv.o\
556559
orb_io.o\
560+
cal_pLpR.o\
557561

558562
OBJS_IO_LCAO=cal_r_overlap_R.o\
559563
write_orb_info.o\

source/module_base/parallel_global.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -405,14 +405,16 @@ void Parallel_Global::divide_mpi_groups(const int& procs,
405405
exit(1);
406406
}
407407

408-
if(rank < extra_procs)
408+
if(rank < extra_procs * (procs_in_group + 1))
409409
{
410+
// The first extra_procs groups have procs_in_group + 1 processes.
410411
procs_in_group++;
411412
my_group = rank / procs_in_group;
412413
rank_in_group = rank % procs_in_group;
413414
}
414415
else
415416
{
417+
// The remaining groups have procs_in_group processes.
416418
my_group = (rank - extra_procs) / procs_in_group;
417419
rank_in_group = (rank - extra_procs) % procs_in_group;
418420
}

source/module_base/test/timer_test.cpp

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -59,19 +59,22 @@ TEST_F(TimerTest, Tick)
5959
EXPECT_GT(ModuleBase::timer::timer_pool["wavefunc"]["evc"].cpu_second,0.0001);
6060
}
6161

62+
6263
TEST_F(TimerTest, Start)
6364
{
6465
ModuleBase::timer::start();
6566
// start() called tick() once
6667
EXPECT_FALSE(ModuleBase::timer::timer_pool[""]["total"].start_flag);
6768
}
6869

70+
6971
TEST_F(TimerTest, write_to_json)
7072
{
7173
ModuleBase::timer::tick("wavefunc","evc");
7274
std::this_thread::sleep_for(std::chrono::microseconds(T_Elapse)); // 0.1 ms
7375
ModuleBase::timer::tick("wavefunc","evc");
7476
ModuleBase::timer::write_to_json("tmp.json");
77+
7578
// check if tmp.json exists
7679
ifs.open("tmp.json");
7780
EXPECT_TRUE(ifs.good());
@@ -89,9 +92,6 @@ TEST_F(TimerTest, write_to_json)
8992
content += tmp;
9093
}
9194

92-
// check if the content is correct
93-
// shuold be like this:
94-
// {"total": 0, "sub":[{"class_name": "wavefunc","sub":[{"name":"evc","cpu_second": 0.000318,"calls":2,"cpu_second_per_call":0.000159,"cpu_second_per_total": null}]}]}
9595
EXPECT_THAT(content,testing::HasSubstr("\"total\":"));
9696
EXPECT_THAT(content,testing::HasSubstr("\"sub\":[{\"class_name\":\"wavefunc\",\"sub\":[{\"name\":\"evc\",\"cpu_second\":"));
9797
EXPECT_THAT(content,testing::HasSubstr("\"calls\":2,\"cpu_second_per_call\":"));
@@ -106,25 +106,29 @@ TEST_F(TimerTest, PrintAll)
106106
ModuleBase::timer::tick("wavefunc","evc");
107107
std::this_thread::sleep_for(std::chrono::microseconds(T_Elapse)); // 0.1 ms
108108
ModuleBase::timer::tick("wavefunc","evc");
109+
109110
// call print_all
110111
ofs.open("tmp");
111112
testing::internal::CaptureStdout();
112113
ModuleBase::timer::print_all(ofs);
113114
output = testing::internal::GetCapturedStdout();
114115
ofs.close();
116+
115117
// check output on screen
116-
std::cout << "Get captured stdout: \n" << std::endl;
117-
std::cout << output << std::endl;
118+
// std::cout << "Get captured stdout: \n" << std::endl;
119+
// std::cout << output << std::endl;
118120
EXPECT_THAT(output,testing::HasSubstr("TIME STATISTICS"));
119121
EXPECT_THAT(output,testing::HasSubstr("CLASS_NAME"));
120122
EXPECT_THAT(output,testing::HasSubstr("NAME"));
121123
EXPECT_THAT(output,testing::HasSubstr("TIME/s"));
122124
EXPECT_THAT(output,testing::HasSubstr("CALLS"));
123125
EXPECT_THAT(output,testing::HasSubstr("AVG/s"));
124126
EXPECT_THAT(output,testing::HasSubstr("PER/%"));
127+
125128
// check output in file
126129
ifs.open("tmp");
127-
std::cout << "Capture contents line by line from output file: \n" << std::endl;
130+
// std::cout << "Capture contents line by line from output file: \n" << std::endl;
131+
getline(ifs,output);
128132
getline(ifs,output);
129133
EXPECT_THAT(output,testing::HasSubstr("TIME STATISTICS"));
130134
getline(ifs,output);
@@ -139,36 +143,41 @@ TEST_F(TimerTest, PrintAll)
139143
remove("time.json");
140144
}
141145

146+
142147
TEST_F(TimerTest, PrintUntilNow)
143148
{
144149
long double time = ModuleBase::timer::print_until_now();
145150
EXPECT_TRUE(time>0.0);
146151
}
147152

153+
148154
TEST_F(TimerTest, Finish)
149155
{
150156
ModuleBase::timer::tick("wavefunc","evc");
151157
std::this_thread::sleep_for(std::chrono::microseconds(T_Elapse)); // 0.1 ms
152158
ModuleBase::timer::tick("wavefunc","evc");
159+
153160
// call print_all
154161
ofs.open("tmp");
155162
testing::internal::CaptureStdout();
156163
ModuleBase::timer::finish(ofs);
157164
output = testing::internal::GetCapturedStdout();
158165
ofs.close();
159166
// check output on screen
160-
std::cout << "Get captured stdout: \n" << std::endl;
161-
std::cout << output << std::endl;
167+
//std::cout << "Get captured stdout: \n" << std::endl;
168+
//std::cout << output << std::endl;
162169
EXPECT_THAT(output,testing::HasSubstr("TIME STATISTICS"));
163170
EXPECT_THAT(output,testing::HasSubstr("CLASS_NAME"));
164171
EXPECT_THAT(output,testing::HasSubstr("NAME"));
165172
EXPECT_THAT(output,testing::HasSubstr("TIME/s"));
166173
EXPECT_THAT(output,testing::HasSubstr("CALLS"));
167174
EXPECT_THAT(output,testing::HasSubstr("AVG/s"));
168175
EXPECT_THAT(output,testing::HasSubstr("PER/%"));
176+
169177
// check output in file
170178
ifs.open("tmp");
171-
std::cout << "Capture contents line by line from output file: \n" << std::endl;
179+
//std::cout << "Capture contents line by line from output file: \n" << std::endl;
180+
getline(ifs,output);
172181
getline(ifs,output);
173182
EXPECT_THAT(output,testing::HasSubstr("TIME STATISTICS"));
174183
getline(ifs,output);
@@ -182,7 +191,6 @@ TEST_F(TimerTest, Finish)
182191
ifs.close();
183192
}
184193

185-
// use __MPI to activate parallel environment
186194
#ifdef __MPI
187195
int main(int argc, char **argv)
188196
{

source/module_base/test/tool_quit_test.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ TEST_F(ToolQuitTest,warningquit)
9393
ifs.open("running.log");
9494
getline(ifs,output);
9595
// test output in running.log file
96-
EXPECT_THAT(output,testing::HasSubstr("!!!!!!!"));
96+
EXPECT_THAT(output,testing::HasSubstr("-------"));
9797
ifs.close();
9898
}
9999

@@ -116,7 +116,7 @@ TEST_F(ToolQuitTest,warningquit_with_ret)
116116
ifs.open("running.log");
117117
getline(ifs,output);
118118
// test output in running.log file
119-
EXPECT_THAT(output,testing::HasSubstr("!!!!!!!"));
119+
EXPECT_THAT(output,testing::HasSubstr("-------"));
120120
ifs.close();
121121
}
122122
// use __MPI to activate parallel environment

source/module_base/test_parallel/blacs_connector_test.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,11 @@ do
1010
fi
1111
echo "TEST in parallel, nprocs=$i"
1212
mpirun -np $i ./blacs_connector
13+
if [[ $? -ne 0 ]]; then
14+
echo -e "\e[1;33m [ FAILED ] \e[0m"\
15+
"execute UT with $i cores error."
16+
exit 1
17+
fi
1318
break
1419
done
1520

source/module_base/test_parallel/parallel_2d_test.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,9 @@ for i in 2 3 4; do
99
fi
1010
echo "TEST in parallel, nprocs=$i"
1111
mpirun -np $i ./parallel_2d_test
12+
if [[ $? -ne 0 ]]; then
13+
echo -e "\e[1;33m [ FAILED ] \e[0m"\
14+
"execute UT with $i cores error."
15+
exit 1
16+
fi
1217
done

0 commit comments

Comments
 (0)