Skip to content

Commit 78da69f

Browse files
committed
Merge branch 'develop' of github.com:deepmodeling/abacus-develop into HSolver
2 parents 88ad345 + 4790ed2 commit 78da69f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

76 files changed

+3549
-2002
lines changed

CMakeLists.txt

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ project(ABACUS
1313

1414
option(ENABLE_DEEPKS "Enable DeePKS functionality" OFF)
1515
option(ENABLE_LIBXC "Enable LibXC functionality" OFF)
16-
option(USE_CUDA "Enable support to CUDA." OFF)
16+
option(USE_CUDA "Enable support to CUDA for PW." OFF)
17+
option(USE_CUSOLVER_LCAO "Enable support to CUSOLVER for LCAO." OFF)
1718
option(USE_ROCM "Enable support to ROCm." OFF)
1819
option(USE_OPENMP " Enable OpenMP in abacus." ON)
1920
option(ENABLE_ASAN "Enable AddressSanitizer" OFF)
@@ -58,21 +59,32 @@ set(CMAKE_CXX_STANDARD 11)
5859
include(CheckLanguage)
5960
check_language(CUDA)
6061
if(CMAKE_CUDA_COMPILER)
61-
if(NOT DEFINED USE_CUDA)
62-
message("CUDA components detected. \nWill build the CUDA version of ABACUS.")
63-
set(USE_CUDA ON)
62+
if(NOT DEFINED USE_CUDA OR NOT DEFINED USE_CUSOLVER_LCAO)
63+
if (NOT DEFINED USE_CUDA AND NOT DEFINED USE_CUSOLVER_LCAO)
64+
message("CUDA components detected. \nWill build the CUDA for PW version of ABACUS by default.")
65+
set(USE_CUDA ON)
66+
set(USE_CUSOLVER_LCAO OFF)
67+
elseif (NOT DEFINED USE_CUDA)
68+
set(USE_CUDA OFF)
69+
else()
70+
set(USE_CUSOLVER_LCAO OFF)
71+
endif()
6472
else()
65-
if(NOT USE_CUDA)
66-
message(WARNING "CUDA components detected, but USE_CUDA set to OFF. \nNOT building CUDA version of ABACUS.")
73+
if(NOT USE_CUDA AND NOT USE_CUSOLVER_LCAO)
74+
message(WARNING "CUDA components detected, but both USE_CUDA and USE_CUSOLVER_LCAO set to OFF. \nNOT building CUDA version of ABACUS.")
75+
elseif (USE_CUDA AND USE_CUSOLVER_LCAO)
76+
message(FATAL_ERROR "USE_CUDA and USE_CUSOLVER_LCAO set, but now they not allowed to coexist.")
6777
endif()
6878
endif()
6979
else() # CUDA not found
70-
if (USE_CUDA)
71-
message(FATAL_ERROR "USE_CUDA set but no CUDA components found.")
80+
if (USE_CUDA OR USE_CUSOLVER_LCAO)
81+
message(FATAL_ERROR "USE_CUDA or USE_CUSOLVER_LCAO set but no CUDA components found.")
7282
set(USE_CUDA OFF)
83+
set(USE_CUSOLVER_LCAO OFF)
7384
endif()
7485
endif()
75-
if(USE_CUDA)
86+
87+
if(USE_CUDA OR USE_CUSOLVER_LCAO)
7688
set(CMAKE_CXX_STANDARD 14)
7789
set(CMAKE_CXX_EXTENSIONS ON)
7890
set(CMAKE_CXX_STANDARD_REQUIRED ON)
@@ -92,11 +104,18 @@ if(USE_CUDA)
92104
60 # P100
93105
70 # V100
94106
75 # T4
107+
80 # A100
95108
)
96109
include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
97-
add_compile_definitions(__CUDA)
110+
if (USE_CUDA)
111+
add_compile_definitions(__CUDA)
112+
endif()
113+
if (USE_CUSOLVER_LCAO)
114+
add_compile_definitions(__CUSOLVER_LCAO)
115+
endif()
98116
endif()
99117

118+
100119
# Warning: CMake add support to HIP in version 3.21. This is rather a new version.
101120
# Use cmake with AMD-ROCm: https://rocmdocs.amd.com/en/latest/Installation_Guide/Using-CMake-with-AMD-ROCm.html
102121
if(USE_ROCM)

docs/features.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ equation. For PW basis, there are CG and Blocked Davidson methods for solving th
7474
equation for each basis.
7575

7676
- PW: ks_solver = ‘cg’ or ‘dav’
77-
- LCAO: ks_solver = ‘hpseps’ , ‘genelpa’ or ‘lapack’
77+
- LCAO: ks_solver = ‘hpseps’, ‘genelpa’, ‘scalapack_gvx’ or ‘cusolver’
7878
- LCAO_in_PW: ks_solver = ‘lapack’
7979

8080
If you set ks_solver=‘hpseps’ for basis_type=‘pw’, the program will be stopped with an error

docs/input-main.md

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
- [System variables](#system-variables)
1010

11-
[suffix](#suffix) | [ntype](#ntype) | [calculation](#calculation) | [symmetry](#symmetry) | [kpar](#kpar) | [latname](#latname) | [init_wfc](#init_wfc) | [init_chg](#init_chg) | [init_vel](#init_vel) | [nelec](#nelec) | [tot_magnetization](#tot-magnetization) | [dft_functional](#dft-functional) | [pseudo_type](#pseudo-type) | [pseudo_rcut](#pseudo-rcut) | [pseudo_mesh](#pseudo_mesh) | [mem_saver](#mem-saver) | [diago_proc](#diago_proc) | [nbspline](#nbspline)
11+
[suffix](#suffix) | [ntype](#ntype) | [calculation](#calculation) | [symmetry](#symmetry) | [kpar](#kpar) | [bndpar](#bndpar) | [latname](#latname) | [init_wfc](#init_wfc) | [init_chg](#init_chg) | [init_vel](#init_vel) | [nelec](#nelec) | [tot_magnetization](#tot-magnetization) | [dft_functional](#dft-functional) | [pseudo_type](#pseudo-type) | [pseudo_rcut](#pseudo-rcut) | [pseudo_mesh](#pseudo_mesh) | [mem_saver](#mem-saver) | [diago_proc](#diago_proc) | [nbspline](#nbspline)
1212

1313
- [Variables related to input files](#variables-related-to-input-files)
1414

@@ -24,7 +24,7 @@
2424

2525
- [Electronic structure](#electronic-structure)
2626

27-
[basis_type](#basis-type) | [ks_solver](#ks-solver) | [nbands](#nbands) | [nbands_istate](#nbands-istate) | [nspin](#nspin) | [occupations](#occupations) | [smearing_method](#smearing_method) | [smearing_sigma](#smearing_sigma) | [mixing_type](#mixing-type) | [mixing_beta](#mixing-beta) | [mixing_ndim](#mixing-ndim) | [mixing_gg0](#mixing-gg0) | [gamma_only](#gamma-only) | [printe](#printe) | [scf_nmax](#scf_nmax) | [scf_thr](#scf_thr) | [chg_extrap](#chg_extrap)
27+
[basis_type](#basis-type) | [ks_solver](#ks-solver) | [nbands](#nbands) | [nbands_sto](#nbands_sto) | [nbands_istate](#nbands-istate) | [nspin](#nspin) | [occupations](#occupations) | [smearing_method](#smearing_method) | [smearing_sigma](#smearing_sigma) | [mixing_type](#mixing-type) | [mixing_beta](#mixing-beta) | [mixing_ndim](#mixing-ndim) | [mixing_gg0](#mixing-gg0) | [gamma_only](#gamma-only) | [printe](#printe) | [scf_nmax](#scf_nmax) | [scf_thr](#scf_thr) | [chg_extrap](#chg_extrap) | [nche_sto](#nche_sto) | [emin_sto](#emin_sto) | [emax_sto](#emax_sto) | [seed_sto](#seed_sto)
2828

2929
- [Geometry relaxation](#geometry-relaxation)
3030

@@ -150,6 +150,12 @@ This part of variables are used to control general system parameters.
150150
- **Description**: Divide all processors into kpar groups; the k points will be distributed among the groups. The value should be less than or equal to both the number of k points and the number of MPI threads.
151151
- **Default**: 1
152152

153+
#### bndpar
154+
155+
- **Type**: Integer
156+
- **Description**: Divide all processors into bndpar groups; the bands (currently only stochastic orbitals) will be distributed among the groups. The value should be larger than 0.
157+
- **Default**: 1
158+
153159
#### latname
154160

155161
- **Type**: String
@@ -435,6 +441,7 @@ calculations.
435441
- genelpa: This method should be used if you choose localized orbitals.
436442
- hpseps: old method, still used.
437443
- lapack: lapack can be used for localized orbitals, but is only used for single processor.
444+
- cusolver: this method requires ABACUS to be built with the cuSOLVER component for LCAO (`USE_CUSOLVER_LCAO`) and at least one available GPU.
438445

439446
If you set ks_solver=`hpseps` for basis_type=`pw`, the program will be stopped with an error message:
440447

@@ -453,6 +460,14 @@ calculations.
453460
- nspin=1: max(1.2\*occupied_bands, occupied_bands+10)
454461
- nspin=2: max(1.2\*nelec, nelec+20)
455462
463+
#### nbands_sto
464+
465+
- **Type**: Integer
466+
- **Description**:
467+
- nbands_sto>0: Number of stochastic orbitals to calculate in stochastic DFT (SDFT) or mixed stochastic-deterministic DFT (MDFT). More orbitals give more precise results, i.e. smaller stochastic errors ($\propto 1/\sqrt{N_{\chi}}$);
468+
- nbands_sto=0: A complete basis will be used in place of stochastic orbitals with the Chebyshev method (CT); this gives the same results as KSDFT, without stochastic errors.
469+
- **Default**: 0
470+
456471
#### nbands_istate
457472
458473
- **Type**: Integer
@@ -551,6 +566,32 @@ calculations.
551566
- second-order: second-order extrapolation
552567
- **Default**:atomic
553568
569+
#### nche_sto
570+
571+
- **Type**: Integer
572+
- **Description**: Chebyshev expansion order for the SDFT, MDFT and CT methods.
573+
- **Default**:5
574+
575+
#### emin_sto
576+
577+
- **Type**: Real
578+
- **Description**: Trial energy used as a guess for the lower bound of the eigenenergies of the Hamiltonian operator $\hat{H}$. The unit is Ry.
579+
- **Default**:0.0
580+
581+
#### emax_sto
582+
583+
- **Type**: Real
584+
- **Description**: Trial energy used as a guess for the upper bound of the eigenenergies of the Hamiltonian operator $\hat{H}$. The unit is Ry.
585+
- **Default**:0.0
586+
587+
#### seed_sto
588+
589+
- **Type**: Integer
590+
- **Description**: The random seed to generate stochastic orbitals.
591+
- seed_sto>=0: Stochastic orbitals have the form $\exp(i2\pi\theta(G))$, where $\theta$ is uniformly distributed in $(0,1)$. If seed_sto=0, the seed is decided by time(NULL).
592+
- seed_sto<=-1: Stochastic orbitals take the values $\pm1$ with equal probability. If seed_sto=-1, the seed is decided by time(NULL).
593+
- **Default**:0
594+
554595
### Geometry relaxation
555596
556597
This part of variables are used to control the geometry relaxation.

docs/install.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,9 @@ You can also choose to build with which components.
7575
```bash
7676
cmake -B build -DENABLE_LIBXC=1 -DUSE_CUDA=1
7777
```
78+
```bash
79+
cmake -B build -DUSE_CUSOLVER_LCAO=1
80+
```
7881

7982
If Libxc is not installed in standard path (i.e. installed with a custom prefix path), you may add the installation prefix of `FindLibxc.cmake` to `CMAKE_MODULE_PATH` environment variable, or set `Libxc_DIR` to the directory containing the file.
8083

examples/performance/clean.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/bin/bash
22

33
for i in P*;do
4-
rm -rf $i/result.log $i/OUT.* $i/result.out
4+
rm -rf $i/result.log $i/time.log $i/OUT.* $i/result.out
55
done
66

77
rm -rf *cpu *kpar *bxyz sum.dat* *.old log

examples/performance/run.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ run_abacus(){
8080
lastword=`tail -1 result.log | awk '{print $1}'`
8181
fi
8282
if [[ $lastword != "SEE" ]];then
83-
mpirun -n $1 -env OMP_NUM_THREADS=$2 $abacus > result.log
83+
/usr/bin/time -v mpirun -n $1 -env OMP_NUM_THREADS=$2 $abacus > result.log 2>time.log
8484
else
8585
printf "**result.log is normal end, skip this job** "
8686
fi

examples/performance/sumdat.sh

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ fi
1313
test -f $outf && rm $outf
1414

1515
#title
16-
printf "%20s %7s %8s %8s %6s %6s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s\n" \
16+
printf "%20s %7s %8s %8s %6s %6s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s\n" \
1717
"example" "Natoms" "EneCut" "k-points" "NProc" "Niter" "TotTime" "1stSCF" "SCF/iter" \
18-
"s_c%" "c_bands%" "s_bands%" "h_psi%" "vloc%" "vnl%" "FFT%" "stress%" "force%"> $outf
18+
"Run%" "c_bands%" "s_bands%" "h_psi%" "vloc%" "vnl%" "FFT%" "stress%" "force%" "MaxResSize" > $outf
1919

2020
for i in `cat $allcase`;do
2121
if [[ ! -f $i/result.log ]];then
@@ -34,13 +34,13 @@ for i in `cat $allcase`;do
3434
niter=`sed -n '/ITER ETOT(eV)/,/><><><><>/'p ${i}/result.log | wc -l|awk '{print $1-2}'`
3535
tottime=`awk '$1=="total"{printf"%.2f", $2}' ${i}/result.log`
3636
scf1=`grep -A 1 "ITER ETOT(eV)" ${i}/result.log | awk 'END{printf"%.2f", $NF}'`
37-
totalscf=`awk '$2=="self_consistent"{print $3}' ${i}/result.log`
37+
totalscf=`awk '$2=="Run"{print $3}' ${i}/result.log`
3838
scfpiter=`awk -v a=$totalscf -v b=$scf1 -v c=$niter 'BEGIN{printf"%.2f",(a-b)/(c-1)}'`
3939
fft=`awk '$2=="FFT3D"{printf"%.1f",$6}' ${i}/result.log`
4040
hpsi=`awk '$2=="h_psi"{printf"%.1f",$6}' ${i}/result.log`
4141
vloc=`awk '$2=="vloc"{printf"%.1f",$6}' ${i}/result.log`
4242
vnl=`awk '$2=="vnl"{printf"%.1f",$6}' ${i}/result.log`
43-
sc=`awk '$2=="self_consistent"{printf"%.1f",$6}' ${i}/result.log`
43+
sc=`awk '$2=="Run"{printf"%.1f",$6}' ${i}/result.log`
4444
cbands=`awk '$2=="c_bands"{printf"%.1f",$6}' ${i}/result.log`
4545
sbands=`awk '$2=="sum_band"{printf"%.1f",$6}' ${i}/result.log`
4646
stress=`awk '$2=="cal_stress"{printf"%.1f",$6}' ${i}/result.log`
@@ -53,13 +53,13 @@ for i in `cat $allcase`;do
5353
niter=`sed -n '/ITER ETOT(eV)/,/><><><><>/'p ${i}/result.log | wc -l|awk '{print $1-2}'`
5454
tottime=`awk '$1=="total"{printf"%.2f", $2}' ${i}/result.log`
5555
scf1=`grep -A 1 "ITER ETOT(eV)" ${i}/result.log | awk 'END{printf"%.2f", $NF}'`
56-
totalscf=`awk '$1=="ELEC_scf"{print $3}' ${i}/result.log`
56+
# Match the routine name in column 2, like the other timing filters in this branch (sc, cal_bands, ...).
totalscf=`awk '$2=="Run"{print $3}' ${i}/result.log`
5757
scfpiter=`awk -v a=$totalscf -v b=$scf1 -v c=$niter 'BEGIN{printf"%.2f",(a-b)/(c-1)}'`
5858
fft="-"
5959
hpsi="-"
6060
vloc=`awk '$2=="vlocal"{printf"%.1f",$6}' ${i}/result.log`
6161
vnl="-"
62-
sc=`awk '$1=="ELEC_scf"{printf"%.1f",$6}' ${i}/result.log`
62+
sc=`awk '$2=="Run"{printf"%.1f",$6}' ${i}/result.log`
6363
cbands=`awk '$2=="cal_bands"{printf"%.1f",$6}' ${i}/result.log`
6464
sbands=`awk '$2=="sum_bands"{printf"%.1f",$6}' ${i}/result.log`
6565
stress=`awk '$2=="evaluate_vl_stress"{printf"%.1f",$6}' ${i}/result.log`
@@ -68,9 +68,10 @@ for i in `cat $allcase`;do
6868
echo "ERROR: UNKNOW basis type $basis"
6969
continue
7070
fi
71+
maxres=`grep "Maximum resident set size" ${i}/time.log | awk '{print $NF}'`
7172

72-
printf "%20s %7s %8s %8s %6s %6s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s\n" \
73+
printf "%20s %7s %8s %8s %6s %6s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %s\n" \
7374
$i $natoms $encut $kpt $nproc $niter $tottime $scf1 $scfpiter $sc $cbands $sbands $hpsi $vloc $vnl $fft \
74-
$stress $force >> $outf
75+
$stress $force $maxres >> $outf
7576

7677
done

source/Makefile.Objects

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,8 @@ soc.o\
5757
to_wannier90.o \
5858
unk_overlap_pw.o \
5959
berryphase.o \
60-
sto_elec.o\
61-
sto_wf.o\
6260
sto_iter.o\
61+
sto_wf.o\
6362
sto_hchi.o\
6463
sto_che.o\
6564

source/driver.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,8 @@ void Driver::atomic_world(void)
9696
ModuleESolver::ESolver *p_esolver;
9797
if(GlobalV::BASIS_TYPE=="pw" || GlobalV::BASIS_TYPE=="lcao_in_pw")
9898
{
99-
use_ensol = "ksdft_pw";
99+
if(GlobalV::CALCULATION.substr(0,3) == "sto") use_ensol = "sdft_pw";
100+
else use_ensol = "ksdft_pw";
100101
//We set it temporarily
101102
//Finally, we have ksdft_pw, ksdft_lcao, sdft_pw, ofdft, lj, eam, etc.
102103
ModuleESolver::init_esolver(p_esolver, use_ensol);

source/input.cpp

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -138,9 +138,9 @@ void Input::Default(void)
138138
nbands_sto = 0;
139139
nbands_istate = 5;
140140
pw_seed = 1;
141-
nche_sto = 0;
141+
nche_sto = 5;
142142
seed_sto = 0;
143-
stotype = "pw";
143+
bndpar = 1;
144144
kpar = 1;
145145
berry_phase = false;
146146
gdir = 3;
@@ -519,8 +519,8 @@ bool Input::Read(const std::string &fn)
519519
else if (strcmp("nbands", word) == 0) // number of atom bands
520520
{
521521
read_value(ifs, nbands);
522-
if (nbands <= 0)
523-
ModuleBase::WARNING_QUIT("Input", "NBANDS must > 0");
522+
if (nbands < 0)
523+
ModuleBase::WARNING_QUIT("Input", "NBANDS must >= 0");
524524
}
525525
else if (strcmp("nbands_sto", word) == 0) // number of stochastic bands
526526
{
@@ -553,9 +553,9 @@ bool Input::Read(const std::string &fn)
553553
{
554554
read_value(ifs, emin_sto);
555555
}
556-
else if (strcmp("stotype", word) == 0)
556+
else if (strcmp("bndpar", word) == 0)
557557
{
558-
read_value(ifs, stotype);
558+
read_value(ifs, bndpar);
559559
}
560560
else if (strcmp("kpar", word) == 0) // number of pools
561561
{
@@ -1803,6 +1803,7 @@ void Input::Default_2(void) // jiyy add 2019-08-04
18031803
vdw_radius = "95";
18041804
}
18051805
}
1806+
if(calculation.substr(0,3) != "sto") bndpar = 1;
18061807
}
18071808
#ifdef __MPI
18081809
void Input::Bcast()
@@ -1833,7 +1834,7 @@ void Input::Bcast()
18331834
Parallel_Common::bcast_int(pw_seed);
18341835
Parallel_Common::bcast_double(emax_sto);
18351836
Parallel_Common::bcast_double(emin_sto);
1836-
Parallel_Common::bcast_string(stotype);
1837+
Parallel_Common::bcast_int(bndpar);
18371838
Parallel_Common::bcast_int(kpar);
18381839
Parallel_Common::bcast_bool(berry_phase);
18391840
Parallel_Common::bcast_int(gdir);
@@ -2124,7 +2125,7 @@ void Input::Check(void)
21242125
ModuleBase::TITLE("Input", "Check");
21252126

21262127
if (nbands < 0)
2127-
ModuleBase::WARNING_QUIT("Input", "NBANDS must > 0");
2128+
ModuleBase::WARNING_QUIT("Input", "NBANDS must >= 0");
21282129
// if(nbands_istate < 0) ModuleBase::WARNING_QUIT("Input","NBANDS_ISTATE must > 0");
21292130
if (nb2d < 0)
21302131
ModuleBase::WARNING_QUIT("Input", "nb2d must > 0");
@@ -2180,7 +2181,7 @@ void Input::Check(void)
21802181
*/
21812182
this->relax_nmax = 1;
21822183
}
2183-
else if (calculation == "scf-sto") // qianrui 2021-2-20
2184+
else if (calculation == "sto-scf") // qianrui 2021-2-20
21842185
{
21852186
if (mem_saver == 1)
21862187
{
@@ -2484,6 +2485,12 @@ void Input::Check(void)
24842485
{
24852486
ModuleBase::WARNING_QUIT("Input", "not ready for linear_scaling method in lcao .");
24862487
}
2488+
else if (ks_solver == "cusolver")
2489+
{
2490+
#ifndef __MPI
2491+
ModuleBase::WARNING_QUIT("Input","Cusolver can not be used for series version.");
2492+
#endif
2493+
}
24872494
else
24882495
{
24892496
ModuleBase::WARNING_QUIT("Input", "please check the ks_solver parameter!");
@@ -2665,7 +2672,7 @@ void Input::Check(void)
26652672
if (!(calculation == "nscf"))
26662673
ModuleBase::WARNING_QUIT("Input", "calculate berry phase, please set calculation = nscf");
26672674
}
2668-
else if (basis_type == "lcao" && (ks_solver == "genelpa" || ks_solver == "scalapack_gvx"))
2675+
// Group the ks_solver alternatives so that the basis_type == "lcao" test applies to all of them
// ('&&' binds tighter than '||').
else if (basis_type == "lcao" && (ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver"))
26692676
{
26702677
if (!(calculation == "nscf"))
26712678
ModuleBase::WARNING_QUIT("Input", "calculate berry phase, please set calculation = nscf");

0 commit comments

Comments
 (0)