diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8571ab6309..8225aba614 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -31,7 +31,7 @@ jobs: - name: Configure run: | - cmake -B build -DBUILD_TESTING=ON -DENABLE_DEEPKS=ON -DENABLE_MLKEDF=ON -DENABLE_LIBXC=ON -DENABLE_LIBRI=ON -DENABLE_PAW=ON -DENABLE_GOOGLEBENCH=ON -DENABLE_RAPIDJSON=ON -DCMAKE_EXPORT_COMPILE_COMMANDS=1 + cmake -B build -DBUILD_TESTING=ON -DENABLE_DEEPKS=ON -DENABLE_MLKEDF=ON -DENABLE_LIBXC=ON -DENABLE_LIBRI=ON -DENABLE_PAW=ON -DENABLE_GOOGLEBENCH=ON -DENABLE_RAPIDJSON=ON -DCMAKE_EXPORT_COMPILE_COMMANDS=1 -DENABLE_FLOAT_FFTW=ON # Temporarily removed because no one maintains this now. # And it will break the CI test workflow. diff --git a/.gitignore b/.gitignore index 444e237950..ebad4b553d 100644 --- a/.gitignore +++ b/.gitignore @@ -23,4 +23,4 @@ __pycache__ abacus.json *.npy toolchain/install/ -toolchain/abacus_env.sh +toolchain/abacus_env.sh \ No newline at end of file diff --git a/source/module_base/test/math_chebyshev_test.cpp b/source/module_base/test/math_chebyshev_test.cpp index a7ea215266..ada96fe0f9 100644 --- a/source/module_base/test/math_chebyshev_test.cpp +++ b/source/module_base/test/math_chebyshev_test.cpp @@ -14,9 +14,6 @@ * - calfinalvec_real * - calfinalvec_complex * - tracepolyA - * - checkconverge - * - * */ class toolfunc { @@ -625,6 +622,8 @@ TEST_F(MathChebyshevTest, tracepolyA_float) TEST_F(MathChebyshevTest, checkconverge_float) { + #ifdef __MPI + #undef __MPI const int norder = 100; p_fchetest = new ModuleBase::Chebyshev(norder); @@ -648,5 +647,6 @@ TEST_F(MathChebyshevTest, checkconverge_float) delete[] v; delete p_fchetest; + #endif } #endif \ No newline at end of file diff --git a/source/module_base/test_parallel/CMakeLists.txt b/source/module_base/test_parallel/CMakeLists.txt index 5132549f7a..52f467690a 100644 --- a/source/module_base/test_parallel/CMakeLists.txt +++ 
b/source/module_base/test_parallel/CMakeLists.txt @@ -40,6 +40,12 @@ AddTest( SOURCES test_para_gemm.cpp ) +AddTest( + TARGET base_math_chebyshev_mpi + LIBS MPI::MPI_CXX parameter ${math_libs} base device container + SOURCES math_chebyshev_mpi_test.cpp +) + add_test(NAME base_para_gemm_parallel COMMAND mpirun -np 4 ./base_para_gemm WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} diff --git a/source/module_base/test_parallel/math_chebyshev_mpi_test.cpp b/source/module_base/test_parallel/math_chebyshev_mpi_test.cpp new file mode 100644 index 0000000000..5ca222bb3c --- /dev/null +++ b/source/module_base/test_parallel/math_chebyshev_mpi_test.cpp @@ -0,0 +1,207 @@ +#include "../math_chebyshev.h" +#include "mpi.h" +#include "module_base/parallel_comm.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +/************************************************ + * unit test of class Chebyshev MPI part + ***********************************************/ + + /** + * - Tested Functions: + * - checkconverge + */ +class toolfunc +{ + public: + double x7(double x) + { + return pow(x, 7); + } + double x6(double x) + { + return pow(x, 6); + } + double expr(double x) + { + return exp(x); + } + std::complex expi(std::complex x) + { + const std::complex j(0.0, 1.0); + return exp(j * x); + } + std::complex expi2(std::complex x) + { + const std::complex j(0.0, 1.0); + const double PI = 3.14159265358979323846; + return exp(j * PI / 2.0 * x); + } + // Pauli matrix: [0,-i;i,0] + int LDA = 2; + double factor = 1; + void sigma_y(std::complex* spin_in, std::complex* spin_out, const int m = 1) + { + const std::complex j(0.0, 1.0); + if (this->LDA < 2) { + this->LDA = 2; +} + for (int i = 0; i < m; ++i) + { + spin_out[LDA * i] = -factor * j * spin_in[LDA * i + 1]; + spin_out[LDA * i + 1] = factor * j * spin_in[LDA * i]; + } + } +#ifdef __ENABLE_FLOAT_FFTW + float x7(float x) + { + return pow(x, 7); + } + float x6(float x) + { + return pow(x, 6); + } + float expr(float x) + { + return exp(x); + } + 
std::complex expi(std::complex x) + { + const std::complex j(0.0, 1.0); + return exp(j * x); + } + std::complex expi2(std::complex x) + { + const std::complex j(0.0, 1.0); + const float PI = 3.14159265358979323846; + return exp(j * PI / 2.0f * x); + } + // Pauli matrix: [0,-i;i,0] + void sigma_y(std::complex* spin_in, std::complex* spin_out, const int m = 1) + { + const std::complex j(0.0, 1.0); + if (this->LDA < 2) + this->LDA = 2; + for (int i = 0; i < m; ++i) + { + spin_out[LDA * i] = -j * spin_in[LDA * i + 1]; + spin_out[LDA * i + 1] = j * spin_in[LDA * i]; + } + } +#endif +}; +class MathChebyshevTest : public testing::Test +{ + protected: + ModuleBase::Chebyshev* p_chetest; + ModuleBase::Chebyshev* p_fchetest; + toolfunc fun; + int dsize = 0; + int my_rank = 0; + void SetUp() override + { + int world_rank; + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); + int world_size; + MPI_Comm_size(MPI_COMM_WORLD, &world_size); + + int color = (world_rank < world_size / 2) ? 0 : 1; + int key = world_rank; + + MPI_Comm_split(MPI_COMM_WORLD, color, key, &POOL_WORLD); + + int pool_rank, pool_size; + MPI_Comm_rank(POOL_WORLD, &pool_rank); + MPI_Comm_size(POOL_WORLD, &pool_size); + } + void TearDown() override + { + } +}; + +TEST_F(MathChebyshevTest, checkconverge) +{ + const int norder = 100; + p_chetest = new ModuleBase::Chebyshev(norder); + auto fun_sigma_y + = [&](std::complex* in, std::complex* out, const int m = 1) { fun.sigma_y(in, out, m); }; + + std::complex* v = new std::complex[4]; + v[0] = 1.0; + v[1] = 0.0; + v[2] = 0.0; + v[3] = 1.0; //[1 0; 0 1] + double tmin = -1.1; + double tmax = 1.1; + bool converge; + converge = p_chetest->checkconverge(fun_sigma_y, v, 2, 2, tmax, tmin, 0.2); + EXPECT_TRUE(converge); + converge = p_chetest->checkconverge(fun_sigma_y, v + 2, 2, 2, tmax, tmin, 0.2); + EXPECT_TRUE(converge); + EXPECT_NEAR(tmin, -1.1, 1e-8); + EXPECT_NEAR(tmax, 1.1, 1e-8); + + tmax = -1.1; + converge = p_chetest->checkconverge(fun_sigma_y, v, 2, 2, tmax, tmin, 
2.2); + EXPECT_TRUE(converge); + EXPECT_NEAR(tmin, -1.1, 1e-8); + EXPECT_NEAR(tmax, 1.1, 1e-8); + + // not converge + v[0] = std::complex(0, 1), v[1] = 1; + fun.factor = 1.5; + tmin = -1.1, tmax = 1.1; + converge = p_chetest->checkconverge(fun_sigma_y, v, 2, 2, tmax, tmin, 0.2); + EXPECT_FALSE(converge); + + fun.factor = -1.5; + tmin = -1.1, tmax = 1.1; + converge = p_chetest->checkconverge(fun_sigma_y, v, 2, 2, tmax, tmin, 0.2); + EXPECT_FALSE(converge); + fun.factor = 1; + + delete[] v; + delete p_chetest; +} + +#ifdef __ENABLE_FLOAT_FFTW +TEST_F(MathChebyshevTest, checkconverge_float) +{ + const int norder = 100; + p_fchetest = new ModuleBase::Chebyshev(norder); + + std::complex* v = new std::complex[4]; + v[0] = 1.0; + v[1] = 0.0; + v[2] = 0.0; + v[3] = 1.0; //[1 0; 0 1] + float tmin = -1.1; + float tmax = 1.1; + bool converge; + + auto fun_sigma_yf + = [&](std::complex* in, std::complex* out, const int m = 1) { fun.sigma_y(in, out, m); }; + converge = p_fchetest->checkconverge(fun_sigma_yf, v, 2, 2, tmax, tmin, 0.2); + EXPECT_TRUE(converge); + converge = p_fchetest->checkconverge(fun_sigma_yf, v + 2, 2, 2, tmax, tmin, 0.2); + EXPECT_TRUE(converge); + EXPECT_NEAR(tmin, -1.1, 1e-6); + EXPECT_NEAR(tmax, 1.1, 1e-6); + + delete[] v; + delete p_fchetest; +} +#endif + +int main(int argc, char** argv) +{ +#ifdef __MPI + MPI_Init(&argc, &argv); +#endif + testing::InitGoogleTest(&argc, argv); + int result = RUN_ALL_TESTS(); +#ifdef __MPI + MPI_Finalize(); +#endif + return result; +} diff --git a/source/module_basis/module_pw/module_fft/fft_cpu.cpp b/source/module_basis/module_pw/module_fft/fft_cpu.cpp index be920d4ae2..5c4783d83d 100644 --- a/source/module_basis/module_pw/module_fft/fft_cpu.cpp +++ b/source/module_basis/module_pw/module_fft/fft_cpu.cpp @@ -347,11 +347,14 @@ void FFT_CPU::fftxyfor(std::complex* in, std::complex* o int npy = this->nplane * this->ny; if (this->xprime) { + fftw_execute_dft(this->planxfor1, (fftw_complex*)in, (fftw_complex*)out); + #pragma 
omp parallel for for (int i = 0; i < this->lixy + 1; ++i) { fftw_execute_dft(this->planyfor, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]); } + #pragma omp parallel for for (int i = rixy; i < this->nx; ++i) { fftw_execute_dft(this->planyfor, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]); @@ -359,6 +362,7 @@ void FFT_CPU::fftxyfor(std::complex* in, std::complex* o } else { + #pragma omp parallel for for (int i = 0; i < this->nx; ++i) { fftw_execute_dft(this->planyfor, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]); @@ -374,10 +378,12 @@ void FFT_CPU::fftxybac(std::complex* in,std::complex* ou int npy = this->nplane * this->ny; if (this->xprime) { + #pragma omp parallel for for (int i = 0; i < this->lixy + 1; ++i) { fftw_execute_dft(this->planybac, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]); } + #pragma omp parallel for for (int i = rixy; i < this->nx; ++i) { fftw_execute_dft(this->planybac, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]); @@ -388,6 +394,7 @@ void FFT_CPU::fftxybac(std::complex* in,std::complex* ou { fftw_execute_dft(this->planxbac1, (fftw_complex*)in, (fftw_complex*)out); fftw_execute_dft(this->planxbac2, (fftw_complex*)&in[rixy * nplane], (fftw_complex*)&out[rixy * nplane]); + #pragma omp parallel for for (int i = 0; i < this->nx; ++i) { fftw_execute_dft(this->planybac, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]); @@ -414,6 +421,7 @@ void FFT_CPU::fftxyr2c(double* in, std::complex* out) const if (this->xprime) { fftw_execute_dft_r2c(this->planxr2c, in, (fftw_complex*)out); + #pragma omp parallel for for (int i = 0; i < this->lixy + 1; ++i) { fftw_execute_dft(this->planyfor, (fftw_complex*)&out[i * npy], (fftw_complex*)&out[i * npy]); @@ -421,6 +429,7 @@ void FFT_CPU::fftxyr2c(double* in, std::complex* out) const } else { + #pragma omp parallel for for (int i = 0; i < this->nx; ++i) { fftw_execute_dft_r2c(this->planyr2c, &in[i * npy], (fftw_complex*)&out[i * npy]); @@ 
-435,6 +444,7 @@ void FFT_CPU::fftxyc2r(std::complex *in,double *out) const int npy = this->nplane * this->ny; if (this->xprime) { + #pragma omp parallel for for (int i = 0; i < this->lixy + 1; ++i) { fftw_execute_dft(this->planybac, (fftw_complex*)&in[i * npy], (fftw_complex*)&in[i * npy]); @@ -444,6 +454,7 @@ void FFT_CPU::fftxyc2r(std::complex *in,double *out) const else { fftw_execute_dft(this->planxbac1, (fftw_complex*)in, (fftw_complex*)in); + #pragma omp parallel for for (int i = 0; i < this->nx; ++i) { fftw_execute_dft_c2r(this->planyc2r, (fftw_complex*)&in[i * npy], &out[i * npy]); diff --git a/source/module_basis/module_pw/pw_basis.cpp b/source/module_basis/module_pw/pw_basis.cpp index 034b1b49a3..f4f7abf1dd 100644 --- a/source/module_basis/module_pw/pw_basis.cpp +++ b/source/module_basis/module_pw/pw_basis.cpp @@ -17,7 +17,7 @@ PW_Basis::PW_Basis(std::string device_, std::string precision_) : device(std::mo classname="PW_Basis"; this->fft_bundle.setfft("cpu",this->precision); this->double_data_ = (this->precision == "double") || (this->precision == "mixing"); - this->float_data_ = (this->precision == "single") || (this->precision == "mixing"); + this->float_data_ = (this->precision == "single") || (this->precision == "mixing"); } PW_Basis:: ~PW_Basis() diff --git a/source/module_basis/module_pw/pw_basis_k.cpp b/source/module_basis/module_pw/pw_basis_k.cpp index 91343d61a4..a4689ab2d2 100644 --- a/source/module_basis/module_pw/pw_basis_k.cpp +++ b/source/module_basis/module_pw/pw_basis_k.cpp @@ -203,11 +203,11 @@ void PW_Basis_K::setuptransform() this->getstartgr(); this->setupIndGk(); this->fft_bundle.clear(); + std::string fft_device = this->device; #if defined(__DSP) - this->fft_bundle.setfft("dsp", this->precision); -#else - this->fft_bundle.setfft(this->device, this->precision); + fft_device = "dsp"; #endif + this->fft_bundle.setfft(fft_device, this->precision); if (this->xprime) { this->fft_bundle.initfft(this->nx, diff --git 
a/source/module_basis/module_pw/pw_gatherscatter.h b/source/module_basis/module_pw/pw_gatherscatter.h index 9279ce3723..97be6e5c23 100644 --- a/source/module_basis/module_pw/pw_gatherscatter.h +++ b/source/module_basis/module_pw/pw_gatherscatter.h @@ -98,8 +98,7 @@ void PW_Basis::gatherp_scatters(std::complex* in, std::complex* out) const template void PW_Basis::gathers_scatterp(std::complex* in, std::complex* out) const { - //ModuleBase::timer::tick(this->classname, "gathers_scatterp"); - + // ModuleBase::timer::tick(this->classname, "gathers_scatterp"); if(this->poolnproc == 1) //In this case nrxx=fftnx*fftny*nz, nst = nstot, { #ifdef _OPENMP @@ -183,7 +182,7 @@ void PW_Basis::gathers_scatterp(std::complex* in, std::complex* out) const } } #endif - //ModuleBase::timer::tick(this->classname, "gathers_scatterp"); + // ModuleBase::timer::tick(this->classname, "gathers_scatterp"); return; } diff --git a/source/module_basis/module_pw/pw_transform.cpp b/source/module_basis/module_pw/pw_transform.cpp index 4f34221775..9d83d57e3c 100644 --- a/source/module_basis/module_pw/pw_transform.cpp +++ b/source/module_basis/module_pw/pw_transform.cpp @@ -210,7 +210,7 @@ void PW_Basis::recip2real(const std::complex* in, FPTYPE* out, const boo #endif for (int i = 0; i < this->nst * this->nz; ++i) { - fft_bundle.get_auxg_data()[i] = std::complex(0, 0); + fft_bundle.get_auxg_data()[i] = std::complex(0, 0); } #ifdef _OPENMP diff --git a/source/module_basis/module_pw/pw_transform_k.cpp b/source/module_basis/module_pw/pw_transform_k.cpp index a709b60429..61fb2892c2 100644 --- a/source/module_basis/module_pw/pw_transform_k.cpp +++ b/source/module_basis/module_pw/pw_transform_k.cpp @@ -187,7 +187,6 @@ void PW_Basis_K::recip2real(const std::complex* in, this->gathers_scatterp(this->fft_bundle.get_auxg_data(), this->fft_bundle.get_auxr_data()); this->fft_bundle.fftxybac(fft_bundle.get_auxr_data(), fft_bundle.get_auxr_data()); - auto* auxr = this->fft_bundle.get_auxr_data(); if (add) { diff 
--git a/source/module_basis/module_pw/test/pw_test.cpp b/source/module_basis/module_pw/test/pw_test.cpp index b8d7203f45..0377802c43 100644 --- a/source/module_basis/module_pw/test/pw_test.cpp +++ b/source/module_basis/module_pw/test/pw_test.cpp @@ -36,11 +36,10 @@ class TestEnv : public testing::Environment int main(int argc, char **argv) { - int kpar; kpar = 1; #ifdef __ENABLE_FLOAT_FFTW - precision_flag = "single"; + precision_flag = "mixing"; #else precision_flag = "double"; #endif diff --git a/source/module_basis/module_pw/test_serial/pw_basis_k_test.cpp b/source/module_basis/module_pw/test_serial/pw_basis_k_test.cpp index 153d46302d..2b4b4fc6ce 100644 --- a/source/module_basis/module_pw/test_serial/pw_basis_k_test.cpp +++ b/source/module_basis/module_pw/test_serial/pw_basis_k_test.cpp @@ -48,6 +48,7 @@ TEST_F(PWBasisKTEST,Constructor) EXPECT_EQ(basis_k2.precision,"double"); EXPECT_EQ(basis_k2.fft_bundle.precision,"double"); ModulePW::PW_Basis_K basis_k3(device_flag, precision_single); + EXPECT_EQ(basis_k3.precision,"single"); EXPECT_EQ(basis_k3.fft_bundle.precision,"single"); } diff --git a/source/module_esolver/esolver_fp.cpp b/source/module_esolver/esolver_fp.cpp index 7861195579..cf6d8a888f 100644 --- a/source/module_esolver/esolver_fp.cpp +++ b/source/module_esolver/esolver_fp.cpp @@ -23,45 +23,60 @@ namespace ModuleESolver ESolver_FP::ESolver_FP() { - std::string fft_device = PARAM.inp.device; +} + +ESolver_FP::~ESolver_FP() +{ + if (pw_rho_flag == true) + { + delete this->pw_rho; + this->pw_rho_flag = false; + } + if (PARAM.globalv.double_grid) + { + delete pw_rhod; + } + delete this->pelec; +} +void ESolver_FP::before_all_runners(UnitCell& ucell, const Input_para& inp) +{ + ModuleBase::TITLE("ESolver_FP", "before_all_runners"); + std::string fft_device = PARAM.inp.device; + std::string fft_precison = PARAM.inp.precision; // LCAO basis doesn't support GPU acceleration on FFT currently if(PARAM.inp.basis_type == "lcao") { fft_device = "cpu"; } - - pw_rho 
= new ModulePW::PW_Basis_Big(fft_device, PARAM.inp.precision); + if ((PARAM.inp.precision=="single") || (PARAM.inp.precision=="mixing")) + { + fft_precison = "mixing"; + } + else if (PARAM.inp.precision=="double") + { + fft_precison = "double"; + } + #if (not defined(__ENABLE_FLOAT_FFTW) and (defined(__CUDA) || defined(__RCOM))) + if (fft_device == "gpu") + { + fft_precison = "double"; + } + #endif + pw_rho = new ModulePW::PW_Basis_Big(fft_device, fft_precison); + pw_rho_flag = true; if (PARAM.globalv.double_grid) { - pw_rhod = new ModulePW::PW_Basis_Big(fft_device, PARAM.inp.precision); + pw_rhod = new ModulePW::PW_Basis_Big(fft_device, fft_precison); } else { pw_rhod = pw_rho; } - - // temporary, it will be removed pw_big = static_cast(pw_rhod); pw_big->setbxyz(PARAM.inp.bx, PARAM.inp.by, PARAM.inp.bz); sf.set(pw_rhod, PARAM.inp.nbspline); -} - -ESolver_FP::~ESolver_FP() -{ - delete pw_rho; - if ( PARAM.globalv.double_grid) - { - delete pw_rhod; - } - delete this->pelec; -} - -void ESolver_FP::before_all_runners(UnitCell& ucell, const Input_para& inp) -{ - ModuleBase::TITLE("ESolver_FP", "before_all_runners"); - //! 1) read pseudopotentials if (!PARAM.inp.use_paw) { diff --git a/source/module_esolver/esolver_fp.h b/source/module_esolver/esolver_fp.h index 3634c63be5..9cbdcc7362 100644 --- a/source/module_esolver/esolver_fp.h +++ b/source/module_esolver/esolver_fp.h @@ -95,6 +95,9 @@ class ESolver_FP: public ESolver //! 
solvent model surchem solvent; + + int pw_rho_flag = false; ///< flag for pw_rho, 0: not initialized, 1: initialized + }; } // namespace ModuleESolver diff --git a/source/module_esolver/esolver_ks.cpp b/source/module_esolver/esolver_ks.cpp index a1f622ef52..708db2823f 100644 --- a/source/module_esolver/esolver_ks.cpp +++ b/source/module_esolver/esolver_ks.cpp @@ -36,6 +36,27 @@ namespace ModuleESolver template ESolver_KS::ESolver_KS() { +} + + +template +ESolver_KS::~ESolver_KS() +{ + delete this->psi; + delete this->pw_wfc; + delete this->p_hamilt; + delete this->p_chgmix; + this->ppcell.release_memory(); +} + + +template +void ESolver_KS::before_all_runners(UnitCell& ucell, const Input_para& inp) +{ + ModuleBase::TITLE("ESolver_KS", "before_all_runners"); + //! 1) initialize "before_all_runners" in ESolver_FP + ESolver_FP::before_all_runners(ucell, inp); + classname = "ESolver_KS"; basisname = "PLEASE ADD BASISNAME FOR CURRENT ESOLVER."; @@ -75,27 +96,8 @@ ESolver_KS::ESolver_KS() // cell_factor this->ppcell.cell_factor = PARAM.inp.cell_factor; -} - - -template -ESolver_KS::~ESolver_KS() -{ - delete this->psi; - delete this->pw_wfc; - delete this->p_hamilt; - delete this->p_chgmix; - this->ppcell.release_memory(); -} -template -void ESolver_KS::before_all_runners(UnitCell& ucell, const Input_para& inp) -{ - ModuleBase::TITLE("ESolver_KS", "before_all_runners"); - - //! 
1) initialize "before_all_runniers" in ESolver_FP - ESolver_FP::before_all_runners(ucell, inp); /// PAW Section #ifdef USE_PAW diff --git a/source/module_hamilt_pw/hamilt_pwdft/structure_factor.cpp b/source/module_hamilt_pw/hamilt_pwdft/structure_factor.cpp index 09d0b56a05..c751b91cab 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/structure_factor.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/structure_factor.cpp @@ -68,8 +68,7 @@ void Structure_Factor::setup_structure_factor(const UnitCell* Ucell, const Paral // std::ofstream ofs( outstr.c_str() ) ; bool usebspline; if(nbspline > 0) { usebspline = true; - } else { usebspline = false; -} + } else { usebspline = false;} if(usebspline) { @@ -147,6 +146,7 @@ void Structure_Factor::setup_structure_factor(const UnitCell* Ucell, const Paral inat++; } } + if (device == "gpu") { if (PARAM.globalv.has_float_data) { resmem_cd_op()(this->c_eigts1, Ucell->nat * (2 * rho_basis->nx + 1)); diff --git a/source/module_hamilt_pw/hamilt_pwdft/test/CMakeLists.txt b/source/module_hamilt_pw/hamilt_pwdft/test/CMakeLists.txt index f4f6ff247c..963db3e5cb 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/test/CMakeLists.txt +++ b/source/module_hamilt_pw/hamilt_pwdft/test/CMakeLists.txt @@ -2,6 +2,7 @@ remove_definitions(-D__DEEPKS) remove_definitions(-D__CUDA) remove_definitions(-D__ROCM) remove_definitions(-D__EXX) +remove_definitions(-DUSE_PAW) AddTest( TARGET pwdft_soc @@ -26,4 +27,31 @@ AddTest( TARGET radial_proj_test LIBS parameter base device ${math_libs} SOURCES radial_proj_test.cpp ../radial_proj.cpp +) + +AddTest( + TARGET structure_factor_test + LIBS parameter ${math_libs} base device planewave + SOURCES structure_factor_test.cpp ../structure_factor.cpp ../parallel_grid.cpp + ../../../module_cell/unitcell.cpp + ../../../module_io/output.cpp + ../../../module_cell/update_cell.cpp + ../../../module_cell/bcast_cell.cpp + ../../../module_cell/print_cell.cpp + ../../../module_cell/atom_spec.cpp + 
../../../module_cell/atom_pseudo.cpp + ../../../module_cell/pseudo.cpp + ../../../module_cell/read_stru.cpp + ../../../module_cell/read_atom_species.cpp + ../../../module_cell/read_atoms.cpp + ../../../module_cell/read_pp.cpp + ../../../module_cell/read_pp_complete.cpp + ../../../module_cell/read_pp_upf100.cpp + ../../../module_cell/read_pp_upf201.cpp + ../../../module_cell/read_pp_vwr.cpp + ../../../module_cell/read_pp_blps.cpp + ../../../module_elecstate/read_pseudo.cpp + ../../../module_elecstate/cal_wfc.cpp + ../../../module_elecstate/cal_nelec_nband.cpp + ../../../module_elecstate/read_orb.cpp ) \ No newline at end of file diff --git a/source/module_hamilt_pw/hamilt_pwdft/test/structure_factor_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/test/structure_factor_test.cpp new file mode 100644 index 0000000000..e2231754ee --- /dev/null +++ b/source/module_hamilt_pw/hamilt_pwdft/test/structure_factor_test.cpp @@ -0,0 +1,128 @@ +#include "gtest/gtest.h" +#include "gmock/gmock.h" +#include +#include +#include +#include "module_cell/unitcell.h" +#include "module_elecstate/module_dm/test/prepare_unitcell.h" +#define private public +#include "module_parameter/parameter.h" +#include "module_hamilt_pw/hamilt_pwdft/structure_factor.h" +#undef private +/************************************************ + * unit test of class Structure_Factor + ***********************************************/ + +/** + * - Tested Functions: + * - Structure_Factor::set + * - Structure_Factor::setup_structure_factor + * - checks the z_eigts1, z_eigts2 and z_eigts3 arrays + * - checks the c_eigts1, c_eigts2 and c_eigts3 arrays after setting + * PARAM.sys.has_float_data = true +*/ + +//compare two complex by using EXPECT_DOUBLE_EQ() +InfoNonlocal::InfoNonlocal() +{ +} +InfoNonlocal::~InfoNonlocal() +{ +} + +Magnetism::Magnetism() +{ +} +Magnetism::~Magnetism() +{ +} + +class StructureFactorTest : public testing::Test +{ +protected: + 
Structure_Factor SF; + std::string output; + ModulePW::PW_Basis* rho_basis; + UnitCell* ucell; + UcellTestPrepare utp = UcellTestLib["Si"]; + Parallel_Grid* pgrid; + std::vector nw = {13}; + int nlocal = 0; +void SetUp() +{ + rho_basis=new ModulePW::PW_Basis; + ucell = utp.SetUcellInfo(nw, nlocal); + ucell->set_iat2iwt(1); + pgrid = new Parallel_Grid; + rho_basis->npw=10; + rho_basis->gcar=new ModuleBase::Vector3[10]; + // for (int ig=0;ignpw;ig++) + // { + // rho_basis->gcar[ig]=1.0; + // } +} +}; + +TEST_F(StructureFactorTest, set) +{ + const ModulePW::PW_Basis* rho_basis_in; + const int nbspline_in =10; + SF.set(rho_basis_in,nbspline_in); + EXPECT_EQ(nbspline_in, 10); +} + + +TEST_F(StructureFactorTest, setup_structure_factor_double) +{ + rho_basis->npw = 10; + SF.setup_structure_factor(ucell,*pgrid,rho_basis); + + for (int i=0;i< ucell->nat * (2 * rho_basis->nx + 1);i++) + { + EXPECT_EQ(SF.z_eigts1[i].real(),1); + EXPECT_EQ(SF.z_eigts1[i].imag(),0); + } + + for (int i=0;i< ucell->nat * (2 * rho_basis->ny + 1);i++) + { + EXPECT_EQ(SF.z_eigts2[i].real(),1); + EXPECT_EQ(SF.z_eigts2[i].imag(),0); + } + + for (int i=0;i< ucell->nat * (2 * rho_basis->nz + 1);i++) + { + EXPECT_EQ(SF.z_eigts3[i].real(),1); + EXPECT_EQ(SF.z_eigts3[i].imag(),0); + } +} + +TEST_F(StructureFactorTest, setup_structure_factor_float) +{ + PARAM.sys.has_float_data = true; + rho_basis->npw = 10; + SF.setup_structure_factor(ucell,*pgrid,rho_basis); + + for (int i=0;i< ucell->nat * (2 * rho_basis->nx + 1);i++) + { + EXPECT_EQ(SF.c_eigts1[i].real(),1); + EXPECT_EQ(SF.c_eigts1[i].imag(),0); + } + + for (int i=0;i< ucell->nat * (2 * rho_basis->ny + 1);i++) + { + EXPECT_EQ(SF.c_eigts2[i].real(),1); + EXPECT_EQ(SF.c_eigts2[i].imag(),0); + } + + for (int i=0;i< ucell->nat * (2 * rho_basis->nz + 1);i++) + { + EXPECT_EQ(SF.c_eigts3[i].real(),1); + EXPECT_EQ(SF.c_eigts3[i].imag(),0); + } +} + +int main() +{ + testing::InitGoogleTest(); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git 
a/source/module_hamilt_pw/hamilt_stodft/test/test_sto_tool.cpp b/source/module_hamilt_pw/hamilt_stodft/test/test_sto_tool.cpp index 465fd9c27f..a0654e1e7f 100644 --- a/source/module_hamilt_pw/hamilt_stodft/test/test_sto_tool.cpp +++ b/source/module_hamilt_pw/hamilt_stodft/test/test_sto_tool.cpp @@ -33,9 +33,13 @@ void hamilt::HamiltSdftPW::hPsi_norm(const T* psi_in, T* hpsi, const template class hamilt::HamiltPW, base_device::DEVICE_CPU>; template class hamilt::HamiltSdftPW, base_device::DEVICE_CPU>; +template class hamilt::HamiltPW, base_device::DEVICE_CPU>; +template class hamilt::HamiltSdftPW, base_device::DEVICE_CPU>; #if ((defined __CUDA) || (defined __ROCM)) template class hamilt::HamiltPW, base_device::DEVICE_GPU>; template class hamilt::HamiltSdftPW, base_device::DEVICE_GPU>; +template class hamilt::HamiltPW, base_device::DEVICE_GPU>; +template class hamilt::HamiltSdftPW, base_device::DEVICE_GPU>; #endif /** diff --git a/source/module_io/read_set_globalv.cpp b/source/module_io/read_set_globalv.cpp index 83bde5c62d..1cc133988a 100644 --- a/source/module_io/read_set_globalv.cpp +++ b/source/module_io/read_set_globalv.cpp @@ -72,7 +72,7 @@ void ReadInput::set_globalv(const Input_para& inp, System_para& sys) bool float_cond = false; #endif sys.has_double_data = (inp.precision == "double") || (inp.precision == "mixing") || float_cond; - sys.has_float_data = (inp.precision == "float") || (inp.precision == "mixing") || float_cond; + sys.has_float_data = (inp.precision == "single") || (inp.precision == "mixing") || float_cond; } /// @note Here para.inp has not been synchronized of all ranks. 
diff --git a/source/module_lr/esolver_lrtd_lcao.cpp b/source/module_lr/esolver_lrtd_lcao.cpp index 6dd3abe29f..1db10b5caf 100644 --- a/source/module_lr/esolver_lrtd_lcao.cpp +++ b/source/module_lr/esolver_lrtd_lcao.cpp @@ -257,7 +257,11 @@ LR::ESolver_LR::ESolver_LR(ModuleESolver::ESolver_KS_LCAO&& ks_sol this->gint_->reset_DMRGint(1); // move pw basis - delete this->pw_rho; // newed in ESolver_FP::ESolver_FP + if (this->pw_rho_flag) + { + this->pw_rho_flag = true; + delete this->pw_rho; // newed in ESolver_FP::before_all_runners + } this->pw_rho = ks_sol.pw_rho; ks_sol.pw_rho = nullptr; //init potential and calculate kernels using ground state charge diff --git a/tests/01_PW/111_PW_CG_float/INPUT b/tests/01_PW/111_PW_CG_float/INPUT new file mode 100644 index 0000000000..c1a7ad556f --- /dev/null +++ b/tests/01_PW/111_PW_CG_float/INPUT @@ -0,0 +1,29 @@ +INPUT_PARAMETERS +#Parameters (1.General) +suffix autotest +calculation scf + +nbands 6 +symmetry 1 +pseudo_dir ../../PP_ORB + +#Parameters (2.Iteration) +ecutwfc 20 +scf_thr 1e-8 +scf_nmax 100 + + +#Parameters (3.Basis) +basis_type pw + +#Parameters (4.Smearing) +smearing_method gauss +smearing_sigma 0.002 + +#Parameters (5.Mixing) +mixing_type plain +mixing_beta 0.5 + +ks_solver cg +device cpu +precision single \ No newline at end of file diff --git a/tests/01_PW/111_PW_CG_float/KPT b/tests/01_PW/111_PW_CG_float/KPT new file mode 100644 index 0000000000..c289c0158a --- /dev/null +++ b/tests/01_PW/111_PW_CG_float/KPT @@ -0,0 +1,4 @@ +K_POINTS +0 +Gamma +1 1 1 0 0 0 diff --git a/tests/01_PW/111_PW_CG_float/README b/tests/01_PW/111_PW_CG_float/README new file mode 100644 index 0000000000..41587e05b8 --- /dev/null +++ b/tests/01_PW/111_PW_CG_float/README @@ -0,0 +1 @@ +test Si (two-atom cell, Gamma point) SCF using the CG method and float (single) precision on the CPU device diff --git a/tests/01_PW/111_PW_CG_float/STRU b/tests/01_PW/111_PW_CG_float/STRU new file mode 100644 index 0000000000..9b42a124ca --- /dev/null +++ 
b/tests/01_PW/111_PW_CG_float/STRU @@ -0,0 +1,19 @@ +ATOMIC_SPECIES +Si 14 Si.pz-vbc.UPF + +LATTICE_CONSTANT +10.2 // add lattice constant + +LATTICE_VECTORS +0.0 0.5 0.5 +0.5 0.0 0.5 +0.5 0.5 0.0 + +ATOMIC_POSITIONS +Direct + +Si // Element type +0.0 // magnetism +2 +0.00 0.00 0.00 1 1 1 +0.25 0.25 0.25 1 1 1 diff --git a/tests/01_PW/111_PW_CG_float/result.ref b/tests/01_PW/111_PW_CG_float/result.ref new file mode 100644 index 0000000000..10063ce3c3 --- /dev/null +++ b/tests/01_PW/111_PW_CG_float/result.ref @@ -0,0 +1,6 @@ +etotref -198.22383283 +etotperatomref -99.11191642 +pointgroupref T_d +spacegroupref O_h +nksibzref 1 +totaltimeref diff --git a/tests/01_PW/111_PW_CG_float/threshold b/tests/01_PW/111_PW_CG_float/threshold new file mode 100644 index 0000000000..cc0ad91b67 --- /dev/null +++ b/tests/01_PW/111_PW_CG_float/threshold @@ -0,0 +1,5 @@ +# The float type possesses different precision compared to the double type. +# This integration aims to test the functionality of the float type +# within the plane-wave (pw) basis +threshold 0.00001 +fatal_threshold 1 diff --git a/tests/01_PW/CASES_CPU.txt b/tests/01_PW/CASES_CPU.txt index e34d75ec22..d8b269d929 100644 --- a/tests/01_PW/CASES_CPU.txt +++ b/tests/01_PW/CASES_CPU.txt @@ -105,6 +105,7 @@ 108_PW_MD_2O 109_PW_PBE0 110_PW_ONCV_skip +111_PW_CG_float 801_PW_LT_sc 802_PW_LT_fcc 803_PW_LT_bcc diff --git a/tests/11_PW_GPU/005_PW_CG_GPU_float/INPUT b/tests/11_PW_GPU/005_PW_CG_GPU_float/INPUT new file mode 100644 index 0000000000..3a22fa5fb9 --- /dev/null +++ b/tests/11_PW_GPU/005_PW_CG_GPU_float/INPUT @@ -0,0 +1,35 @@ +INPUT_PARAMETERS +#Parameters (General) +suffix autotest +pseudo_dir ../../PP_ORB + +gamma_only 0 +calculation scf +symmetry 1 +relax_nmax 1 +out_level ie +smearing_method gaussian +smearing_sigma 0.02 + +#Parameters (3.PW) +ecutwfc 40 +scf_thr 1e-7 +scf_nmax 100 + +#Parameters (LCAO) +basis_type pw +ks_solver cg +device gpu +precision single +chg_extrap second-order +out_dm 0 +pw_diag_thr 0.00001 
+ +cal_force 1 +#test_force 1 +cal_stress 1 +#test_stress 1 + +mixing_type broyden +mixing_beta 0.4 +mixing_gg0 1.5 diff --git a/tests/11_PW_GPU/005_PW_CG_GPU_float/KPT b/tests/11_PW_GPU/005_PW_CG_GPU_float/KPT new file mode 100644 index 0000000000..28006d5e2d --- /dev/null +++ b/tests/11_PW_GPU/005_PW_CG_GPU_float/KPT @@ -0,0 +1,4 @@ +K_POINTS +0 +Gamma +2 2 2 0 0 0 diff --git a/tests/11_PW_GPU/005_PW_CG_GPU_float/README b/tests/11_PW_GPU/005_PW_CG_GPU_float/README new file mode 100644 index 0000000000..f578f9dfbd --- /dev/null +++ b/tests/11_PW_GPU/005_PW_CG_GPU_float/README @@ -0,0 +1 @@ +test GaAs deformation simulation base parameters use CG method and float precision in GPU device diff --git a/tests/11_PW_GPU/005_PW_CG_GPU_float/STRU b/tests/11_PW_GPU/005_PW_CG_GPU_float/STRU new file mode 100644 index 0000000000..b03baadd25 --- /dev/null +++ b/tests/11_PW_GPU/005_PW_CG_GPU_float/STRU @@ -0,0 +1,23 @@ +ATOMIC_SPECIES +As 1 As_dojo.upf upf201 +Ga 1 Ga_dojo.upf upf201 + +LATTICE_CONSTANT +1 // add lattice constant, 10.58 ang + +LATTICE_VECTORS +5.33 5.33 0.0 +0.0 5.33 5.33 +5.33 0.0 5.33 +ATOMIC_POSITIONS +Direct //Cartesian or Direct coordinate. 
+ +As +0 +1 +0.300000 0.3300000 0.27000000 0 0 0 + +Ga //Element Label +0 +1 //number of atom +0.00000 0.00000 0.000000 0 0 0 diff --git a/tests/11_PW_GPU/005_PW_CG_GPU_float/result.ref b/tests/11_PW_GPU/005_PW_CG_GPU_float/result.ref new file mode 100644 index 0000000000..e8e006ec72 --- /dev/null +++ b/tests/11_PW_GPU/005_PW_CG_GPU_float/result.ref @@ -0,0 +1,7 @@ +etotref -4869.7470519303351466 +etotperatomref -2434.8735259652 +totalforceref 5.195370 +totalstressref 37241.38404200 +pointgroupref C_1 +spacegroupref C_1 +nksibzref 8 diff --git a/tests/11_PW_GPU/005_PW_CG_GPU_float/threshold b/tests/11_PW_GPU/005_PW_CG_GPU_float/threshold new file mode 100644 index 0000000000..b0cedcec2e --- /dev/null +++ b/tests/11_PW_GPU/005_PW_CG_GPU_float/threshold @@ -0,0 +1,4 @@ +threshold 1 +force_threshold 1 +stress_threshold 2 +fatal_threshold 2 diff --git a/tests/11_PW_GPU/CASES_GPU.txt b/tests/11_PW_GPU/CASES_GPU.txt index 3b1710189d..be5e66e229 100644 --- a/tests/11_PW_GPU/CASES_GPU.txt +++ b/tests/11_PW_GPU/CASES_GPU.txt @@ -2,3 +2,4 @@ 002_PW_CG_GPU 003_PW_DA_GPU 004_PW_OW_GPU +005_PW_CG_GPU_float \ No newline at end of file diff --git a/tests/integrate/102_PW_CG/README b/tests/integrate/102_PW_CG/README new file mode 100644 index 0000000000..be4d177fb5 --- /dev/null +++ b/tests/integrate/102_PW_CG/README @@ -0,0 +1 @@ +This test is for silicon diamond structure using gamma point, smearing method,CG solver, double precision \ No newline at end of file diff --git a/tests/performance/P000_si16_pw/INPUT b/tests/performance/P000_si16_pw/INPUT index 0b669f4e03..a195787ad0 100644 --- a/tests/performance/P000_si16_pw/INPUT +++ b/tests/performance/P000_si16_pw/INPUT @@ -22,4 +22,4 @@ smearing_sigma 0.002 #Parameters (5.Mixing) mixing_type broyden mixing_beta 0.3 -ks_solver dav +ks_solver dav \ No newline at end of file diff --git a/tests/performance/P002_si64_pw/INPUT b/tests/performance/P002_si64_pw/INPUT index 783c0cb3dd..5a6727692b 100644 --- 
a/tests/performance/P002_si64_pw/INPUT +++ b/tests/performance/P002_si64_pw/INPUT @@ -21,4 +21,4 @@ smearing_sigma 0.002 #Parameters (5.Mixing) mixing_type broyden -mixing_beta 0.3 +mixing_beta 0.3 \ No newline at end of file