deepmodeling
diff --git a/‎source/main.cpp‎
Lines changed: 30 additions & 18 deletions b/‎source/main.cpp‎
Lines changed: 30 additions & 18 deletions
diff --git a/‎source/module_basis/module_pw/CMakeLists.txt‎
Lines changed: 0 additions & 1 deletion b/‎source/module_basis/module_pw/CMakeLists.txt‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎source/module_basis/module_pw/module_fft/fft_bundle.cpp‎
Lines changed: 1 addition & 3 deletions b/‎source/module_basis/module_pw/module_fft/fft_bundle.cpp‎
Lines changed: 1 addition & 3 deletions
diff --git a/‎source/module_basis/module_pw/module_fft/test.cu‎
Lines changed: 0 additions & 106 deletions b/‎source/module_basis/module_pw/module_fft/test.cu‎
Lines changed: 0 additions & 106 deletions
diff --git a/‎source/module_basis/module_pw/module_fft/test.cuh‎
Lines changed: 0 additions & 5 deletions b/‎source/module_basis/module_pw/module_fft/test.cuh‎
Lines changed: 0 additions & 5 deletions
diff --git a/‎source/module_basis/module_pw/pw_transform_k.cpp‎
Lines changed: 3 additions & 6 deletions b/‎source/module_basis/module_pw/pw_transform_k.cpp‎
Lines changed: 3 additions & 6 deletions
diff --git a/‎source/module_basis/module_pw/test_gpu/CMakeLists.txt‎
Lines changed: 1 addition & 1 deletion b/‎source/module_basis/module_pw/test_gpu/CMakeLists.txt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎source/module_basis/module_pw/test_gpu/pw_basis_k_C2C.cpp‎
Lines changed: 43 additions & 43 deletions b/‎source/module_basis/module_pw/test_gpu/pw_basis_k_C2C.cpp‎
Lines changed: 43 additions & 43 deletions
@@ -2,42 +2,54 @@
 // AUTHOR : mohan
 // DATE : 2008-11-10
 //==========================================================
-
 #include "driver.h"
 #include "fftw3.h"
 #include "module_base/parallel_global.h"
 #include "module_io/parse_args.h"
 #include "module_parameter/parameter.h"
-#include "module_basis/module_pw/module_fft/fft_bundle.h"
 #ifdef _OPENMP
 #include <omp.h>
 #endif
-#include "module_basis/module_pw/module_fft/test.cuh"
+
 int main(int argc, char** argv)
 {
     /*
     read the arguement in the command-line,
     with "abacus -v", the program exit and returns version info,
     with no arguments, the program continues.
     */
-       std::cout << "FFT Bundle Example" << std::endl;
+    ModuleIO::parse_args(argc, argv);
 
-    // Example usage of make_unique
-    ModulePW::test();
-    // Note: The following lines are commented out as they require specific FFT implementations
-    // Uncomment and implement the FFT operations as needed
-    
-    // auto fft_bundle = make_unique<FFT_Bundle>("gpu", "single");
-
-    // Initialize FFT parameters
-    // fft_bundle->initfft(256, 256, 256, 64, 64, 1, 1, 1, false, false, false);
+    /*
+    read the mpi parameters in the command-line,
+    initialize the mpi environment.
+    */
+    int nproc = 1;
+    int my_rank = 0;
+    int nthread_per_proc = 1;
+    Parallel_Global::read_pal_param(argc, argv, nproc, nthread_per_proc, my_rank);
+#ifdef _OPENMP
+    // ref: https://www.fftw.org/fftw3_doc/Usage-of-Multi_002dthreaded-FFTW.html
+    fftw_init_threads();
+    fftw_plan_with_nthreads(omp_get_max_threads());
+#endif
+    PARAM.set_pal_param(my_rank, nproc, nthread_per_proc);
 
-    // Perform FFT operations
-    // std::complex<float> input[256];
-    // std::complex<float> output[256];
-    // fft_bundle->fftxyfor(input, output);
+    /*
+    main program for doing electronic structure calculations.
+    */
+    Driver DD;
+    DD.init();
 
-    std::cout << "FFT operation completed." << std::endl;
+    /*
+    After running mpi version of abacus, release the mpi resources.
+    */
+#ifdef __MPI
+    Parallel_Global::finalize_mpi();
+#endif
+#ifdef _OPENMP
+    fftw_cleanup_threads();
+#endif
 
     return 0;
 }
@@ -7,7 +7,6 @@ if (USE_CUDA)
   list (APPEND FFT_SRC
     module_fft/fft_cuda.cpp
     module_fft/fft_cuda_batch.cpp
-    module_fft/test.cu
   )
 endif()
 if (USE_ROCM)
 
@@ -101,7 +101,6 @@ void FFT_Bundle::initfft(int nx_in,
         fft_double = make_unique<FFT_ROCM<double>>();
         fft_double->initfft(nx_in, ny_in, nz_in);
 #elif defined(__CUDA)
-        std::cout<<"here is the set of the gpu"<<std::endl;
         fft_float = make_unique<FFT_CUDA<float>>();
         fft_float->initfft(nx_in, ny_in, nz_in);
         fft_double = make_unique<FFT_CUDA<double>>();
@@ -115,14 +114,13 @@ void FFT_Bundle::initfft(int nx_in,
         fft_double = make_unique<FFT_ROCM<double>>();
         fft_double->initfft(nx_in, ny_in, nz_in);
 #elif defined(__CUDA)   
-        std::cout<<"here is the set of the batch gpu"<<std::endl;
         fft_float = make_unique<FFT_CUDA_BATCH<float>>();
         fft_float->initfft(nx_in, ny_in, nz_in);
         fft_double = make_unique<FFT_CUDA_BATCH<double>>();
         fft_double->initfft(nx_in, ny_in, nz_in );
 #endif
     }else{
-        // ModuleBase::WARNING_QUIT("FFT_Bundle", "Please set the device to cpu or gpu or dsp");
+        ModuleBase::WARNING_QUIT("FFT_Bundle", "Please set the device to cpu or gpu or dsp");
     }
 }
 
 
@@ -491,14 +491,13 @@ void PW_Basis_K::real2recip_gpu(const std::complex<FPTYPE>* in,
 
     const int startig = ik * this->npwk_max;
     const int npw_k = this->npwk[ik];
-    std::cout << "real2recip_gpu: npw_k = " << npw_k << ", nxyz = " << this->nxyz << std::endl;
     set_real_to_recip_output_op<FPTYPE, base_device::DEVICE_GPU>()(npw_k,
                                                                    this->nxyz,
                                                                    add,
                                                                    factor,
                                                                    this->ig2ixyz_k + startig,
                                                                    this->fft_bundle.get_auxr_3d_data<FPTYPE>(),
-                                                                   out,1);
+                                                                   out);
     ModuleBase::timer::tick(this->classname, "real_to_recip gpu");
 }
 template <typename FPTYPE>
@@ -520,18 +519,16 @@ void PW_Basis_K::recip2real_gpu(const std::complex<FPTYPE>* in,
     const int startig = ik * this->npwk_max;
     const int npw_k = this->npwk[ik];
     set_3d_fft_box_op<FPTYPE, base_device::DEVICE_GPU>()(npw_k,
-                                                         nxyz,
                                                          this->ig2ixyz_k + startig,
                                                          in,
-                                                         this->fft_bundle.get_auxr_3d_data<FPTYPE>(),
-                                                         1);
+                                                         this->fft_bundle.get_auxr_3d_data<FPTYPE>());
     this->fft_bundle.fft3D_backward(this->fft_bundle.get_auxr_3d_data<FPTYPE>(), this->fft_bundle.get_auxr_3d_data<FPTYPE>());
 
     set_recip_to_real_output_op<FPTYPE, base_device::DEVICE_GPU>()(this->nrxx,
                                                                    add,
                                                                    factor,
                                                                    this->fft_bundle.get_auxr_3d_data<FPTYPE>(),
-                                                                   out,1);
+                                                                   out);
 
     ModuleBase::timer::tick(this->classname, "recip_to_real gpu");
 }
 
@@ -3,7 +3,7 @@ if (USE_CUDA)
 AddTest(
   TARGET pw_test_gpu
   LIBS parameter ${math_libs} base planewave device FFTW3::FFTW3_FLOAT
-  SOURCES pw_test.cpp  pw_basis_k_batch.cpp
+  SOURCES pw_test.cpp  pw_basis_C2R.cpp pw_basis_C2C.cpp pw_basis_k_C2C.cpp pw_basis_k_batch.cpp
 )
 endif()
 
@@ -196,41 +196,41 @@ class PW_BASIS_K_GPU_TEST : public ::testing::Test
     }
 };
 
-using MixedTypes = ::testing::Types<
+using MixedTypes = ::testing::Types<TypePair<float, base_device::DEVICE_GPU>,
                                     TypePair<double, base_device::DEVICE_GPU> >;
 
 TYPED_TEST_CASE(PW_BASIS_K_GPU_TEST, MixedTypes);
 
-// TYPED_TEST(PW_BASIS_K_GPU_TEST, Mixing)
-// {
-//     using T = typename TestFixture::T;
-//     using Device = typename TestFixture::Device;
-//     ModulePW::PW_Basis_K pwtest;
-//     pwtest.set_device("gpu");
-//     pwtest.set_precision("mixing");
-//     pwtest.fft_bundle.setfft("gpu", "mixing");
-//     this->init(pwtest);
-//     int startiz = pwtest.startz_current;
-//     const int nx = pwtest.nx;
-//     const int ny = pwtest.ny;
-//     const int nz = pwtest.nz;
-//     const int nplane = pwtest.nplane;
-//     const int npwk = pwtest.npwk[0];
-//     for (int ixy = 0; ixy < nx * ny; ++ixy)
-//     {
-//         const int offset = ixy * nz + startiz;
-//         const int startz = ixy * nplane;
-//         for (int iz = 0; iz < nplane; ++iz)
-//         {
-//             EXPECT_NEAR(this->tmp[offset + iz].real(), this->h_rhor[startz + iz].real(), 1e-4);
-//         }
-//     }
-//     for (int ig = 0; ig < npwk; ++ig)
-//     {
-//         EXPECT_NEAR(this->h_rhog[ig].real(), this->h_rhogout[ig].real(), 1e-4);
-//         EXPECT_NEAR(this->h_rhog[ig].imag(), this->h_rhogout[ig].imag(), 1e-4);
-//     }
-// }
+TYPED_TEST(PW_BASIS_K_GPU_TEST, Mixing) // runs PW_Basis_K on "gpu" with "mixing" precision and checks the fixture buffers
+{
+    using T = typename TestFixture::T; // not referenced in this test body
+    using Device = typename TestFixture::Device; // not referenced in this test body
+    ModulePW::PW_Basis_K pwtest;
+    pwtest.set_device("gpu");
+    pwtest.set_precision("mixing");
+    pwtest.fft_bundle.setfft("gpu", "mixing");
+    this->init(pwtest); // fixture helper defined outside this hunk; presumably fills tmp/h_rhor/h_rhog/h_rhogout -- TODO confirm
+    int startiz = pwtest.startz_current;
+    const int nx = pwtest.nx;
+    const int ny = pwtest.ny;
+    const int nz = pwtest.nz;
+    const int nplane = pwtest.nplane;
+    const int npwk = pwtest.npwk[0]; // only the first entry of npwk is checked
+    for (int ixy = 0; ixy < nx * ny; ++ixy)
+    {
+        const int offset = ixy * nz + startiz; // index into tmp: stride nz, shifted by startz_current
+        const int startz = ixy * nplane; // index into h_rhor: stride nplane
+        for (int iz = 0; iz < nplane; ++iz)
+        {
+            EXPECT_NEAR(this->tmp[offset + iz].real(), this->h_rhor[startz + iz].real(), 1e-4); // real part only
+        }
+    }
+    for (int ig = 0; ig < npwk; ++ig)
+    {
+        EXPECT_NEAR(this->h_rhog[ig].real(), this->h_rhogout[ig].real(), 1e-4);
+        EXPECT_NEAR(this->h_rhog[ig].imag(), this->h_rhogout[ig].imag(), 1e-4);
+    }
+}
 
 TYPED_TEST(PW_BASIS_K_GPU_TEST, FloatDouble)
 {
@@ -284,20 +284,20 @@ TYPED_TEST(PW_BASIS_K_GPU_TEST, convulution)
     ModulePW::PW_Basis_K pwtest;
     pwtest.set_device("gpu");
     pwtest.set_precision("mixing");
-    // if (typeid(T) == typeid(float))
-    // {
-    //     pwtest.fft_bundle.setfft("gpu", "single");
-    // }
-    // if (typeid(T) == typeid(double))
-    // {
+    if (typeid(T) == typeid(float)) // choose the FFT precision that matches the typed-test parameter T
+    {
+        pwtest.fft_bundle.setfft("gpu", "single");
+    }
+    else if (typeid(T) == typeid(double)) // must chain with else: a separate `if` sends T=float into the error branch below
+    {
     std::cout << "Using double precision" << std::endl;
         pwtest.fft_bundle.setfft("gpu", "double");
-    // }
-    // else
-    // {
-    //     cout << "Error: Unsupported type" << endl;
-    //     return;
-    // }
+    }
+    else // T is neither float nor double: skip the rest of the test
+    {
+        cout << "Error: Unsupported type" << endl;
+        return;
+    }
     this->init(pwtest);
     int startiz = pwtest.startz_current;
     const int nx = pwtest.nx;
Original file line number	Diff line number	Diff line change
`@@ -7,7 +7,6 @@ if (USE_CUDA)`
`7`	`7`	`list (APPEND FFT_SRC`
`8`	`8`	`module_fft/fft_cuda.cpp`
`9`	`9`	`module_fft/fft_cuda_batch.cpp`
`10`		`- module_fft/test.cu`
`11`	`10`	`)`
`12`	`11`	`endif()`
`13`	`12`	`if (USE_ROCM)`
Original file line number	Diff line number	Diff line change
`@@ -3,7 +3,7 @@ if (USE_CUDA)`
`3`	`3`	`AddTest(`
`4`	`4`	`TARGET pw_test_gpu`
`5`	`5`	`LIBS parameter ${math_libs} base planewave device FFTW3::FFTW3_FLOAT`
`6`		`- SOURCES pw_test.cpp pw_basis_k_batch.cpp`
	`6`	`+ SOURCES pw_test.cpp pw_basis_C2R.cpp pw_basis_C2C.cpp pw_basis_k_C2C.cpp pw_basis_k_batch.cpp`
`7`	`7`	`)`
`8`	`8`	`endif()`
`9`	`9`