deepmodeling
diff --git a/source/module_basis/module_pw/test_gpu/recip_to_real_C2R.cpp b/source/module_basis/module_pw/test_gpu/pw_basis_C2R.cpp
rename from source/module_basis/module_pw/test_gpu/recip_to_real_C2R.cpp
rename to source/module_basis/module_pw/test_gpu/pw_basis_C2R.cpp
Lines changed: 18 additions & 5 deletions
diff --git a/source/module_basis/module_pw/test_gpu/pw_basis_k_C2C.cpp b/source/module_basis/module_pw/test_gpu/pw_basis_k_C2C.cpp
new file
Lines changed: 173 additions & 0 deletions
@@ -43,7 +43,6 @@ TEST_F(PWTEST, recip_to_real_double)
     const int nx = pwtest.nx;
     const int ny = pwtest.ny;
     const int nz = pwtest.nz;
-    printf("the nx is %d,the ny is %d\n,the nz is %d\n", nx, ny, nz);
     const int nplane = pwtest.nplane;
 
     const double tpiba2 = ModuleBase::TWO_PI * ModuleBase::TWO_PI / lat0 / lat0;
@@ -103,7 +102,7 @@ TEST_F(PWTEST, recip_to_real_double)
     MPI_Bcast(tmp, 2 * nx * ny * nz, MPI_DOUBLE, 0, POOL_WORLD);
 #endif
     // const int size = nx * ny * nz;
-    complex<double>* h_rhog = new complex<double>[npw];
+    complex<double>* h_rhog  = new complex<double>[npw];
     complex<double>* h_rhogout = new complex<double>[npw];
     complex<double>* d_rhog;
     complex<double>* d_rhogr;
@@ -125,10 +124,8 @@ TEST_F(PWTEST, recip_to_real_double)
         }
     }
     cudaMemcpy(d_rhog, h_rhog, npw * sizeof(complex<double>), cudaMemcpyHostToDevice);
-    cudaMemcpy(d_rhogout, h_rhogout, npw * sizeof(complex<double>), cudaMemcpyHostToDevice);
 
     double* h_rhor = new double[nrxx];
-    double* h_rhogrout = new double[nrxx];
     double* d_rhor;
     cudaMalloc((void**)&d_rhor, nrxx * sizeof(double));
     pwtest.recip_to_real<std::complex<double>, double, base_device::DEVICE_GPU>(d_rhog, d_rhor);
@@ -142,11 +139,19 @@ TEST_F(PWTEST, recip_to_real_double)
             EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(), h_rhor[ixy * nplane + iz], 1e-6);
         }
     }
+
+    pwtest.real_to_recip<double,std::complex<double>,base_device::DEVICE_GPU>(d_rhor,d_rhog);
+    cudaMemcpy(h_rhogout,d_rhog,npw * sizeof(complex<double>),cudaMemcpyDeviceToHost);
+    for (int ig = 0; ig < npw; ++ig)
+    {
+        EXPECT_NEAR(h_rhog[ig].real(), h_rhogout[ig].real(), 1e-6);
+        EXPECT_NEAR(h_rhog[ig].imag(), h_rhogout[ig].imag(), 1e-6);
+    }
+
     delete[] h_rhog;
     delete[] h_rhogout;
     delete[] h_rhor;
     delete[] tmp;
-    delete[] h_rhogrout;
     cudaFree(d_rhog);
     cudaFree(d_rhogr);
     cudaFree(d_rhogout);
@@ -274,6 +279,14 @@ TEST_F(PWTEST, recip_to_real_float)
             EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(), h_rhor[ixy * nplane + iz], 1e-6);
         }
     }
+
+    pwtest.real_to_recip<float,std::complex<float>,base_device::DEVICE_GPU>(d_rhor,d_rhog);
+    cudaMemcpy(h_rhogout,d_rhog,npw * sizeof(complex<float>),cudaMemcpyDeviceToHost);
+    for (int ig = 0; ig < npw; ++ig)
+    {
+        EXPECT_NEAR(h_rhog[ig].real(), h_rhogout[ig].real(), 1e-6);
+        EXPECT_NEAR(h_rhog[ig].imag(), h_rhogout[ig].imag(), 1e-6);
+    }
     delete[] h_rhog;
     delete[] h_rhogout;
     delete[] h_rhor;
 
@@ -0,0 +1,173 @@
+//---------------------------------------------
+// TEST for FFT
+//---------------------------------------------
+#include "../pw_basis_k.h"
+#ifdef __MPI
+#include "test_tool.h"
+#include "module_base/parallel_global.h"
+#include "mpi.h"
+#endif
+#include "module_base/constants.h"
+#include "module_base/global_function.h"
+#include "pw_test.h"
+#include "cuda_runtime.h"
+using namespace std;
+// GPU complex-to-complex FFT round trip for PW_Basis_K (double precision).
+//
+// For every k-point the test
+//   1. builds a reference real-space field on rank 0 with a full 3D FFTW
+//      backward transform and broadcasts it to the pool,
+//   2. uploads the matching plane-wave coefficients to the device and runs the
+//      GPU recip_to_real transform, comparing the local z-slab with the reference,
+//   3. runs the GPU real_to_recip transform on the device result and checks that
+//      the original coefficients are recovered,
+//   4. cross-checks getigl2ig against getigl2isz / ig2isz.
+// All host and device buffers allocated per k-point are released before the next
+// iteration.
+TEST_F(PWTEST,pw_basis_k_C2C_double)
+{
+    cout<<"dividemthd 1, gamma_only: on, xprime: false, gamma kpoint, check fft"<<endl;
+    ModulePW::PW_Basis_K pwtest("gpu", "double");
+    ModuleBase::Matrix3 latvec(1, 0.3, 0, 0, 2, 0, 0, 0, 2);
+    double wfcecut;
+    double lat0= 2.7;
+    bool gamma_only;
+    ModuleBase::Vector3<double> *kvec_d;
+    int nks;
+    //--------------------------------------------------
+    nks = 1;
+    kvec_d = new ModuleBase::Vector3<double>[nks];
+    kvec_d[0].set(0,0,0);
+    wfcecut = 10;
+    // NOTE(review): with gamma_only on, only the f.y > 0 branch of the input is
+    // filled below; confirm that the complex-to-complex GPU path is meant to be
+    // exercised in this mode.
+    gamma_only = true;
+    int distribution_type = 1;
+    bool xprime = false;
+    //--------------------------------------------------
+#ifdef __MPI
+    pwtest.initmpi(nproc_in_pool, rank_in_pool, POOL_WORLD);
+#endif
+    //init //real parameter
+    pwtest.initgrids(lat0,latvec,4*wfcecut);
+    pwtest.initparameters(gamma_only,wfcecut,nks,kvec_d,distribution_type, xprime);
+    pwtest.setuptransform();
+    pwtest.collect_local_pw();
+
+    const int nrxx = pwtest.nrxx;
+    const int nx = pwtest.nx;
+    const int ny = pwtest.ny;
+    const int nz = pwtest.nz;
+    const int nplane = pwtest.nplane;
+    const double tpiba2 = ModuleBase::TWO_PI * ModuleBase::TWO_PI / lat0 / lat0;
+    const double ggecut = wfcecut / tpiba2;
+    ModuleBase::Matrix3 GT,G,GGT;
+    GT = latvec.Inverse();
+    G  = GT.Transpose();
+    GGT = G * GT;
+
+    // Reference field on the full nx*ny*nz grid (filled on rank 0, then broadcast).
+    complex<double> *tmp = new complex<double> [nx*ny*nz];
+    for(int ik  = 0; ik < nks; ++ik)
+    {
+        const int npwk = pwtest.npwk[ik];
+        if(rank_in_pool == 0)
+        {
+            ModuleBase::Vector3<double> kk = kvec_d[ik];
+            for(int ix = 0 ; ix < nx ; ++ix)
+            {
+                for(int iy = 0 ; iy < ny ; ++iy)
+                {
+                    for(int iz = 0 ; iz < nz ; ++iz)
+                    {
+                        tmp[ix*ny*nz + iy*nz + iz]=0.0;
+                        // Grid index shifted so that G = 0 sits at the grid centre.
+                        double vx = ix -  int(nx/2);
+                        double vy = iy -  int(ny/2);
+                        double vz = iz -  int(nz/2);
+                        ModuleBase::Vector3<double> v(vx,vy,vz);
+                        double modulusgk = (v+kk) * (GGT * (v+kk));
+                        if (modulusgk <= ggecut)
+                        {
+                            tmp[ix*ny*nz + iy*nz + iz]=1.0/(modulusgk+1);
+                            if(vy > 0) tmp[ix*ny*nz + iy*nz + iz]+=ModuleBase::IMAG_UNIT / (std::abs(v.x+1) + 1);
+                            else if(vy < 0) tmp[ix*ny*nz + iy*nz + iz]-=ModuleBase::IMAG_UNIT / (std::abs(-v.x+1) + 1);
+                        }
+                    }
+                }
+            }
+            fftw_plan pp = fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *) tmp, (fftw_complex *) tmp, FFTW_BACKWARD, FFTW_ESTIMATE);
+            fftw_execute(pp);
+            fftw_destroy_plan(pp);
+
+            // Undo the centring shift applied above with a real-space phase factor.
+            ModuleBase::Vector3<double> delta_g(double(int(nx/2))/nx, double(int(ny/2))/ny, double(int(nz/2))/nz);
+            for(int ixy = 0 ; ixy < nx * ny ; ++ixy)
+            {
+                for(int iz = 0 ; iz < nz ; ++iz)
+                {
+                    int ix = ixy / ny;
+                    int iy = ixy % ny;
+                    ModuleBase::Vector3<double> real_r(ix, iy, iz);
+                    double phase_im = -delta_g * real_r;
+                    complex<double> phase(0,ModuleBase::TWO_PI * phase_im);
+                    tmp[ixy * nz + iz] *= exp(phase);
+                }
+            }
+        }
+#ifdef __MPI
+        MPI_Bcast(tmp,2*nx*ny*nz,MPI_DOUBLE,0,POOL_WORLD);
+#endif
+
+        // Host-side plane-wave coefficients for this k-point.
+        complex<double> * h_rhog = new complex<double> [npwk];
+        for(int ig = 0 ; ig < npwk ; ++ig)
+        {
+            h_rhog[ig] = 1.0/(pwtest.getgk2(ik,ig)+1);
+            ModuleBase::Vector3<double> f = pwtest.getgdirect(ik,ig);
+            if(f.y > 0)
+            {
+                h_rhog[ig]+=ModuleBase::IMAG_UNIT / (std::abs(f.x+1) + 1);
+            }
+        }
+
+        // Host staging buffers for the results copied back from the device.
+        complex<double>* h_rhor = new complex<double>[nrxx];
+        complex<double>* h_rhogout = new complex<double>[npwk];
+
+        // Device buffers: reciprocal space holds npwk coefficients, real space
+        // holds the nrxx local grid points.
+        complex<double>* d_rhog = nullptr;
+        complex<double>* d_rhor = nullptr;
+        complex<double>* d_rhogout = nullptr;
+        ASSERT_EQ(cudaMalloc((void**)&d_rhog, npwk * sizeof(complex<double>)), cudaSuccess);
+        ASSERT_EQ(cudaMalloc((void**)&d_rhor, nrxx * sizeof(complex<double>)), cudaSuccess);
+        ASSERT_EQ(cudaMalloc((void**)&d_rhogout, npwk * sizeof(complex<double>)), cudaSuccess);
+
+        // The GPU transform must be fed device memory, so upload the input first.
+        ASSERT_EQ(cudaMemcpy(d_rhog, h_rhog, npwk * sizeof(complex<double>), cudaMemcpyHostToDevice), cudaSuccess);
+        pwtest.recip_to_real<std::complex<double>,std::complex<double>,base_device::DEVICE_GPU>(d_rhog,d_rhor,ik); //check out-of-place transform
+        ASSERT_EQ(cudaMemcpy(h_rhor, d_rhor, nrxx * sizeof(complex<double>), cudaMemcpyDeviceToHost), cudaSuccess);
+
+        // Compare the local z-slab with the matching part of the reference grid.
+        const int startiz = pwtest.startz_current;
+        for(int ixy = 0 ; ixy < nx * ny ; ++ixy)
+        {
+            for(int iz = 0 ; iz < nplane ; ++iz)
+            {
+                EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),h_rhor[ixy*nplane+iz].real(),1e-6);
+            }
+        }
+
+        // Round trip back to reciprocal space on the device.
+        // NOTE(review): template arguments mirror the recip_to_real call above;
+        // confirm PW_Basis_K provides this GPU overload.
+        pwtest.real_to_recip<std::complex<double>,std::complex<double>,base_device::DEVICE_GPU>(d_rhor,d_rhogout,ik);
+        ASSERT_EQ(cudaMemcpy(h_rhogout, d_rhogout, npwk * sizeof(complex<double>), cudaMemcpyDeviceToHost), cudaSuccess);
+
+        for(int ig = 0 ; ig < npwk ; ++ig)
+        {
+            EXPECT_NEAR(h_rhog[ig].real(),h_rhogout[ig].real(),1e-6);
+            EXPECT_NEAR(h_rhog[ig].imag(),h_rhogout[ig].imag(),1e-6);
+        }
+
+        //check igl2ig
+        // NOTE(review): the inner bound was `pwtest.npwk` (an array indexed by ik);
+        // `pwtest.npw` is assumed to be the length of ig2isz - confirm.
+        for(int igl = 0; igl < npwk ; ++igl)
+        {
+            const int isz = pwtest.getigl2isz(ik,igl);
+            for(int ig = 0 ; ig < pwtest.npw; ++ig)
+            {
+                if(isz == pwtest.ig2isz[ig])
+                {
+                    EXPECT_EQ(ig,pwtest.getigl2ig(ik,igl));
+                }
+            }
+        }
+
+        // Release everything allocated for this k-point.
+        cudaFree(d_rhog);
+        cudaFree(d_rhor);
+        cudaFree(d_rhogout);
+        delete [] h_rhog;
+        delete [] h_rhor;
+        delete [] h_rhogout;
+    }
+    delete [] tmp;
+    delete [] kvec_d;
+    fftw_cleanup();
+}