Convert the duplicated recip_to_real_double GPU test into a single-precision recip_to_real_float test

A-006 · A-006 · commit ecc7f2314dc2 · 2025-03-31T20:57:02.000+08:00
diff --git a/source/module_basis/module_pw/test_gpu/recip_to_real.cpp b/source/module_basis/module_pw/test_gpu/recip_to_real.cpp
@@ -144,21 +144,19 @@ TEST_F(PWTEST,recip_to_real_double)
     delete [] h_rhogrout;
 }
 
-TEST_F(PWTEST,recip_to_real_double)
+TEST_F(PWTEST,recip_to_real_float)
 {
     cout<<"dividemthd 1, gamma_only: off, check fft between double and complex"<<endl;
     ModulePW::PW_Basis pwtest("gpu", precision_flag);
-    pwtest.fft_bundle.setfft("gpu","double");
+    pwtest.fft_bundle.setfft("gpu","single");
     ModuleBase::Matrix3 latvec(1, 1, 0, 0, 1, 1, 0, 0, 2);
-    double wfcecut;
+    double wfcecut = 18;
     double lat0 = 2.2;
     bool gamma_only=false;
-    wfcecut = 18;
     gamma_only = false;
     int distribution_type = 1;
     bool xprime = false;
     
-    //init
 #ifdef __MPI
     pwtest.initmpi(nproc_in_pool, rank_in_pool, POOL_WORLD);
 #endif
@@ -173,16 +171,13 @@ TEST_F(PWTEST,recip_to_real_double)
     const int nx = pwtest.nx;
     const int ny = pwtest.ny;
     const int nz = pwtest.nz;
-    printf("the nx is %d,the ny is %d\n,the nz is %d\n",nx,ny,nz);
     const int nplane = pwtest.nplane;
-
     const double tpiba2 = ModuleBase::TWO_PI * ModuleBase::TWO_PI / lat0 / lat0;
     const double ggecut = wfcecut / tpiba2;
-    ModuleBase::Matrix3 GT,G,GGT;
-    GT = latvec.Inverse();
-	G  = GT.Transpose();
-	GGT = G * GT;
-    complex<double> *tmp = new complex<double> [nx*ny*nz];
+    ModuleBase::Matrix3 GT = latvec.Inverse();
+	ModuleBase::Matrix3 G  = GT.Transpose();
+	ModuleBase::Matrix3 GGT = G * GT;
+    complex<float> *tmp = new complex<float> [nx*ny*nz];
     if(rank_in_pool == 0)
     {
         for(int ix = 0 ; ix < nx ; ++ix)
@@ -192,34 +187,34 @@ TEST_F(PWTEST,recip_to_real_double)
                 for(int iz = 0 ; iz < nz ; ++iz)
                 {
                     tmp[ix*ny*nz + iy*nz + iz]=0.0;
-                    double vx = ix -  int(nx/2);
-                    double vy = iy -  int(ny/2);
-                    double vz = iz -  int(nz/2);
+                    float vx = ix -  int(nx/2);
+                    float vy = iy -  int(ny/2);
+                    float vz = iz -  int(nz/2);
                     ModuleBase::Vector3<double> v(vx,vy,vz);
-                    double modulus = v * (GGT * v);
+                    float modulus = v * (GGT * v);
                     if (modulus <= ggecut)
                     {
                         tmp[ix*ny*nz + iy*nz + iz]=1.0/(modulus+1);
-                        if(vy > 0) tmp[ix*ny*nz + iy*nz + iz]+=ModuleBase::IMAG_UNIT / (std::abs(v.x+1) + 1);
-                        else if(vy < 0) tmp[ix*ny*nz + iy*nz + iz]-=ModuleBase::IMAG_UNIT / (std::abs(-v.x+1) + 1);
+                        if(vy > 0) tmp[ix*ny*nz + iy*nz + iz]+=std::complex<float>(0,1.0) / (std::abs(vx+1) + 1);
+                        else if(vy < 0) tmp[ix*ny*nz + iy*nz + iz]-=std::complex<float>(0,1.0) / (std::abs(-vx+1) + 1);
                     }
                 }
             }   
         }
-        fftw_plan pp = fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *) tmp, (fftw_complex *) tmp, FFTW_BACKWARD, FFTW_ESTIMATE);
-        fftw_execute(pp);    
-        fftw_destroy_plan(pp); 
+        fftwf_plan pp = fftwf_plan_dft_3d(nx,ny,nz,(fftwf_complex *) tmp, (fftwf_complex *) tmp, FFTW_BACKWARD, FFTW_ESTIMATE);
+        fftwf_execute(pp);    
+        fftwf_destroy_plan(pp); 
         
-        ModuleBase::Vector3<double> delta_g(double(int(nx/2))/nx, double(int(ny/2))/ny, double(int(nz/2))/nz); 
+        ModuleBase::Vector3<float> delta_g(float(int(nx/2))/nx, float(int(ny/2))/ny, float(int(nz/2))/nz); 
         for(int ixy = 0 ; ixy < nx * ny ; ++ixy)
         {
             for(int iz = 0 ; iz < nz ; ++iz)
             {
                 int ix = ixy / ny;
                 int iy = ixy % ny;
-                ModuleBase::Vector3<double> real_r(ix, iy, iz);
-                double phase_im = -delta_g * real_r;
-                complex<double> phase(0,ModuleBase::TWO_PI * phase_im);
+                ModuleBase::Vector3<float> real_r(ix, iy, iz);
+                float phase_im = -delta_g * real_r;
+                complex<float> phase(0,ModuleBase::TWO_PI * phase_im);
                 tmp[ixy * nz + iz] *= exp(phase);
             }
         }
@@ -228,14 +223,14 @@ TEST_F(PWTEST,recip_to_real_double)
     MPI_Bcast(tmp,2*nx*ny*nz,MPI_DOUBLE,0,POOL_WORLD);
 #endif
     // const int size = nx * ny * nz;
-    complex<double> * h_rhog = new complex<double> [npw];
-    complex<double> * h_rhogout = new complex<double> [npw];
-    complex<double> * d_rhog;
-    complex<double> * d_rhogr;
-    complex<double> * d_rhogout;
-    cudaMalloc((void**)&d_rhog,npw * sizeof(complex<double>));
-    cudaMalloc((void**)&d_rhogr,npw*sizeof(complex<double>));
-    cudaMalloc((void**)&d_rhogout,npw*sizeof(complex<double>));
+    complex<float> * h_rhog = new complex<float> [npw];
+    complex<float> * h_rhogout = new complex<float> [npw];
+    complex<float> * d_rhog;
+    complex<float> * d_rhogr;
+    complex<float> * d_rhogout;
+    cudaMalloc((void**)&d_rhog,npw * sizeof(complex<float>));
+    cudaMalloc((void**)&d_rhogr,npw*sizeof(complex<float>));
+    cudaMalloc((void**)&d_rhogout,npw*sizeof(complex<float>));
 
     for(int ig = 0 ; ig < npw ; ++ig)
     {
@@ -249,15 +244,15 @@ TEST_F(PWTEST,recip_to_real_double)
             h_rhog[ig]-=ModuleBase::IMAG_UNIT / (std::abs(-pwtest.gdirect[ig].x+1) + 1);
         }
     }    
-    cudaMemcpy(d_rhog,h_rhog,npw * sizeof(complex<double>),cudaMemcpyHostToDevice);
-    cudaMemcpy(d_rhogout,h_rhogout,npw * sizeof(complex<double>),cudaMemcpyHostToDevice);
+    cudaMemcpy(d_rhog,h_rhog,npw * sizeof(complex<float>),cudaMemcpyHostToDevice);
+    cudaMemcpy(d_rhogout,h_rhogout,npw * sizeof(complex<float>),cudaMemcpyHostToDevice);
 
-    double * h_rhor = new double [nrxx];
-    double * h_rhogrout = new double [nrxx];
-    double * d_rhor;
-    cudaMalloc((void**)&d_rhor,nrxx * sizeof(double));
-    pwtest.recip_to_real<std::complex<double>,double,base_device::DEVICE_GPU>(d_rhog,d_rhor);
-    cudaMemcpy(h_rhor,d_rhor,nrxx*sizeof(double),cudaMemcpyDeviceToHost);
+    float * h_rhor = new float [nrxx];
+    float * h_rhogrout = new float [nrxx];
+    float * d_rhor;
+    cudaMalloc((void**)&d_rhor,nrxx * sizeof(float));
+    pwtest.recip_to_real<std::complex<float>,float,base_device::DEVICE_GPU>(d_rhog,d_rhor);
+    cudaMemcpy(h_rhor,d_rhor,nrxx*sizeof(float),cudaMemcpyDeviceToHost);
 
     int startiz = pwtest.startz_current;
     for(int ixy = 0 ; ixy < nx * ny ; ++ixy)