From 0fccf66ae726375032e7cce89ebd37e2bf5f372e Mon Sep 17 00:00:00 2001 From: critsium-xy Date: Thu, 16 Jan 2025 10:32:28 +0800 Subject: [PATCH 1/7] Remove all ctx parameters in resize_memory_op --- .../module_base/kernels/test/math_op_test.cpp | 6 +- source/module_base/math_chebyshev.cpp | 32 ++++---- source/module_base/math_ylmreal.cpp | 2 +- .../module_device/cuda/memory_op.cu | 7 +- .../module_base/module_device/memory_op.cpp | 9 +-- source/module_base/module_device/memory_op.h | 6 +- .../module_device/test/memory_test.cpp | 8 +- source/module_base/parallel_device.h | 4 +- .../module_base/test/blas_connector_test.cpp | 12 +-- .../module_pw/kernels/test/pw_op_test.cpp | 16 ++-- .../module_pw/module_fft/fft_cuda.cpp | 4 +- .../module_pw/module_fft/fft_rocm.cpp | 4 +- source/module_basis/module_pw/pw_basis_k.cpp | 22 +++--- .../module_basis/module_pw/pw_basis_sup.cpp | 2 +- .../module_basis/module_pw/pw_distributeg.cpp | 2 +- source/module_elecstate/elecstate_pw.cpp | 33 ++++----- .../kernels/test/elecstate_op_test.cpp | 10 +-- .../potentials/potential_new.cpp | 12 +-- .../module_deltaspin/cal_mw_from_lambda.cpp | 20 ++--- .../hamilt_pwdft/VNL_in_pw.cpp | 74 +++++++++---------- .../hamilt_pwdft/forces_cc.cpp | 24 +++--- .../hamilt_pwdft/forces_nl.cpp | 2 +- .../hamilt_pwdft/forces_onsite.cpp | 2 +- .../hamilt_pwdft/forces_scc.cpp | 12 +-- .../hamilt_pwdft/fs_kin_tools.cpp | 4 +- .../hamilt_pwdft/fs_nonlocal_tools.cpp | 42 +++++------ .../hamilt_pwdft/hamilt_pw.cpp | 6 +- .../kernels/test/ekinetic_op_test.cpp | 6 +- .../kernels/test/force_op_test.cpp | 24 +++--- .../kernels/test/meta_op_test.cpp | 8 +- .../kernels/test/nonlocal_op_test.cpp | 12 +-- .../kernels/test/stress_op_test.cpp | 34 ++++----- .../kernels/test/veff_op_test.cpp | 10 +-- .../hamilt_pwdft/kernels/test/vnl_op_test.cpp | 24 +++--- .../hamilt_pwdft/kernels/test/wf_op_test.cpp | 18 ++--- .../hamilt_pwdft/onsite_proj_tools.cpp | 56 +++++++------- .../hamilt_pwdft/onsite_projector.cpp | 6 +- .../hamilt_pwdft/operator_pw/meta_pw.cpp | 2 +- .../hamilt_pwdft/operator_pw/nonlocal_pw.cpp | 4 +- .../operator_pw/onsite_proj_pw.cpp | 18 ++--- .../hamilt_pwdft/operator_pw/veff_pw.cpp | 8 +- .../hamilt_pwdft/stress_func_cc.cpp | 12 +-- .../hamilt_pwdft/stress_func_loc.cpp | 12 +-- .../hamilt_pwdft/stress_func_nl.cpp | 2 +- .../hamilt_pwdft/stress_func_onsite.cpp | 2 +- .../hamilt_pwdft/structure_factor.cpp | 18 ++--- .../hamilt_pwdft/structure_factor_k.cpp | 4 +- .../hamilt_stodft/sto_che.cpp | 4 +- .../module_hamilt_pw/hamilt_stodft/sto_che.h | 4 +- .../hamilt_stodft/sto_forces.cpp | 2 +- .../hamilt_stodft/sto_iter.cpp | 10 +-- .../hamilt_stodft/sto_stress_pw.cpp | 2 +- source/module_hsolver/diag_const_nums.cpp | 36 +++------ source/module_hsolver/diago_dav_subspace.cpp | 31 ++++---- source/module_hsolver/diago_david.cpp | 31 ++++---- source/module_hsolver/diago_iter_assist.cpp | 28 +++---- .../kernels/cuda/math_kernel_op.cu | 6 +- .../module_hsolver/kernels/math_kernel_op.cpp | 2 +- .../kernels/rocm/math_kernel_op.hip.cu | 6 +- .../kernels/test/math_dngvd_test.cpp | 2 +- .../kernels/test/math_kernel_test.cpp | 42 +++++------ .../kernels/test/perf_math_kernel.cpp | 6 +- source/module_psi/psi.cpp | 8 +- 63 files changed, 426 insertions(+), 451 deletions(-) diff --git a/source/module_base/kernels/test/math_op_test.cpp b/source/module_base/kernels/test/math_op_test.cpp index 7136ab8d35..17ea191656 100644 --- a/source/module_base/kernels/test/math_op_test.cpp +++ b/source/module_base/kernels/test/math_op_test.cpp @@ -306,9 +306,9 @@ TEST_F(TestModuleBaseMathMultiDevice, cal_ylm_real_op_gpu) std::vector ylm(expected_ylm.size(), 0.0); double * d_ylm = nullptr, * d_g = nullptr, * d_p = nullptr; - resmem_var_op()(gpu_ctx, d_g, g.size()); - resmem_var_op()(gpu_ctx, d_p, p.size()); - resmem_var_op()(gpu_ctx, d_ylm, ylm.size()); + resmem_var_op()(d_g, g.size()); + resmem_var_op()(d_p, p.size()); + resmem_var_op()(d_ylm, ylm.size()); syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_g, g.data(), g.size()); syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_p, p.data(), p.size()); diff --git a/source/module_base/math_chebyshev.cpp b/source/module_base/math_chebyshev.cpp index 9bfac7cac9..6a074c7e71 100644 --- a/source/module_base/math_chebyshev.cpp +++ b/source/module_base/math_chebyshev.cpp @@ -63,8 +63,8 @@ Chebyshev::Chebyshev(const int norder_in) : fftw(2 * EXTEND * nord coefc_cpu = new std::complex[norder]; if (base_device::get_device_type(this->ctx) == base_device::GpuDevice) { - resmem_var_op()(this->ctx, this->coef_real, norder); - resmem_complex_op()(this->ctx, this->coef_complex, norder); + resmem_var_op()(this->coef_real, norder); + resmem_complex_op()(this->coef_complex, norder); } else { @@ -129,7 +129,7 @@ REAL Chebyshev::ddot_real(const std::complex* psi_L, pL = (REAL*)psi_L; pR = (REAL*)psi_R; REAL* dot_device = nullptr; - resmem_var_op()(this->ctx, dot_device, 1); + resmem_var_op()(dot_device, 1); container::kernels::blas_dot()(dim2, pL, 1, pR, 1, dot_device); syncmem_var_d2h_op()(cpu_ctx, this->ctx, &result, dot_device, 1); delmem_var_op()(this->ctx, dot_device); @@ -140,7 +140,7 @@ REAL Chebyshev::ddot_real(const std::complex* psi_L, pL = (REAL*)psi_L; pR = (REAL*)psi_R; REAL* dot_device = nullptr; - resmem_var_op()(this->ctx, dot_device, 1); + resmem_var_op()(dot_device, 1); for (int i = 0; i < m; ++i) { int dim2 = 2 * N; @@ -427,9 +427,9 @@ void Chebyshev::calfinalvec_real( ndmxt = LDA * m; } - resmem_complex_op()(this->ctx, arraynp1, ndmxt); - resmem_complex_op()(this->ctx, arrayn, ndmxt); - resmem_complex_op()(this->ctx, arrayn_1, ndmxt); + resmem_complex_op()(arraynp1, ndmxt); + resmem_complex_op()(arrayn, ndmxt); + resmem_complex_op()(arrayn_1, ndmxt); memcpy_complex_op()(this->ctx, this->ctx, arrayn_1, wavein, ndmxt); // ModuleBase::GlobalFunc::DCOPY(wavein, arrayn_1, ndmxt); @@ -496,9 +496,9 @@ void Chebyshev::calfinalvec_complex( ndmxt = LDA * m; } - resmem_complex_op()(this->ctx, arraynp1, ndmxt); - resmem_complex_op()(this->ctx, arrayn, ndmxt); - resmem_complex_op()(this->ctx, arrayn_1, ndmxt); + resmem_complex_op()(arraynp1, ndmxt); + resmem_complex_op()(arrayn, ndmxt); + resmem_complex_op()(arrayn_1, ndmxt); memcpy_complex_op()(this->ctx, this->ctx, arrayn_1, wavein, ndmxt); @@ -595,9 +595,9 @@ void Chebyshev::tracepolyA( ndmxt = LDA * m; } - resmem_complex_op()(this->ctx, arraynp1, ndmxt); - resmem_complex_op()(this->ctx, arrayn, ndmxt); - resmem_complex_op()(this->ctx, arrayn_1, ndmxt); + resmem_complex_op()(arraynp1, ndmxt); + resmem_complex_op()(arrayn, ndmxt); + resmem_complex_op()(arrayn_1, ndmxt); memcpy_complex_op()(this->ctx, this->ctx, arrayn_1, wavein, ndmxt); // ModuleBase::GlobalFunc::DCOPY(wavein, arrayn_1, ndmxt); @@ -669,9 +669,9 @@ bool Chebyshev::checkconverge( std::complex* arrayn = nullptr; std::complex* arrayn_1 = nullptr; - resmem_complex_op()(this->ctx, arraynp1, LDA); - resmem_complex_op()(this->ctx, arrayn, LDA); - resmem_complex_op()(this->ctx, arrayn_1, LDA); + resmem_complex_op()(arraynp1, LDA); + resmem_complex_op()(arrayn, LDA); + resmem_complex_op()(arrayn_1, LDA); memcpy_complex_op()(this->ctx, this->ctx, arrayn_1, wavein, N); // ModuleBase::GlobalFunc::DCOPY(wavein, arrayn_1, N); diff --git a/source/module_base/math_ylmreal.cpp b/source/module_base/math_ylmreal.cpp index 953112996a..80153a2055 100644 --- a/source/module_base/math_ylmreal.cpp +++ b/source/module_base/math_ylmreal.cpp @@ -327,7 +327,7 @@ void YlmReal::Ylm_Real(Device * ctx, const int lmax2, const int ng, const FPTYPE ModuleBase::WARNING_QUIT("YLM_REAL","l>30 or l<0"); } FPTYPE * p = nullptr, * phi = nullptr, * cost = nullptr; - resmem_var_op()(ctx, p, (lmax + 1) * (lmax + 1) * ng, "YlmReal::Ylm_Real"); + resmem_var_op()(p, (lmax + 1) * (lmax + 1) * ng, "YlmReal::Ylm_Real"); cal_ylm_real_op()( ctx, diff --git a/source/module_base/module_device/cuda/memory_op.cu b/source/module_base/module_device/cuda/memory_op.cu index bc9384c446..97445db1f7 100644 --- a/source/module_base/module_device/cuda/memory_op.cu +++ b/source/module_base/module_device/cuda/memory_op.cu @@ -52,14 +52,13 @@ __global__ void cast_memory(std::complex* out, const FPTYPE_in* in, } template -void resize_memory_op::operator()(const base_device::DEVICE_GPU* dev, - FPTYPE*& arr, - const size_t size, +void resize_memory_op::operator()(FPTYPE*& arr, + const size_t size, const char* record_in) { if (arr != nullptr) { - delete_memory_op()(dev, arr); + delete_memory_op()(gpu_ctx, arr); } cudaErrcheck(cudaMalloc((void**)&arr, sizeof(FPTYPE) * size)); std::string record_string; diff --git a/source/module_base/module_device/memory_op.cpp b/source/module_base/module_device/memory_op.cpp index 3c807dfad7..0e8afc6fe8 100644 --- a/source/module_base/module_device/memory_op.cpp +++ b/source/module_base/module_device/memory_op.cpp @@ -18,7 +18,7 @@ namespace memory template struct resize_memory_op { - void operator()(const base_device::DEVICE_CPU* dev, FPTYPE*& arr, const size_t size, const char* record_in) + void operator()(FPTYPE*& arr, const size_t size, const char* record_in) { if (arr != nullptr) { @@ -156,8 +156,7 @@ template struct delete_memory_op*, base_device::DEVICE_CPU> template struct resize_memory_op { - void operator()(const base_device::DEVICE_GPU* dev, - FPTYPE*& arr, + void operator()(FPTYPE*& arr, const size_t size, const char* record_in = nullptr) { @@ -404,10 +403,10 @@ template void resize_memory(FPTYPE* arr, base_device::AbacusDevice_t device_type) { if (device_type == base_device::AbacusDevice_t::CpuDevice){ - resize_memory_op()(cpu_ctx, arr); + resize_memory_op()(arr); } else if (device_type == base_device::AbacusDevice_t::GpuDevice){ - resize_memory_op()(gpu_ctx, arr); + resize_memory_op()(arr); } } diff --git a/source/module_base/module_device/memory_op.h b/source/module_base/module_device/memory_op.h index 14926caf9b..af7b2e21a0 100644 --- a/source/module_base/module_device/memory_op.h +++ b/source/module_base/module_device/memory_op.h @@ -18,13 +18,12 @@ struct resize_memory_op /// @brief Allocate memory for a given pointer. Note this op will free the pointer first. /// /// Input Parameters - /// \param dev : the type of computing device /// \param size : array size /// \param record_string : label for memory record /// /// Output Parameters /// \param arr : allocated array - void operator()(const Device* dev, FPTYPE*& arr, const size_t size, const char* record_in = nullptr); + void operator()(FPTYPE*& arr, const size_t size, const char* record_in = nullptr); }; template @@ -113,8 +112,7 @@ void delete_memory(FPTYPE* arr, base_device::AbacusDevice_t device_type = base_d template struct resize_memory_op { - void operator()(const base_device::DEVICE_GPU* dev, - FPTYPE*& arr, + void operator()(FPTYPE*& arr, const size_t size, const char* record_in = nullptr); }; diff --git a/source/module_base/module_device/test/memory_test.cpp b/source/module_base/module_device/test/memory_test.cpp index 6dc45e5091..ac7d0af22c 100644 --- a/source/module_base/module_device/test/memory_test.cpp +++ b/source/module_base/module_device/test/memory_test.cpp @@ -111,7 +111,7 @@ TEST_F(TestModulePsiMemory, set_memory_op_complex_double_cpu) TEST_F(TestModulePsiMemory, resize_memory_op_double_cpu) { double* xx_tmp = NULL; - resize_memory_double_cpu_op()(cpu_ctx, xx_tmp, xx.size()); + resize_memory_double_cpu_op()(xx_tmp, xx.size()); for (int ii = 0; ii < xx.size(); ii++) { xx_tmp[ii] = xx[ii]; @@ -126,7 +126,7 @@ TEST_F(TestModulePsiMemory, resize_memory_op_double_cpu) TEST_F(TestModulePsiMemory, resize_memory_op_comlex_double_cpu) { std::complex* z_xx_tmp = NULL; - resize_memory_comlex_double_cpu_op()(cpu_ctx, z_xx_tmp, z_xx.size()); + resize_memory_comlex_double_cpu_op()(z_xx_tmp, z_xx.size()); for (int ii = 0; ii < z_xx.size(); ii++) { z_xx_tmp[ii] = z_xx[ii]; @@ -200,7 +200,7 @@ TEST_F(TestModulePsiMemory, set_memory_op_complex_double_gpu) TEST_F(TestModulePsiMemory, resize_memory_op_double_gpu) { double* xx_tmp = NULL; - resize_memory_double_gpu_op()(gpu_ctx, xx_tmp, xx.size()); + resize_memory_double_gpu_op()(xx_tmp, xx.size()); thrust::device_ptr d_xx(xx_tmp); thrust::copy(xx.begin(), xx.end(), d_xx); @@ -217,7 +217,7 @@ TEST_F(TestModulePsiMemory, resize_memory_op_double_gpu) TEST_F(TestModulePsiMemory, resize_memory_op_complex_double_gpu) { std::complex* z_xx_tmp = NULL; - resize_memory_comlex_double_gpu_op()(gpu_ctx, z_xx_tmp, z_xx.size()); + resize_memory_comlex_double_gpu_op()(z_xx_tmp, z_xx.size()); thrust::device_ptr> dz_xx(z_xx_tmp); thrust::copy(z_xx.begin(), z_xx.end(), dz_xx); diff --git a/source/module_base/parallel_device.h b/source/module_base/parallel_device.h index 09625f6303..51a6320bff 100644 --- a/source/module_base/parallel_device.h +++ b/source/module_base/parallel_device.h @@ -37,7 +37,7 @@ void bcast_dev(const Device* ctx, T* object, const int& n, const MPI_Comm& comm, { if(tmp_space == nullptr) { - base_device::memory::resize_memory_op()(cpu_ctx, object_cpu, n); + base_device::memory::resize_memory_op()(object_cpu, n); alloc = true; } else @@ -74,7 +74,7 @@ void reduce_dev(const Device* ctx, T* object, const int& n, const MPI_Comm& comm { if(tmp_space == nullptr) { - base_device::memory::resize_memory_op()(cpu_ctx, object_cpu, n); + base_device::memory::resize_memory_op()(object_cpu, n); alloc = true; } else diff --git a/source/module_base/test/blas_connector_test.cpp b/source/module_base/test/blas_connector_test.cpp index 34f4cb51bb..938a1290cb 100644 --- a/source/module_base/test/blas_connector_test.cpp +++ b/source/module_base/test/blas_connector_test.cpp @@ -101,7 +101,7 @@ TEST(blas_connector, ScalGpu) { const int incx = 1; std::complex result[8], answer[8]; std::complex* result_gpu = nullptr; - resmem_zd_op()(gpu_ctx, result_gpu, 8 * sizeof(std::complex)); + resmem_zd_op()(result_gpu, 8 * sizeof(std::complex)); for (int i=0; i< size; i++) { result[i] = std::complex{static_cast(std::rand() / double(RAND_MAX)), static_cast(std::rand() / double(RAND_MAX))}; @@ -198,8 +198,8 @@ TEST(blas_connector, AxpyGpu) { std::array x_const, result, answer; T* x_gpu = nullptr; T* result_gpu = nullptr; - resmem_zd_op()(gpu_ctx, x_gpu, size * sizeof(std::complex)); - resmem_zd_op()(gpu_ctx, result_gpu, size * sizeof(std::complex)); + resmem_zd_op()(x_gpu, size * sizeof(std::complex)); + resmem_zd_op()(result_gpu, size * sizeof(std::complex)); std::generate(x_const.begin(), x_const.end(), []() { return T{static_cast(std::rand() / double(RAND_MAX)), static_cast(std::rand() / double(RAND_MAX))}; @@ -640,9 +640,9 @@ TEST(blas_connector, GemmGpu) { std::complex* a_gpu = nullptr; std::complex* b_gpu = nullptr; std::complex* result_gpu = nullptr; - resmem_zd_op()(gpu_ctx, a_gpu, size_k * lda * sizeof(std::complex)); - resmem_zd_op()(gpu_ctx, b_gpu, size_n * ldb * sizeof(std::complex)); - resmem_zd_op()(gpu_ctx, result_gpu, size_n * ldc * sizeof(std::complex)); + resmem_zd_op()(a_gpu, size_k * lda * sizeof(std::complex)); + resmem_zd_op()(b_gpu, size_n * ldb * sizeof(std::complex)); + resmem_zd_op()(result_gpu, size_n * ldc * sizeof(std::complex)); std::generate(a_const.begin(), a_const.end(), []() { return T{static_cast(std::rand() / double(RAND_MAX)), static_cast(std::rand() / double(RAND_MAX))}; diff --git a/source/module_basis/module_pw/kernels/test/pw_op_test.cpp b/source/module_basis/module_pw/kernels/test/pw_op_test.cpp index 96cc760383..aeead153ac 100644 --- a/source/module_basis/module_pw/kernels/test/pw_op_test.cpp +++ b/source/module_basis/module_pw/kernels/test/pw_op_test.cpp @@ -102,9 +102,9 @@ TEST_F(TestModulePWPWMultiDevice, set_3d_fft_box_op_gpu) std::vector> res(out_1.size(), std::complex{0, 0}); int * d_box_index = NULL; std::complex* d_res = NULL, * d_in_1 = NULL; - resize_memory_int_gpu_op()(gpu_ctx, d_box_index, box_index.size()); - resize_memory_complex_gpu_op()(gpu_ctx, d_res, res.size()); - resize_memory_complex_gpu_op()(gpu_ctx, d_in_1, in_1.size()); + resize_memory_int_gpu_op()(d_box_index, box_index.size()); + resize_memory_complex_gpu_op()(d_res, res.size()); + resize_memory_complex_gpu_op()(d_in_1, in_1.size()); synchronize_memory_int_h2d_op()(gpu_ctx, cpu_ctx, d_box_index, box_index.data(), box_index.size()); synchronize_memory_complex_h2d_op()(gpu_ctx, cpu_ctx, d_res, res.data(), res.size()); synchronize_memory_complex_h2d_op()(gpu_ctx, cpu_ctx, d_in_1, in_1.data(), in_1.size()); @@ -125,8 +125,8 @@ TEST_F(TestModulePWPWMultiDevice, set_recip_to_real_output_op_gpu) { std::vector> res(out_2.size(), std::complex{0, 0}); std::complex* d_res = NULL, * d_in_2 = NULL; - resize_memory_complex_gpu_op()(gpu_ctx, d_res, res.size()); - resize_memory_complex_gpu_op()(gpu_ctx, d_in_2, in_2.size()); + resize_memory_complex_gpu_op()(d_res, res.size()); + resize_memory_complex_gpu_op()(d_in_2, in_2.size()); synchronize_memory_complex_h2d_op()(gpu_ctx, cpu_ctx, d_res, res.data(), res.size()); synchronize_memory_complex_h2d_op()(gpu_ctx, cpu_ctx, d_in_2, in_2.data(), in_2.size()); @@ -146,9 +146,9 @@ TEST_F(TestModulePWPWMultiDevice, set_real_to_recip_output_op_gpu) std::vector> res = out_3_init; int * d_box_index = NULL; std::complex* d_res = NULL, * d_in_3 = NULL; - resize_memory_int_gpu_op()(gpu_ctx, d_box_index, box_index.size()); - resize_memory_complex_gpu_op()(gpu_ctx, d_res, res.size()); - resize_memory_complex_gpu_op()(gpu_ctx, d_in_3, in_3.size()); + resize_memory_int_gpu_op()(d_box_index, box_index.size()); + resize_memory_complex_gpu_op()(d_res, res.size()); + resize_memory_complex_gpu_op()(d_in_3, in_3.size()); synchronize_memory_int_h2d_op()(gpu_ctx, cpu_ctx, d_box_index, box_index.data(), box_index.size()); synchronize_memory_complex_h2d_op()(gpu_ctx, cpu_ctx, d_res, res.data(), res.size()); synchronize_memory_complex_h2d_op()(gpu_ctx, cpu_ctx, d_in_3, in_3.data(), in_3.size()); diff --git a/source/module_basis/module_pw/module_fft/fft_cuda.cpp b/source/module_basis/module_pw/module_fft/fft_cuda.cpp index db93fb07fb..aa132cf071 100644 --- a/source/module_basis/module_pw/module_fft/fft_cuda.cpp +++ b/source/module_basis/module_pw/module_fft/fft_cuda.cpp @@ -17,14 +17,14 @@ template <> void FFT_CUDA::setupFFT() { cufftPlan3d(&c_handle, this->nx, this->ny, this->nz, CUFFT_C2C); - resmem_cd_op()(gpu_ctx, this->c_auxr_3d, this->nx * this->ny * this->nz); + resmem_cd_op()(this->c_auxr_3d, this->nx * this->ny * this->nz); } template <> void FFT_CUDA::setupFFT() { cufftPlan3d(&z_handle, this->nx, this->ny, this->nz, CUFFT_Z2Z); - resmem_zd_op()(gpu_ctx, this->z_auxr_3d, this->nx * this->ny * this->nz); + resmem_zd_op()(this->z_auxr_3d, this->nx * this->ny * this->nz); } template <> void FFT_CUDA::cleanFFT() diff --git a/source/module_basis/module_pw/module_fft/fft_rocm.cpp b/source/module_basis/module_pw/module_fft/fft_rocm.cpp index 9973c72901..9b8702a25e 100644 --- a/source/module_basis/module_pw/module_fft/fft_rocm.cpp +++ b/source/module_basis/module_pw/module_fft/fft_rocm.cpp @@ -16,14 +16,14 @@ template <> void FFT_ROCM::setupFFT() { hipfftPlan3d(&c_handle, this->nx, this->ny, this->nz, HIPFFT_C2C); - resmem_cd_op()(gpu_ctx, this->c_auxr_3d, this->nx * this->ny * this->nz); + resmem_cd_op()(this->c_auxr_3d, this->nx * this->ny * this->nz); } template <> void FFT_ROCM::setupFFT() { hipfftPlan3d(&z_handle, this->nx, this->ny, this->nz, HIPFFT_Z2Z); - resmem_zd_op()(gpu_ctx, this->z_auxr_3d, this->nx * this->ny * this->nz); + resmem_zd_op()(this->z_auxr_3d, this->nx * this->ny * this->nz); } template <> void FFT_ROCM::cleanFFT() diff --git a/source/module_basis/module_pw/pw_basis_k.cpp b/source/module_basis/module_pw/pw_basis_k.cpp index f670ee9bf9..4e452bd87f 100644 --- a/source/module_basis/module_pw/pw_basis_k.cpp +++ b/source/module_basis/module_pw/pw_basis_k.cpp @@ -99,16 +99,16 @@ void PW_Basis_K:: initparameters( #if defined(__CUDA) || defined(__ROCM) if (this->device == "gpu") { if (this->precision == "single") { - resmem_sd_op()(gpu_ctx, this->s_kvec_c, this->nks * 3); + resmem_sd_op()(this->s_kvec_c, this->nks * 3); castmem_d2s_h2d_op()(gpu_ctx, cpu_ctx, this->s_kvec_c, reinterpret_cast(&this->kvec_c[0][0]), this->nks * 3); } - resmem_dd_op()(gpu_ctx, this->d_kvec_c, this->nks * 3); + resmem_dd_op()(this->d_kvec_c, this->nks * 3); syncmem_d2d_h2d_op()(gpu_ctx, cpu_ctx, this->d_kvec_c, reinterpret_cast(&this->kvec_c[0][0]), this->nks * 3); } else { #endif if (this->precision == "single") { - resmem_sh_op()(cpu_ctx, this->s_kvec_c, this->nks * 3); + resmem_sh_op()(this->s_kvec_c, this->nks * 3); castmem_d2s_h2h_op()(cpu_ctx, cpu_ctx, this->s_kvec_c, reinterpret_cast(&this->kvec_c[0][0]), this->nks * 3); } this->d_kvec_c = reinterpret_cast(&this->kvec_c[0][0]); @@ -164,7 +164,7 @@ void PW_Basis_K::setupIndGk() } #if defined(__CUDA) || defined(__ROCM) if (this->device == "gpu") { - resmem_int_op()(gpu_ctx, this->d_igl2isz_k, this->npwk_max * this->nks); + resmem_int_op()(this->d_igl2isz_k, this->npwk_max * this->nks); syncmem_int_h2d_op()(gpu_ctx, cpu_ctx, this->d_igl2isz_k, this->igl2isz_k, this->npwk_max * this->nks); } #endif @@ -247,14 +247,14 @@ void PW_Basis_K::collect_local_pw(const double& erf_ecut_in, const double& erf_h #if defined(__CUDA) || defined(__ROCM) if (this->device == "gpu") { if (this->precision == "single") { - resmem_sd_op()(gpu_ctx, this->s_gk2, this->npwk_max * this->nks); - resmem_sd_op()(gpu_ctx, this->s_gcar, this->npwk_max * this->nks * 3); + resmem_sd_op()(this->s_gk2, this->npwk_max * this->nks); + resmem_sd_op()(this->s_gcar, this->npwk_max * this->nks * 3); castmem_d2s_h2d_op()(gpu_ctx, cpu_ctx, this->s_gk2, this->gk2, this->npwk_max * this->nks); castmem_d2s_h2d_op()(gpu_ctx, cpu_ctx, this->s_gcar, reinterpret_cast(&this->gcar[0][0]), this->npwk_max * this->nks * 3); } else { - resmem_dd_op()(gpu_ctx, this->d_gk2, this->npwk_max * this->nks); - resmem_dd_op()(gpu_ctx, this->d_gcar, this->npwk_max * this->nks * 3); + resmem_dd_op()(this->d_gk2, this->npwk_max * this->nks); + resmem_dd_op()(this->d_gcar, this->npwk_max * this->nks * 3); syncmem_d2d_h2d_op()(gpu_ctx, cpu_ctx, this->d_gk2, this->gk2, this->npwk_max * this->nks); syncmem_d2d_h2d_op()(gpu_ctx, cpu_ctx, this->d_gcar, reinterpret_cast(&this->gcar[0][0]), this->npwk_max * this->nks * 3); } @@ -262,8 +262,8 @@ void PW_Basis_K::collect_local_pw(const double& erf_ecut_in, const double& erf_h else { #endif if (this->precision == "single") { - resmem_sh_op()(cpu_ctx, this->s_gk2, this->npwk_max * this->nks, "PW_B_K::s_gk2"); - resmem_sh_op()(cpu_ctx, this->s_gcar, this->npwk_max * this->nks * 3, "PW_B_K::s_gcar"); + resmem_sh_op()(this->s_gk2, this->npwk_max * this->nks, "PW_B_K::s_gk2"); + resmem_sh_op()(this->s_gcar, this->npwk_max * this->nks * 3, "PW_B_K::s_gcar"); castmem_d2s_h2h_op()(cpu_ctx, cpu_ctx, this->s_gk2, this->gk2, this->npwk_max * this->nks); castmem_d2s_h2h_op()(cpu_ctx, cpu_ctx, this->s_gcar, reinterpret_cast(&this->gcar[0][0]), this->npwk_max * this->nks * 3); } @@ -355,7 +355,7 @@ void PW_Basis_K::get_ig2ixyz_k() ig2ixyz_k_cpu[igl + ik * npwk_max] = iz + iy * nz + ix * ny * nz; } } - resmem_int_op()(gpu_ctx, ig2ixyz_k, this->npwk_max * this->nks); + resmem_int_op()(ig2ixyz_k, this->npwk_max * this->nks); syncmem_int_h2d_op()(gpu_ctx, cpu_ctx, this->ig2ixyz_k, ig2ixyz_k_cpu, this->npwk_max * this->nks); delete[] ig2ixyz_k_cpu; } diff --git a/source/module_basis/module_pw/pw_basis_sup.cpp b/source/module_basis/module_pw/pw_basis_sup.cpp index 1d34682a96..3965db2404 100644 --- a/source/module_basis/module_pw/pw_basis_sup.cpp +++ b/source/module_basis/module_pw/pw_basis_sup.cpp @@ -420,7 +420,7 @@ void PW_Basis_Sup::get_ig2isz_is2fftixy( #if defined(__CUDA) || defined(__ROCM) if (this->device == "gpu") { - resmem_int_op()(gpu_ctx, d_is2fftixy, this->nst); + resmem_int_op()(d_is2fftixy, this->nst); syncmem_int_h2d_op()(gpu_ctx, cpu_ctx, this->d_is2fftixy, this->is2fftixy, this->nst); } #endif diff --git a/source/module_basis/module_pw/pw_distributeg.cpp b/source/module_basis/module_pw/pw_distributeg.cpp index c93ff9357a..5c2584642b 100644 --- a/source/module_basis/module_pw/pw_distributeg.cpp +++ b/source/module_basis/module_pw/pw_distributeg.cpp @@ -194,7 +194,7 @@ void PW_Basis::get_ig2isz_is2fftixy( } #if defined(__CUDA) || defined(__ROCM) if (this->device == "gpu") { - resmem_int_op()(gpu_ctx, d_is2fftixy, this->nst); + resmem_int_op()(d_is2fftixy, this->nst); syncmem_int_h2d_op()(gpu_ctx, cpu_ctx, this->d_is2fftixy, this->is2fftixy, this->nst); } #endif diff --git a/source/module_elecstate/elecstate_pw.cpp b/source/module_elecstate/elecstate_pw.cpp index f55f2ec447..24fdedebd6 100644 --- a/source/module_elecstate/elecstate_pw.cpp +++ b/source/module_elecstate/elecstate_pw.cpp @@ -66,7 +66,7 @@ void ElecStatePW::init_rho_data() if (PARAM.inp.device == "gpu" || PARAM.inp.precision == "single") { this->rho = new Real*[this->charge->nspin]; - resmem_var_op()(this->ctx, this->rho_data, this->charge->nspin * this->charge->nrxx); + resmem_var_op()(this->rho_data, this->charge->nspin * this->charge->nrxx); for (int ii = 0; ii < this->charge->nspin; ii++) { this->rho[ii] = this->rho_data + ii * this->charge->nrxx; @@ -74,7 +74,7 @@ void ElecStatePW::init_rho_data() if (PARAM.globalv.double_grid || PARAM.globalv.use_uspp) { this->rhog = new T*[this->charge->nspin]; - resmem_complex_op()(this->ctx, this->rhog_data, this->charge->nspin * this->charge->rhopw->npw); + resmem_complex_op()(this->rhog_data, this->charge->nspin * this->charge->rhopw->npw); for (int ii = 0; ii < this->charge->nspin; ii++) { this->rhog[ii] = this->rhog_data + ii * this->charge->rhopw->npw; @@ -83,7 +83,7 @@ void ElecStatePW::init_rho_data() if (get_xc_func_type() == 3 || PARAM.inp.out_elf[0] > 0) { this->kin_r = new Real*[this->charge->nspin]; - resmem_var_op()(this->ctx, this->kin_r_data, this->charge->nspin * this->charge->nrxx); + resmem_var_op()(this->kin_r_data, this->charge->nspin * this->charge->nrxx); for (int ii = 0; ii < this->charge->nspin; ii++) { this->kin_r[ii] = this->kin_r_data + ii * this->charge->nrxx; } @@ -101,8 +101,8 @@ void ElecStatePW::init_rho_data() this->kin_r = reinterpret_cast(this->charge->kin_r); } } - resmem_complex_op()(this->ctx, this->wfcr, this->basis->nmaxgr, "ElecSPW::wfcr"); - resmem_complex_op()(this->ctx, this->wfcr_another_spin, this->basis->nrxx, "ElecSPW::wfcr_a"); + resmem_complex_op()(this->wfcr, this->basis->nmaxgr, "ElecSPW::wfcr"); + resmem_complex_op()(this->wfcr_another_spin, this->basis->nrxx, "ElecSPW::wfcr_a"); this->init_rho = true; } @@ -277,9 +277,9 @@ void ElecStatePW::cal_becsum(const psi::Psi& psi) const int nkb = this->ppcell->nkb; this->vkb = this->ppcell->template get_vkb_data(); T* becp = nullptr; - resmem_complex_op()(this->ctx, becp, nbands * nkb, "ElecState::becp"); + resmem_complex_op()(becp, nbands * nkb, "ElecState::becp"); const int nh_tot = this->ppcell->nhm * (this->ppcell->nhm + 1) / 2; - resmem_var_op()(this->ctx, becsum, nh_tot * ucell->nat * PARAM.inp.nspin, "ElecState::becsum"); + resmem_var_op()(becsum, nh_tot * ucell->nat * PARAM.inp.nspin, "ElecState::becsum"); setmem_var_op()(this->ctx, becsum, 0, nh_tot * ucell->nat * PARAM.inp.nspin); for (int ik = 0; ik < psi.get_nk(); ++ik) @@ -340,10 +340,9 @@ void ElecStatePW::cal_becsum(const psi::Psi& psi) if (atom->ncpp.tvanp) { T *auxk1 = nullptr, *auxk2 = nullptr, *aux_gk = nullptr; - resmem_complex_op()(this->ctx, auxk1, nbands * atom->ncpp.nh, "ElecState::auxk1"); - resmem_complex_op()(this->ctx, auxk2, nbands * atom->ncpp.nh, "ElecState::auxk2"); - resmem_complex_op()(this->ctx, - aux_gk, + resmem_complex_op()(auxk1, nbands * atom->ncpp.nh, "ElecState::auxk1"); + resmem_complex_op()(auxk2, nbands * atom->ncpp.nh, "ElecState::auxk2"); + resmem_complex_op()(aux_gk, atom->ncpp.nh * atom->ncpp.nh * npol * npol, "ElecState::aux_gk"); for (int ia = 0; ia < atom->na; ia++) @@ -469,11 +468,11 @@ void ElecStatePW::addusdens_g(const Real* becsum, T** rhog) const std::complex ci_tpi = ModuleBase::NEG_IMAG_UNIT * ModuleBase::TWO_PI; Real* qmod = nullptr; - resmem_var_op()(this->ctx, qmod, npw, "ElecState::qmod"); + resmem_var_op()(qmod, npw, "ElecState::qmod"); T* qgm = nullptr; - resmem_complex_op()(this->ctx, qgm, npw, "ElecState::qgm"); + resmem_complex_op()(qgm, npw, "ElecState::qgm"); Real* ylmk0 = nullptr; - resmem_var_op()(this->ctx, ylmk0, npw * lmaxq * lmaxq, "ElecState::ylmk0"); + resmem_var_op()(ylmk0, npw * lmaxq * lmaxq, "ElecState::ylmk0"); Real* g = reinterpret_cast(this->charge->rhopw->gcar); ModuleBase::YlmReal::Ylm_Real(this->ctx, lmaxq * lmaxq, npw, g, ylmk0); @@ -492,9 +491,9 @@ void ElecStatePW::addusdens_g(const Real* becsum, T** rhog) const int nij = atom->ncpp.nh * (atom->ncpp.nh + 1) / 2; T *skk = nullptr, *aux2 = nullptr, *tbecsum = nullptr; - resmem_complex_op()(this->ctx, skk, atom->na * npw, "ElecState::skk"); - resmem_complex_op()(this->ctx, aux2, nij * npw, "ElecState::aux2"); - resmem_complex_op()(this->ctx, tbecsum, PARAM.inp.nspin * atom->na * nij, "ElecState::tbecsum"); + resmem_complex_op()(skk, atom->na * npw, "ElecState::skk"); + resmem_complex_op()(aux2, nij * npw, "ElecState::aux2"); + resmem_complex_op()(tbecsum, PARAM.inp.nspin * atom->na * nij, "ElecState::tbecsum"); for (int ia = 0; ia < atom->na; ia++) { const int iat = ucell->itia2iat(it, ia); diff --git a/source/module_elecstate/kernels/test/elecstate_op_test.cpp b/source/module_elecstate/kernels/test/elecstate_op_test.cpp index 79635c7895..499a11e811 100644 --- a/source/module_elecstate/kernels/test/elecstate_op_test.cpp +++ b/source/module_elecstate/kernels/test/elecstate_op_test.cpp @@ -107,8 +107,8 @@ TEST_F(TestModuleElecstateMultiDevice, elecstate_pw_op_gpu) std::vector rho_data(expected_rho.size(), 0); double* d_rho_data = NULL; std::complex* d_wfcr = NULL; - resize_memory_var_op()(gpu_ctx, d_rho_data, rho_data.size()); - resize_memory_complex_op()(gpu_ctx, d_wfcr, wfcr.size()); + resize_memory_var_op()(d_rho_data, rho_data.size()); + resize_memory_complex_op()(d_wfcr, wfcr.size()); syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_rho_data, rho_data.data(), rho_data.size()); syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_wfcr, wfcr.data(), wfcr.size()); double ** rho = new double* [1]; @@ -136,9 +136,9 @@ TEST_F(TestModuleElecstateMultiDevice, elecstate_pw_spin_op_gpu) double* d_rho_data_2 = NULL; std::complex* d_wfcr_2 = NULL; std::complex* d_wfcr_another_spin_2 = NULL; - resize_memory_var_op()(gpu_ctx, d_rho_data_2, rho_data_2.size()); - resize_memory_complex_op()(gpu_ctx, d_wfcr_2, wfcr_2.size()); - resize_memory_complex_op()(gpu_ctx, d_wfcr_another_spin_2, wfcr_another_spin_2.size()); + resize_memory_var_op()(d_rho_data_2, rho_data_2.size()); + resize_memory_complex_op()(d_wfcr_2, wfcr_2.size()); + resize_memory_complex_op()(d_wfcr_another_spin_2, wfcr_another_spin_2.size()); syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_rho_data_2, rho_data_2.data(), rho_data_2.size()); syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_wfcr_2, wfcr_2.data(), wfcr_2.size()); syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_wfcr_another_spin_2, wfcr_another_spin_2.data(), wfcr_another_spin_2.size()); diff --git a/source/module_elecstate/potentials/potential_new.cpp b/source/module_elecstate/potentials/potential_new.cpp index a4443c46d8..8b62f695f7 100644 --- a/source/module_elecstate/potentials/potential_new.cpp +++ b/source/module_elecstate/potentials/potential_new.cpp @@ -133,18 +133,18 @@ void Potential::allocate() } if (PARAM.inp.basis_type == "pw" && PARAM.inp.device == "gpu") { if (PARAM.inp.precision == "single") { - resmem_sd_op()(gpu_ctx, s_veff_smooth, PARAM.inp.nspin * nrxx_smooth); - resmem_sd_op()(gpu_ctx, s_vofk_smooth, PARAM.inp.nspin * nrxx_smooth); + resmem_sd_op()(s_veff_smooth, PARAM.inp.nspin * nrxx_smooth); + resmem_sd_op()(s_vofk_smooth, PARAM.inp.nspin * nrxx_smooth); } else { - resmem_dd_op()(gpu_ctx, d_veff_smooth, PARAM.inp.nspin * nrxx_smooth); - resmem_dd_op()(gpu_ctx, d_vofk_smooth, PARAM.inp.nspin * nrxx_smooth); + resmem_dd_op()(d_veff_smooth, PARAM.inp.nspin * nrxx_smooth); + resmem_dd_op()(d_vofk_smooth, PARAM.inp.nspin * nrxx_smooth); } } else { if (PARAM.inp.precision == "single") { - resmem_sh_op()(cpu_ctx, s_veff_smooth, PARAM.inp.nspin * nrxx_smooth, "POT::sveff_smooth"); - resmem_sh_op()(cpu_ctx, s_vofk_smooth, PARAM.inp.nspin * nrxx_smooth, "POT::svofk_smooth"); + resmem_sh_op()(s_veff_smooth, PARAM.inp.nspin * nrxx_smooth, "POT::sveff_smooth"); + resmem_sh_op()(s_vofk_smooth, PARAM.inp.nspin * nrxx_smooth, "POT::svofk_smooth"); } else { this->d_veff_smooth = this->veff_smooth.c; diff --git a/source/module_hamilt_lcao/module_deltaspin/cal_mw_from_lambda.cpp b/source/module_hamilt_lcao/module_deltaspin/cal_mw_from_lambda.cpp index 87a2fa41cc..65853aa0d8 100644 --- a/source/module_hamilt_lcao/module_deltaspin/cal_mw_from_lambda.cpp +++ b/source/module_hamilt_lcao/module_deltaspin/cal_mw_from_lambda.cpp @@ -27,7 +27,7 @@ void spinconstrain::SpinConstrain>::calculate_delta_hcc(std #if ((defined __CUDA) || (defined __ROCM)) base_device::DEVICE_GPU* ctx = {}; base_device::DEVICE_CPU* cpu_ctx = {}; - base_device::memory::resize_memory_op, base_device::DEVICE_CPU>()(cpu_ctx, becp_cpu, size_ps); + base_device::memory::resize_memory_op, base_device::DEVICE_CPU>()(becp_cpu, size_ps); base_device::memory::synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>()(cpu_ctx, ctx, becp_cpu, becp_k, size_ps); #endif } @@ -68,7 +68,7 @@ void spinconstrain::SpinConstrain>::calculate_delta_hcc(std #if ((defined __CUDA) || (defined __ROCM)) base_device::DEVICE_GPU* ctx = {}; base_device::DEVICE_CPU* cpu_ctx = {}; - base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(ctx, ps_pointer, size_ps); + base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(ps_pointer, size_ps); base_device::memory::synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>()(ctx, cpu_ctx, ps_pointer, ps.data(), size_ps); #endif } @@ -260,20 +260,20 @@ void spinconstrain::SpinConstrain>::cal_mw_from_lambda(int becp_tmp.resize(size_becp * nk); std::complex* h_tmp = nullptr; std::complex* s_tmp = nullptr; - base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(ctx, h_tmp, nbands * nbands); - base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(ctx, s_tmp, nbands * nbands); + base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(h_tmp, nbands * nbands); + base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(s_tmp, nbands * nbands); int initial_hs = 0; if(this->sub_h_save == nullptr) { initial_hs = 1; - base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(ctx, this->sub_h_save, nbands * nbands * nk); - base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(ctx, this->sub_s_save, nbands * nbands * nk); - base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(ctx, this->becp_save, size_becp * nk); + base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(this->sub_h_save, nbands * nbands * nk); + base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(this->sub_s_save, nbands * nbands * nk); + base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(this->becp_save, size_becp * nk); } std::complex* becp_pointer = nullptr; // allocate memory for becp_pointer in GPU device - base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(ctx, becp_pointer, size_becp); + base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(becp_pointer, size_becp); for (int ik = 0; ik < nk; ++ik) { psi_t->fix_k(ik); @@ -462,8 +462,8 @@ void spinconstrain::SpinConstrain>::update_psi_charge(const std::complex* h_tmp = nullptr; std::complex* s_tmp = nullptr; - base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(ctx, h_tmp, nbands * nbands); - base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(ctx, s_tmp, nbands * nbands); + base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(h_tmp, nbands * nbands); + base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(s_tmp, nbands * nbands); assert(this->sub_h_save != nullptr); assert(this->sub_s_save != nullptr); assert(this->becp_save != nullptr); diff --git a/source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp b/source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp index b41c8f476e..0f7a6127aa 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp @@ -158,42 +158,40 @@ void pseudopot_cell_vnl::init(const UnitCell& ucell, { if (PARAM.inp.precision == "single") { - resmem_sd_op()(gpu_ctx, s_deeq, PARAM.inp.nspin * ucell.nat * this->nhm * this->nhm); - resmem_sd_op()(gpu_ctx, s_nhtol, ntype * this->nhm); - resmem_sd_op()(gpu_ctx, s_nhtolm, ntype * this->nhm); - resmem_sd_op()(gpu_ctx, s_indv, ntype * this->nhm); - resmem_sd_op()(gpu_ctx, s_qq_nt, ntype * this->nhm * this->nhm); - resmem_cd_op()(gpu_ctx, c_deeq_nc, PARAM.inp.nspin * ucell.nat * this->nhm * this->nhm); - resmem_cd_op()(gpu_ctx, c_qq_so, ntype * 4 * this->nhm * this->nhm); + resmem_sd_op()(s_deeq, PARAM.inp.nspin * ucell.nat * this->nhm * this->nhm); + resmem_sd_op()(s_nhtol, ntype * this->nhm); + resmem_sd_op()(s_nhtolm, ntype * this->nhm); + resmem_sd_op()(s_indv, ntype * this->nhm); + resmem_sd_op()(s_qq_nt, ntype * this->nhm * this->nhm); + resmem_cd_op()(c_deeq_nc, PARAM.inp.nspin * ucell.nat * this->nhm * this->nhm); + resmem_cd_op()(c_qq_so, ntype * 4 * this->nhm * this->nhm); } else { - resmem_zd_op()(gpu_ctx, z_deeq_nc, PARAM.inp.nspin * ucell.nat * this->nhm * this->nhm); - resmem_zd_op()(gpu_ctx, z_qq_so, ntype * 4 * this->nhm * this->nhm); + resmem_zd_op()(z_deeq_nc, PARAM.inp.nspin * ucell.nat * this->nhm * this->nhm); + resmem_zd_op()(z_qq_so, ntype * 4 * this->nhm * this->nhm); } - resmem_dd_op()(gpu_ctx, d_deeq, PARAM.inp.nspin * ucell.nat * this->nhm * this->nhm); - resmem_dd_op()(gpu_ctx, d_indv, ntype * this->nhm); - resmem_dd_op()(gpu_ctx, d_nhtol, ntype * this->nhm); - resmem_dd_op()(gpu_ctx, d_nhtolm, ntype * this->nhm); - resmem_dd_op()(gpu_ctx, d_qq_nt, ntype * this->nhm * this->nhm); + resmem_dd_op()(d_deeq, PARAM.inp.nspin * ucell.nat * this->nhm * this->nhm); + resmem_dd_op()(d_indv, ntype * this->nhm); + resmem_dd_op()(d_nhtol, ntype * this->nhm); + resmem_dd_op()(d_nhtolm, ntype * this->nhm); + resmem_dd_op()(d_qq_nt, ntype * this->nhm * this->nhm); } else { if (PARAM.inp.precision == "single") { - resmem_sh_op()(cpu_ctx, - s_deeq, + resmem_sh_op()(s_deeq, PARAM.inp.nspin * ucell.nat * this->nhm * this->nhm, "VNL::s_deeq"); - resmem_sh_op()(cpu_ctx, s_nhtol, ntype * this->nhm, "VNL::s_nhtol"); - resmem_sh_op()(cpu_ctx, s_nhtolm, ntype * this->nhm, "VNL::s_nhtolm"); - resmem_sh_op()(cpu_ctx, s_indv, ntype * this->nhm, "VNL::s_indv"); - resmem_sh_op()(cpu_ctx, s_qq_nt, ntype * this->nhm * this->nhm, "VNL::s_qq_nt"); - resmem_ch_op()(cpu_ctx, - c_deeq_nc, + resmem_sh_op()(s_nhtol, ntype * this->nhm, "VNL::s_nhtol"); + resmem_sh_op()(s_nhtolm, ntype * this->nhm, "VNL::s_nhtolm"); + resmem_sh_op()(s_indv, ntype * this->nhm, "VNL::s_indv"); + resmem_sh_op()(s_qq_nt, ntype * this->nhm * this->nhm, "VNL::s_qq_nt"); + resmem_ch_op()(c_deeq_nc, PARAM.inp.nspin * ucell.nat * this->nhm * this->nhm, "VNL::c_deeq_nc"); - resmem_ch_op()(cpu_ctx, c_qq_so, ntype * 4 * this->nhm * this->nhm, "VNL::c_qq_so"); + resmem_ch_op()(c_qq_so, ntype * 4 * this->nhm * this->nhm, "VNL::c_qq_so"); } else { @@ -275,18 +273,18 @@ void pseudopot_cell_vnl::init(const UnitCell& ucell, { if (PARAM.inp.precision == "single") { - resmem_sd_op()(gpu_ctx, s_tab, this->tab.getSize()); - resmem_cd_op()(gpu_ctx, c_vkb, nkb * npwx); + resmem_sd_op()(s_tab, this->tab.getSize()); + resmem_cd_op()(c_vkb, nkb * npwx); } - resmem_zd_op()(gpu_ctx, z_vkb, nkb * npwx); - resmem_dd_op()(gpu_ctx, d_tab, this->tab.getSize()); + resmem_zd_op()(z_vkb, nkb * npwx); + resmem_dd_op()(d_tab, this->tab.getSize()); } else { if (PARAM.inp.precision == "single") { - resmem_sh_op()(cpu_ctx, s_tab, this->tab.getSize()); - resmem_ch_op()(cpu_ctx, c_vkb, nkb * npwx); + resmem_sh_op()(s_tab, this->tab.getSize()); + resmem_ch_op()(c_vkb, nkb * npwx); } this->z_vkb = this->vkb.c; this->d_tab = this->tab.ptr; @@ -339,7 +337,7 @@ void pseudopot_cell_vnl::getvnl(const int& ik, const UnitCell& ucell, ModuleBase using resmem_complex_op = base_device::memory::resize_memory_op, Device>; using delmem_complex_op = base_device::memory::delete_memory_op, Device>; std::complex* sk = nullptr; - resmem_complex_op()(ctx, sk, ucell.nat * npw, "VNL::sk"); + resmem_complex_op()(sk, ucell.nat * npw, "VNL::sk"); this->psf->get_sk(ctx, ik, this->wfcpw, sk); int jkb = 0, iat = 0; @@ -457,8 +455,8 @@ void pseudopot_cell_vnl::getvnl(Device* ctx, FPTYPE *vkb1 = nullptr, *gk = nullptr, *ylm = nullptr, *_tab = this->get_tab_data(), *_indv = this->get_indv_data(), *_nhtol = this->get_nhtol_data(), *_nhtolm = this->get_nhtolm_data(); - resmem_var_op()(ctx, ylm, x1 * npw, "VNL::ylm"); - resmem_var_op()(ctx, vkb1, nhm * npw, "VNL::vkb1"); + resmem_var_op()(ylm, x1 * npw, "VNL::ylm"); + resmem_var_op()(vkb1, nhm * npw, "VNL::vkb1"); ModuleBase::Vector3* _gk = new ModuleBase::Vector3[npw]; #ifdef _OPENMP @@ -470,14 +468,14 @@ void pseudopot_cell_vnl::getvnl(Device* ctx, } if (PARAM.inp.device == "gpu") { - resmem_int_op()(ctx, atom_nh, ucell.ntype); - resmem_int_op()(ctx, atom_nb, ucell.ntype); - resmem_int_op()(ctx, atom_na, ucell.ntype); + resmem_int_op()(atom_nh, ucell.ntype); + resmem_int_op()(atom_nb, ucell.ntype); + resmem_int_op()(atom_na, ucell.ntype); syncmem_int_op()(ctx, cpu_ctx, atom_nh, h_atom_nh, ucell.ntype); syncmem_int_op()(ctx, cpu_ctx, atom_nb, h_atom_nb, ucell.ntype); syncmem_int_op()(ctx, cpu_ctx, atom_na, h_atom_na, ucell.ntype); - resmem_var_op()(ctx, gk, npw * 3); + resmem_var_op()(gk, npw * 3); castmem_var_h2d_op()(ctx, cpu_ctx, gk, reinterpret_cast(_gk), npw * 3); } else @@ -487,7 +485,7 @@ void pseudopot_cell_vnl::getvnl(Device* ctx, atom_na = h_atom_na; if (PARAM.inp.precision == "single") { - resmem_var_op()(ctx, gk, npw * 3); + resmem_var_op()(gk, npw * 3); castmem_var_h2h_op()(cpu_ctx, cpu_ctx, gk, reinterpret_cast(_gk), npw * 3); } else @@ -499,7 +497,7 @@ void pseudopot_cell_vnl::getvnl(Device* ctx, ModuleBase::YlmReal::Ylm_Real(ctx, x1, npw, gk, ylm); std::complex* sk = nullptr; - resmem_complex_op()(ctx, sk, ucell.nat * npw); + resmem_complex_op()(sk, ucell.nat * npw); this->psf->get_sk(ctx, ik, this->wfcpw, sk); cal_vnl_op()(ctx, diff --git a/source/module_hamilt_pw/hamilt_pwdft/forces_cc.cpp b/source/module_hamilt_pw/hamilt_pwdft/forces_cc.cpp index 3346724deb..f055463e91 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/forces_cc.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/forces_cc.cpp @@ -134,12 +134,12 @@ void Forces::cal_force_cc(ModuleBase::matrix& forcecc, } if(this->device == base_device::GpuDevice ) { - resmem_var_op()(this->ctx, gv_x_d, rho_basis->npw); - resmem_var_op()(this->ctx, gv_y_d, rho_basis->npw); - resmem_var_op()(this->ctx, gv_z_d, rho_basis->npw); - resmem_var_op()(this->ctx, rhocgigg_vec_d, rho_basis->npw); - resmem_complex_op()(this->ctx, psiv_d, rho_basis->nmaxgr); - resmem_var_op()(this->ctx, force_d, 3); + resmem_var_op()(gv_x_d, rho_basis->npw); + resmem_var_op()(gv_y_d, rho_basis->npw); + resmem_var_op()(gv_z_d, rho_basis->npw); + resmem_var_op()(rhocgigg_vec_d, rho_basis->npw); + resmem_complex_op()(psiv_d, rho_basis->nmaxgr); + resmem_var_op()(force_d, 3); syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, gv_x_d, gv_x.data(), rho_basis->npw); syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, gv_y_d, gv_y.data(), rho_basis->npw); @@ -308,13 +308,13 @@ void Forces::deriv_drhoc double *aux_d = nullptr; double *drhocg_d = nullptr; if(this->device == base_device::GpuDevice ) { - resmem_var_op()(this->ctx, r_d, mesh); - resmem_var_op()(this->ctx, rhoc_d, mesh); - resmem_var_op()(this->ctx, rab_d, mesh); + resmem_var_op()(r_d, mesh); + resmem_var_op()(rhoc_d, mesh); + resmem_var_op()(rab_d, mesh); - resmem_var_op()(this->ctx, aux_d, mesh); - resmem_var_op()(this->ctx, gx_arr_d, rho_basis->ngg); - resmem_var_op()(this->ctx, drhocg_d, rho_basis->ngg); + resmem_var_op()(aux_d, mesh); + resmem_var_op()(gx_arr_d, rho_basis->ngg); + resmem_var_op()(drhocg_d, rho_basis->ngg); syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, gx_arr_d, gx_arr.data(), rho_basis->ngg); syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, r_d, r, mesh); diff --git a/source/module_hamilt_pw/hamilt_pwdft/forces_nl.cpp b/source/module_hamilt_pw/hamilt_pwdft/forces_nl.cpp index 8ecba030f3..c02dc4789a 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/forces_nl.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/forces_nl.cpp @@ -27,7 +27,7 @@ void Forces::cal_force_nl(ModuleBase::matrix& forcenl, // allocate memory for the force FPTYPE* force = nullptr; - resmem_var_op()(this->ctx, force, ucell_in.nat * 3); + resmem_var_op()(force, ucell_in.nat * 3); base_device::memory::set_memory_op()(this->ctx, force, 0.0, ucell_in.nat * 3); hamilt::FS_Nonlocal_tools nl_tools(&nlpp, &ucell_in, p_kv, wfc_basis, p_sf, wg, &ekb); diff --git a/source/module_hamilt_pw/hamilt_pwdft/forces_onsite.cpp b/source/module_hamilt_pw/hamilt_pwdft/forces_onsite.cpp index 240187b3ba..bbcf883056 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/forces_onsite.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/forces_onsite.cpp @@ -23,7 +23,7 @@ void Forces::cal_force_onsite(ModuleBase::matrix& force_onsite, // allocate memory for the force FPTYPE* force = nullptr; - resmem_var_op()(this->ctx, force, ucell_in.nat * 3); + resmem_var_op()(force, ucell_in.nat * 3); base_device::memory::set_memory_op()(this->ctx, force, 0.0, ucell_in.nat * 3); auto* onsite_p = projectors::OnsiteProjector::get_instance(); diff --git a/source/module_hamilt_pw/hamilt_pwdft/forces_scc.cpp b/source/module_hamilt_pw/hamilt_pwdft/forces_scc.cpp index f670ad9b27..347212221e 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/forces_scc.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/forces_scc.cpp @@ -190,13 +190,13 @@ void Forces::deriv_drhoc_scc(const bool& numeric, double *aux_d = nullptr; double *drhocg_d = nullptr; if (this->device == base_device::GpuDevice) { - resmem_var_op()(this->ctx, r_d, mesh); - resmem_var_op()(this->ctx, rhoc_d, mesh); - resmem_var_op()(this->ctx, rab_d, mesh); + resmem_var_op()(r_d, mesh); + resmem_var_op()(rhoc_d, mesh); + resmem_var_op()(rab_d, mesh); - resmem_var_op()(this->ctx, aux_d, mesh); - resmem_var_op()(this->ctx, gx_arr_d, rho_basis->ngg); - resmem_var_op()(this->ctx, drhocg_d, rho_basis->ngg); + resmem_var_op()(aux_d, mesh); + resmem_var_op()(gx_arr_d, rho_basis->ngg); + resmem_var_op()(drhocg_d, rho_basis->ngg); syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, diff --git a/source/module_hamilt_pw/hamilt_pwdft/fs_kin_tools.cpp b/source/module_hamilt_pw/hamilt_pwdft/fs_kin_tools.cpp index 89efb3f879..77c59c8644 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/fs_kin_tools.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/fs_kin_tools.cpp @@ -27,8 +27,8 @@ FS_Kin_tools::FS_Kin_tools(const UnitCell& ucell_in, if (this->device == base_device::GpuDevice) { - resmem_var_op()(this->ctx, d_gk, 3 * npwk_max); - resmem_var_op()(this->ctx, d_kfac, npwk_max); + resmem_var_op()(d_gk, 3 * npwk_max); + resmem_var_op()(d_kfac, npwk_max); } else { diff --git a/source/module_hamilt_pw/hamilt_pwdft/fs_nonlocal_tools.cpp b/source/module_hamilt_pw/hamilt_pwdft/fs_nonlocal_tools.cpp index 810b313292..8fdaa402e7 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/fs_nonlocal_tools.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/fs_nonlocal_tools.cpp @@ -73,36 +73,36 @@ void FS_Nonlocal_tools::allocate_memory(const ModuleBase::matrix // allocate the memory for vkb and vkb_deri. if (this->device == base_device::GpuDevice) { - resmem_int_op()(this->ctx, this->d_dvkb_indexes, max_nh * 4); + resmem_int_op()(this->d_dvkb_indexes, max_nh * 4); } - resmem_var_op()(this->ctx, this->hd_vq, max_nbeta * max_npw); - resmem_var_op()(this->ctx, this->hd_vq_deri, max_nbeta * max_npw); + resmem_var_op()(this->hd_vq, max_nbeta * max_npw); + resmem_var_op()(this->hd_vq_deri, max_nbeta * max_npw); const int _lmax = this->nlpp_->lmaxkb; - resmem_var_op()(this->ctx, this->hd_ylm, (_lmax + 1) * (_lmax + 1) * max_npw); - resmem_var_op()(this->ctx, this->hd_ylm_deri, 3 * (_lmax + 1) * (_lmax + 1) * max_npw); + resmem_var_op()(this->hd_ylm, (_lmax + 1) * (_lmax + 1) * max_npw); + resmem_var_op()(this->hd_ylm_deri, 3 * (_lmax + 1) * (_lmax + 1) * max_npw); const int nks = this->kv_->get_nks(); - resmem_var_op()(this->ctx, d_wk, nks); + resmem_var_op()(d_wk, nks); syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, d_wk, this->kv_->wk.data(), nks); if (this->device == base_device::GpuDevice) { - resmem_var_op()(this->ctx, d_wg, wg.nr * wg.nc); + resmem_var_op()(d_wg, wg.nr * wg.nc); syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, d_wg, wg.c, wg.nr * wg.nc); if (p_ekb != nullptr) { - resmem_var_op()(this->ctx, d_ekb, p_ekb->nr * p_ekb->nc); + resmem_var_op()(d_ekb, p_ekb->nr * p_ekb->nc); syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, d_ekb, p_ekb->c, p_ekb->nr * p_ekb->nc); } - resmem_int_op()(this->ctx, atom_nh, this->ntype); - resmem_int_op()(this->ctx, atom_na, this->ntype); + resmem_int_op()(atom_nh, this->ntype); + resmem_int_op()(atom_na, this->ntype); syncmem_int_h2d_op()(this->ctx, this->cpu_ctx, atom_nh, h_atom_nh.data(), this->ntype); syncmem_int_h2d_op()(this->ctx, this->cpu_ctx, atom_na, h_atom_na.data(), this->ntype); - resmem_var_op()(this->ctx, d_g_plus_k, max_npw * 5); - resmem_var_op()(this->ctx, d_pref, max_nh); - resmem_var_op()(this->ctx, d_vq_tab, this->nlpp_->tab.getSize()); - resmem_complex_op()(this->ctx, d_pref_in, max_nh); + resmem_var_op()(d_g_plus_k, max_npw * 5); + resmem_var_op()(d_pref, max_nh); + resmem_var_op()(d_vq_tab, this->nlpp_->tab.getSize()); + resmem_complex_op()(d_pref_in, max_nh); this->ppcell_vkb = this->nlpp_->template get_vkb_data(); } @@ -170,7 +170,7 @@ void FS_Nonlocal_tools::cal_vkb(const int& ik, const int& nbdall const int size_becp = nbdall * npol * this->nkb; if (this->becp == nullptr) { - resmem_complex_op()(this->ctx, becp, size_becp); + resmem_complex_op()(becp, size_becp); } // prepare math tools @@ -183,7 +183,7 @@ void FS_Nonlocal_tools::cal_vkb(const int& ik, const int& nbdall this->g_plus_k = maths.cal_gk(ik, this->wfc_basis_); FPTYPE *gk = g_plus_k.data(), *vq_tb = this->nlpp_->tab.ptr; // calculate sk - resmem_complex_op()(ctx, hd_sk, this->ucell_->nat * npw); + resmem_complex_op()(hd_sk, this->ucell_->nat * npw); this->sf_->get_sk(ctx, ik, this->wfc_basis_, hd_sk); std::complex* d_sk = this->hd_sk; // prepare ylm,size: (lmax+1)^2 * this->max_npw @@ -312,7 +312,7 @@ void FS_Nonlocal_tools::cal_vkb_deri_s(const int& ik, const int size_becp = nbdall * npol * this->nkb; if (this->dbecp == nullptr) { - resmem_complex_op()(this->ctx, dbecp, size_becp); + resmem_complex_op()(dbecp, size_becp); } // prepare math tools @@ -538,7 +538,7 @@ void FS_Nonlocal_tools::cal_vkb_deri_f(const int& ik, const int& const int size_becp = nbdall * npol * this->nkb; if (this->dbecp == nullptr) { - resmem_complex_op()(this->ctx, dbecp, 3 * size_becp); + resmem_complex_op()(dbecp, 3 * size_becp); } const std::complex* vkb_ptr = this->ppcell_vkb; @@ -547,8 +547,8 @@ void FS_Nonlocal_tools::cal_vkb_deri_f(const int& ik, const int& const int npw = this->wfc_basis_->npwk[ik]; if (this->pre_ik_f == -1) { - resmem_var_op()(this->ctx, gcar, 3 * this->wfc_basis_->npwk_max); - resmem_int_op()(this->ctx, gcar_zero_indexes, 3 * this->wfc_basis_->npwk_max); + resmem_var_op()(gcar, 3 * this->wfc_basis_->npwk_max); + resmem_int_op()(gcar_zero_indexes, 3 * this->wfc_basis_->npwk_max); } if (this->pre_ik_f != ik) @@ -730,7 +730,7 @@ void FS_Nonlocal_tools::transfer_gcar(const int& npw, const int& } // prepare the memory for vkb_save const int max_count = std::max(gcar_zero_counts[0], std::max(gcar_zero_counts[1], gcar_zero_counts[2])); - resmem_complex_op()(this->ctx, this->vkb_save, this->nkb * max_count); + resmem_complex_op()(this->vkb_save, this->nkb * max_count); // transfer the gcar and gcar_zero_indexes to the device syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, gcar, gcar_tmp.data(), 3 * npw_max); syncmem_int_h2d_op()(this->ctx, this->cpu_ctx, gcar_zero_indexes, gcar_zero_indexes_tmp.data(), 3 * npw_max); diff --git a/source/module_hamilt_pw/hamilt_pwdft/hamilt_pw.cpp b/source/module_hamilt_pw/hamilt_pwdft/hamilt_pw.cpp index 38ccd9632c..25590fdc20 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/hamilt_pw.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/hamilt_pw.cpp @@ -254,7 +254,7 @@ void HamiltPW::sPsi(const T* psi_in, // psi // psi updated, thus update if (this->ppcell->nkb > 0) { - resmem_complex_op()(this->ctx, becp, nbands * this->ppcell->nkb, "Hamilt::becp"); + resmem_complex_op()(becp, nbands * this->ppcell->nkb, "Hamilt::becp"); char transa = 'C'; char transb = 'N'; if (nbands == 1) @@ -294,7 +294,7 @@ void HamiltPW::sPsi(const T* psi_in, // psi Parallel_Reduce::reduce_pool(becp, this->ppcell->nkb * nbands); } - resmem_complex_op()(this->ctx, ps, this->ppcell->nkb * nbands, "Hamilt::ps"); + resmem_complex_op()(ps, this->ppcell->nkb * nbands, "Hamilt::ps"); setmem_complex_op()(this->ctx, ps, 0, this->ppcell->nkb * nbands); // spsi = psi + sum qq |beta> @@ -316,7 +316,7 @@ void HamiltPW::sPsi(const T* psi_in, // psi { const int nh = atoms->ncpp.nh; T* qqc = nullptr; - resmem_complex_op()(this->ctx, qqc, nh * nh, "Hamilt::qqc"); + resmem_complex_op()(qqc, nh * nh, "Hamilt::qqc"); Real* qq_now = &qq_nt[it * this->ppcell->nhm * this->ppcell->nhm]; for (int i = 0; i < nh; i++) { diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/ekinetic_op_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/ekinetic_op_test.cpp index 324d4fb752..7129b78261 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/ekinetic_op_test.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/ekinetic_op_test.cpp @@ -81,10 +81,10 @@ TEST_F(TestModuleHamiltEkinetic, ekinetic_pw_op_gpu) { double* gk2_dev = NULL; std::complex* hpsi_dev = NULL, * psi_dev = NULL; - resize_memory_double_op()(gpu_ctx, gk2_dev, gk2.size()); - resize_memory_complex_double_op()(gpu_ctx, psi_dev, psi.size()); + resize_memory_double_op()(gk2_dev, gk2.size()); + resize_memory_complex_double_op()(psi_dev, psi.size()); std::vector > hpsi(expected_hpsi.size(), std::complex(0.0, 0.0)); - resize_memory_complex_double_op()(gpu_ctx, hpsi_dev, hpsi.size()); + resize_memory_complex_double_op()(hpsi_dev, hpsi.size()); syncmem_cd_h2d_op()(gpu_ctx, cpu_ctx, hpsi_dev, hpsi.data(), hpsi.size()); syncmem_d_h2d_op()(gpu_ctx, cpu_ctx, gk2_dev, gk2.data(), gk2.size()); syncmem_cd_h2d_op()(gpu_ctx, cpu_ctx, psi_dev, psi.data(), psi.size()); diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/force_op_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/force_op_test.cpp index 0507ff3358..3526bc705d 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/force_op_test.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/force_op_test.cpp @@ -2919,9 +2919,9 @@ TEST_F(TestSrcPWForceMultiDevice, cal_vkb1_nl_op_gpu) std::vector> res = vkb1; std::complex*d_res = nullptr, *d_vkb = nullptr; double* d_gcar = nullptr; - resmem_complex_op()(gpu_ctx, d_res, res.size()); - resmem_complex_op()(gpu_ctx, d_vkb, vkb.size()); - resmem_var_op()(gpu_ctx, d_gcar, gcar.size()); + resmem_complex_op()(d_res, res.size()); + resmem_complex_op()(d_vkb, vkb.size()); + resmem_var_op()(d_gcar, gcar.size()); syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_res, res.data(), res.size()); syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_vkb, vkb.data(), vkb.size()); syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_gcar, gcar.data(), gcar.size()); @@ -2953,11 +2953,11 @@ TEST_F(TestSrcPWForceMultiDevice, cal_force_nl_op_gpu) std::vector res(expected_force.size(), 0); double *d_res = nullptr, *d_wg = nullptr, *d_deeq = nullptr; double *d_ekb = nullptr, *d_qq_nt = nullptr; - resmem_var_op()(gpu_ctx, d_wg, wg.size()); - resmem_var_op()(gpu_ctx, d_res, res.size()); - resmem_var_op()(gpu_ctx, d_deeq, deeq.size()); - resmem_var_op()(gpu_ctx, d_ekb, ekb.size()); - resmem_var_op()(gpu_ctx, d_qq_nt, qq_nt.size()); + resmem_var_op()(d_wg, wg.size()); + resmem_var_op()(d_res, res.size()); + resmem_var_op()(d_deeq, deeq.size()); + resmem_var_op()(d_ekb, ekb.size()); + resmem_var_op()(d_qq_nt, qq_nt.size()); syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_wg, wg.data(), wg.size()); syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_res, res.data(), res.size()); syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_deeq, deeq.data(), deeq.size()); @@ -2965,14 +2965,14 @@ TEST_F(TestSrcPWForceMultiDevice, cal_force_nl_op_gpu) syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_qq_nt, qq_nt.data(), qq_nt.size()); int *d_atom_nh = nullptr, *d_atom_na = nullptr; - resmem_int_op()(gpu_ctx, d_atom_nh, atom_nh.size()); - resmem_int_op()(gpu_ctx, d_atom_na, atom_na.size()); + resmem_int_op()(d_atom_nh, atom_nh.size()); + resmem_int_op()(d_atom_na, atom_na.size()); syncmem_int_h2d_op()(gpu_ctx, cpu_ctx, d_atom_nh, atom_nh.data(), atom_nh.size()); syncmem_int_h2d_op()(gpu_ctx, cpu_ctx, d_atom_na, atom_na.data(), atom_na.size()); std::complex*d_becp = nullptr, *d_dbecp = nullptr; - resmem_complex_op()(gpu_ctx, d_becp, becp.size()); - resmem_complex_op()(gpu_ctx, d_dbecp, dbecp.size()); + resmem_complex_op()(d_becp, becp.size()); + resmem_complex_op()(d_dbecp, dbecp.size()); syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_becp, becp.data(), becp.size()); syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_dbecp, dbecp.data(), dbecp.size()); diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/meta_op_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/meta_op_test.cpp index d9e9244004..aafae35d27 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/meta_op_test.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/meta_op_test.cpp @@ -60,10 +60,10 @@ TEST_F(TestModuleHamiltMeta, meta_pw_op_gpu) std::vector> res(expected_out.size(), std::complex {0, 0}); double * d_gcar = nullptr, * d_kvec_c = nullptr; std::complex* d_in = nullptr, * d_res = nullptr; - resmem_var_op()(gpu_ctx, d_gcar, gcar.size()); - resmem_var_op()(gpu_ctx, d_kvec_c, kvec_c.size()); - resmem_complex_op()(gpu_ctx, d_in, in.size()); - resmem_complex_op()(gpu_ctx, d_res, res.size()); + resmem_var_op()(d_gcar, gcar.size()); + resmem_var_op()(d_kvec_c, kvec_c.size()); + resmem_complex_op()(d_in, in.size()); + resmem_complex_op()(d_res, res.size()); syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_gcar, gcar.data(), gcar.size()); syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_kvec_c, kvec_c.data(), kvec_c.size()); syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_in, in.data(), in.size()); diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/nonlocal_op_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/nonlocal_op_test.cpp index 8591182d4b..fd7ce1f98b 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/nonlocal_op_test.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/nonlocal_op_test.cpp @@ -127,9 +127,9 @@ TEST_F(TestModuleHamiltNonlocal, nonlocal_pw_op_gpu) double* deeq_dev = NULL; std::complex* ps_dev = NULL, * becp_dev = NULL; std::vector> ps(expected_ps.size(), std::complex(0.0, 0.0)); - resize_memory_double_op()(gpu_ctx, deeq_dev, deeq.size()); - resize_memory_complex_double_op()(gpu_ctx, ps_dev, ps.size()); - resize_memory_complex_double_op()(gpu_ctx, becp_dev, becp.size()); + resize_memory_double_op()(deeq_dev, deeq.size()); + resize_memory_complex_double_op()(ps_dev, ps.size()); + resize_memory_complex_double_op()(becp_dev, becp.size()); syncmem_d_h2d_op()(gpu_ctx, cpu_ctx, deeq_dev, deeq.data(), deeq.size()); syncmem_cd_h2d_op()(gpu_ctx, cpu_ctx, ps_dev, ps.data(), ps.size()); syncmem_cd_h2d_op()(gpu_ctx, cpu_ctx, becp_dev, becp.data(), becp.size()); @@ -157,9 +157,9 @@ TEST_F(TestModuleHamiltNonlocal, nonlocal_pw_spin_op_gpu) sum = 0; iat = 0; std::complex* ps_dev = NULL, * becp_dev = NULL, * deeq_dev = NULL; std::vector> ps(expected_ps.size(), std::complex(0.0, 0.0)); - resize_memory_complex_double_op()(gpu_ctx, deeq_dev, deeq_spin.size()); - resize_memory_complex_double_op()(gpu_ctx, ps_dev, ps.size()); - resize_memory_complex_double_op()(gpu_ctx, becp_dev, becp_spin.size()); + resize_memory_complex_double_op()(deeq_dev, deeq_spin.size()); + resize_memory_complex_double_op()(ps_dev, ps.size()); + resize_memory_complex_double_op()(becp_dev, becp_spin.size()); syncmem_cd_h2d_op()(gpu_ctx, cpu_ctx, deeq_dev, deeq_spin.data(), deeq_spin.size()); syncmem_cd_h2d_op()(gpu_ctx, cpu_ctx, ps_dev, ps.data(), ps.size()); syncmem_cd_h2d_op()(gpu_ctx, cpu_ctx, becp_dev, becp_spin.data(), becp_spin.size()); diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/stress_op_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/stress_op_test.cpp index cbf434da0c..58a42f9238 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/stress_op_test.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/stress_op_test.cpp @@ -137,12 +137,12 @@ TEST(TestSrcPWStressMultiDevice, cal_dbecp_noevc_nl_op_gpu) std::complex * d_vkb0i = nullptr, * d_vkb0j = nullptr, * d_vkb = nullptr, * d_vkb1 = nullptr, * d_vkb2 = nullptr, * d_dbecp_noevc = nullptr; double * d_gcar = nullptr, * d_kvec_c = nullptr; - resmem_zd_op()(gpu_ctx, d_vkb0i, vkb0i.size()); - resmem_zd_op()(gpu_ctx, d_vkb0j, vkb0j.size()); - resmem_zd_op()(gpu_ctx, d_vkb, vkb.size()); - resmem_zd_op()(gpu_ctx, d_vkb1, vkb1.size()); - resmem_zd_op()(gpu_ctx, d_vkb2, vkb2.size()); - resmem_zd_op()(gpu_ctx, d_dbecp_noevc, dbecp_noevc.size()); + resmem_zd_op()(d_vkb0i, vkb0i.size()); + resmem_zd_op()(d_vkb0j, vkb0j.size()); + resmem_zd_op()(d_vkb, vkb.size()); + resmem_zd_op()(d_vkb1, vkb1.size()); + resmem_zd_op()(d_vkb2, vkb2.size()); + resmem_zd_op()(d_dbecp_noevc, dbecp_noevc.size()); syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, d_vkb0i, vkb0i.data(), vkb0i.size()); syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, d_vkb0j, vkb0j.data(), vkb0j.size()); syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, d_vkb, vkb.data(), vkb.size()); @@ -150,8 +150,8 @@ TEST(TestSrcPWStressMultiDevice, cal_dbecp_noevc_nl_op_gpu) syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, d_vkb2, vkb2.data(), vkb2.size()); syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, d_dbecp_noevc, dbecp_noevc.data(), dbecp_noevc.size()); - resmem_dd_op()(gpu_ctx, d_gcar, gcar.size()); - resmem_dd_op()(gpu_ctx, d_kvec_c, kvec_c.size()); + resmem_dd_op()(d_gcar, gcar.size()); + resmem_dd_op()(d_kvec_c, kvec_c.size()); syncmem_d2d_h2d_op()(gpu_ctx, cpu_ctx, d_gcar, gcar.data(), gcar.size()); syncmem_d2d_h2d_op()(gpu_ctx, cpu_ctx, d_kvec_c, kvec_c.data(), kvec_c.size()); @@ -236,16 +236,16 @@ TEST(TestSrcPWStressMultiDevice, cal_stress_nl_op_gpu) double * d_wg = nullptr, * d_deeq = nullptr, * d_stress = nullptr; double * d_ekb = nullptr, * d_qq_nt = nullptr; int * d_atom_nh = nullptr, * d_atom_na = nullptr; - resmem_zd_op()(gpu_ctx, d_becp, becp.size()); - resmem_zd_op()(gpu_ctx, d_dbecp, dbecp.size()); + resmem_zd_op()(d_becp, becp.size()); + resmem_zd_op()(d_dbecp, dbecp.size()); syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, d_becp, becp.data(), becp.size()); syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, d_dbecp, dbecp.data(), dbecp.size()); - resmem_dd_op()(gpu_ctx, d_wg, wg.size()); - resmem_dd_op()(gpu_ctx, d_deeq, deeq.size()); - resmem_dd_op()(gpu_ctx, d_stress, stress.size()); - resmem_dd_op()(gpu_ctx, d_ekb, ekb.size()); - resmem_dd_op()(gpu_ctx, d_qq_nt, qq_nt.size()); + resmem_dd_op()(d_wg, wg.size()); + resmem_dd_op()(d_deeq, deeq.size()); + resmem_dd_op()(d_stress, stress.size()); + resmem_dd_op()(d_ekb, ekb.size()); + resmem_dd_op()(d_qq_nt, qq_nt.size()); syncmem_d2d_h2d_op()(gpu_ctx, cpu_ctx, d_wg, wg.data(), wg.size()); syncmem_d2d_h2d_op()(gpu_ctx, cpu_ctx, d_deeq, deeq.data(), deeq.size()); syncmem_d2d_h2d_op()(gpu_ctx, cpu_ctx, d_stress, stress.data(), stress.size()); @@ -257,8 +257,8 @@ TEST(TestSrcPWStressMultiDevice, cal_stress_nl_op_gpu) using syncmem_int_h2d_op = base_device::memory::synchronize_memory_op; - resmem_int_op()(gpu_ctx, d_atom_nh, atom_nh.size()); - resmem_int_op()(gpu_ctx, d_atom_na, atom_na.size()); + resmem_int_op()(d_atom_nh, atom_nh.size()); + resmem_int_op()(d_atom_na, atom_na.size()); syncmem_int_h2d_op()(gpu_ctx, cpu_ctx, d_atom_nh, atom_nh.data(), atom_nh.size()); syncmem_int_h2d_op()(gpu_ctx, cpu_ctx, d_atom_na, atom_na.data(), atom_na.size()); diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/veff_op_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/veff_op_test.cpp index 318646f063..67a453e34b 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/veff_op_test.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/veff_op_test.cpp @@ -89,8 +89,8 @@ TEST_F(TestModuleHamiltVeff, veff_pw_op_gpu) std::vector> res = out; double* d_in = NULL; std::complex* d_res = NULL; - resize_memory_double_op()(gpu_ctx, d_in, in.size()); - resize_memory_complex_op()(gpu_ctx, d_res, res.size()); + resize_memory_double_op()(d_in, in.size()); + resize_memory_complex_op()(d_res, res.size()); syncmem_double_h2d_op()(gpu_ctx, cpu_ctx, d_in, in.data(), in.size()); syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_res, res.data(), res.size()); @@ -112,9 +112,9 @@ TEST_F(TestModuleHamiltVeff, veff_pw_spin_op_gpu) std::vector> res1 = out1_spin; double* d_in = NULL; std::complex* d_res = NULL, * d_res1 = NULL; - resize_memory_double_op()(gpu_ctx, d_in, in_spin.size()); - resize_memory_complex_op()(gpu_ctx, d_res, res.size()); - resize_memory_complex_op()(gpu_ctx, d_res1, res1.size()); + resize_memory_double_op()(d_in, in_spin.size()); + resize_memory_complex_op()(d_res, res.size()); + resize_memory_complex_op()(d_res1, res1.size()); syncmem_double_h2d_op()(gpu_ctx, cpu_ctx, d_in, in_spin.data(), in_spin.size()); syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_res, res.data(), res.size()); syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_res1, res1.data(), res1.size()); diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/vnl_op_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/vnl_op_test.cpp index 428304c52d..04d27fa92e 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/vnl_op_test.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/vnl_op_test.cpp @@ -4049,20 +4049,20 @@ TEST_F(TestSrcPWVnlMultiDevice, cal_vnl_op_gpu) *d_tab = nullptr, *d_vkb1 = nullptr; std::complex*d_sk = nullptr, *d_vkb = nullptr; - resmem_int_op()(gpu_ctx, d_atom_na, atom_na.size()); - resmem_int_op()(gpu_ctx, d_atom_nb, atom_nb.size()); - resmem_int_op()(gpu_ctx, d_atom_nh, atom_nh.size()); + resmem_int_op()(d_atom_na, atom_na.size()); + resmem_int_op()(d_atom_nb, atom_nb.size()); + resmem_int_op()(d_atom_nh, atom_nh.size()); syncmem_int_h2d_op()(gpu_ctx, cpu_ctx, d_atom_na, atom_na.data(), atom_na.size()); syncmem_int_h2d_op()(gpu_ctx, cpu_ctx, d_atom_nb, atom_nb.data(), atom_nb.size()); syncmem_int_h2d_op()(gpu_ctx, cpu_ctx, d_atom_nh, atom_nh.data(), atom_nh.size()); - resmem_var_op()(gpu_ctx, d_gk, gk.size()); - resmem_var_op()(gpu_ctx, d_ylm, ylm.size()); - resmem_var_op()(gpu_ctx, d_indv, indv.size()); - resmem_var_op()(gpu_ctx, d_nhtol, nhtol.size()); - resmem_var_op()(gpu_ctx, d_nhtolm, nhtolm.size()); - resmem_var_op()(gpu_ctx, d_tab, tab.size()); - resmem_var_op()(gpu_ctx, d_vkb1, vkb1.size()); + resmem_var_op()(d_gk, gk.size()); + resmem_var_op()(d_ylm, ylm.size()); + resmem_var_op()(d_indv, indv.size()); + resmem_var_op()(d_nhtol, nhtol.size()); + resmem_var_op()(d_nhtolm, nhtolm.size()); + resmem_var_op()(d_tab, tab.size()); + resmem_var_op()(d_vkb1, vkb1.size()); syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_gk, gk.data(), gk.size()); syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_ylm, ylm.data(), ylm.size()); @@ -4072,8 +4072,8 @@ TEST_F(TestSrcPWVnlMultiDevice, cal_vnl_op_gpu) syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_tab, tab.data(), tab.size()); syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_vkb1, vkb1.data(), vkb1.size()); - resmem_complex_op()(gpu_ctx, d_sk, sk.size()); - resmem_complex_op()(gpu_ctx, d_vkb, vkb.size()); + resmem_complex_op()(d_sk, sk.size()); + resmem_complex_op()(d_vkb, vkb.size()); syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_sk, sk.data(), sk.size()); syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_vkb, vkb.data(), vkb.size()); diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/wf_op_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/wf_op_test.cpp index 8b46679d67..419dfbd536 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/wf_op_test.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/wf_op_test.cpp @@ -412,22 +412,22 @@ TEST_F(TestSrcPWWfMultiDevice, cal_sk_op_gpu) double * d_kvec_c = nullptr, * d_atom_tau = nullptr; std::complex * d_sk = nullptr, * d_eigts1 = nullptr, * d_eigts2 = nullptr, * d_eigts3 = nullptr; - resmem_int_op()(gpu_ctx, d_atom_na, atom_na.size()); - resmem_int_op()(gpu_ctx, d_igl2isz, igl2isz.size()); - resmem_int_op()(gpu_ctx, d_is2fftixy, is2fftixy.size()); + resmem_int_op()(d_atom_na, atom_na.size()); + resmem_int_op()(d_igl2isz, igl2isz.size()); + resmem_int_op()(d_is2fftixy, is2fftixy.size()); syncmem_int_h2d_op()(gpu_ctx, cpu_ctx, d_atom_na, atom_na.data(), atom_na.size()); syncmem_int_h2d_op()(gpu_ctx, cpu_ctx, d_igl2isz, igl2isz.data(), igl2isz.size()); syncmem_int_h2d_op()(gpu_ctx, cpu_ctx, d_is2fftixy, is2fftixy.data(), is2fftixy.size()); - resmem_var_op()(gpu_ctx, d_kvec_c, kvec_c.size()); - resmem_var_op()(gpu_ctx, d_atom_tau, atom_tau.size()); + resmem_var_op()(d_kvec_c, kvec_c.size()); + resmem_var_op()(d_atom_tau, atom_tau.size()); syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_kvec_c, kvec_c.data(), kvec_c.size()); syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_atom_tau, atom_tau.data(), atom_tau.size()); - resmem_complex_op()(gpu_ctx, d_sk, sk.size()); - resmem_complex_op()(gpu_ctx, d_eigts1, eigts1.size()); - resmem_complex_op()(gpu_ctx, d_eigts2, eigts2.size()); - resmem_complex_op()(gpu_ctx, d_eigts3, eigts3.size()); + resmem_complex_op()(d_sk, sk.size()); + resmem_complex_op()(d_eigts1, eigts1.size()); + resmem_complex_op()(d_eigts2, eigts2.size()); + resmem_complex_op()(d_eigts3, eigts3.size()); syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_sk, sk.data(), sk.size()); syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_eigts1, eigts1.data(), eigts1.size()); syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_eigts2, eigts2.data(), eigts2.size()); diff --git a/source/module_hamilt_pw/hamilt_pwdft/onsite_proj_tools.cpp b/source/module_hamilt_pw/hamilt_pwdft/onsite_proj_tools.cpp index d4b7e51b65..fca63be74b 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/onsite_proj_tools.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/onsite_proj_tools.cpp @@ -192,29 +192,29 @@ void Onsite_Proj_tools::allocate_memory(const ModuleBase::matrix // allocate the memory for vkb and vkb_deri. if (this->device == base_device::GpuDevice) { - resmem_int_op()(this->ctx, this->d_dvkb_indexes, max_nh * 4); + resmem_int_op()(this->d_dvkb_indexes, max_nh * 4); } - resmem_var_op()(this->ctx, this->hd_vq, nprojmax * max_npw); - resmem_var_op()(this->ctx, this->hd_vq_deri, nprojmax * max_npw); - resmem_var_op()(this->ctx, this->hd_ylm, (lprojmax + 1) * (lprojmax + 1) * max_npw); - resmem_var_op()(this->ctx, this->hd_ylm_deri, 3 * (lprojmax + 1) * (lprojmax + 1) * max_npw); + resmem_var_op()(this->hd_vq, nprojmax * max_npw); + resmem_var_op()(this->hd_vq_deri, nprojmax * max_npw); + resmem_var_op()(this->hd_ylm, (lprojmax + 1) * (lprojmax + 1) * max_npw); + resmem_var_op()(this->hd_ylm_deri, 3 * (lprojmax + 1) * (lprojmax + 1) * max_npw); if (this->device == base_device::GpuDevice) { - resmem_var_op()(this->ctx, d_wg, wg.nr * wg.nc); - resmem_var_op()(this->ctx, d_ekb, ekb.nr * ekb.nc); + resmem_var_op()(d_wg, wg.nr * wg.nc); + resmem_var_op()(d_ekb, ekb.nr * ekb.nc); syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, d_wg, wg.c, wg.nr * wg.nc); syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, d_ekb, ekb.c, ekb.nr * ekb.nc); - resmem_int_op()(this->ctx, atom_nh, this->ntype); - resmem_int_op()(this->ctx, atom_na, this->ntype); + resmem_int_op()(atom_nh, this->ntype); + resmem_int_op()(atom_na, this->ntype); syncmem_int_h2d_op()(this->ctx, this->cpu_ctx, atom_nh, h_atom_nh.data(), this->ntype); syncmem_int_h2d_op()(this->ctx, this->cpu_ctx, atom_na, h_atom_na.data(), this->ntype); - resmem_var_op()(this->ctx, d_g_plus_k, max_npw * 5); - resmem_var_op()(this->ctx, d_pref, max_nh); - resmem_var_op()(this->ctx, d_vq_tab, this->tabtpr->getSize()); - resmem_complex_op()(this->ctx, d_pref_in, max_nh); + resmem_var_op()(d_g_plus_k, max_npw * 5); + resmem_var_op()(d_pref, max_nh); + resmem_var_op()(d_vq_tab, this->tabtpr->getSize()); + resmem_complex_op()(d_pref_in, max_nh); } else { @@ -288,7 +288,7 @@ void Onsite_Proj_tools::cal_becp(int ik, const int npw = this->wfc_basis_->npwk[ik]; if (becp_in == nullptr && this->becp == nullptr) { - resmem_complex_op()(this->ctx, becp, this->nbands * npol * this->nkb); + resmem_complex_op()(becp, this->nbands * npol * this->nkb); } std::complex* becp_tmp = becp_in == nullptr ? this->becp : becp_in; const int size_becp_act = npm * npol * this->nkb; @@ -297,7 +297,7 @@ void Onsite_Proj_tools::cal_becp(int ik, const int size_becp = this->nbands * npol * this->nkb; if (this->becp == nullptr) { - resmem_complex_op()(this->ctx, becp, size_becp); + resmem_complex_op()(becp, size_becp); } // prepare math tools @@ -311,7 +311,7 @@ void Onsite_Proj_tools::cal_becp(int ik, // vq_tb has dimension (ntype, nproj, GlobalV::NQX) // calculate sk - resmem_complex_op()(ctx, hd_sk, this->ucell_->nat * npw); + resmem_complex_op()(hd_sk, this->ucell_->nat * npw); this->sf_->get_sk(ctx, ik, this->wfc_basis_, hd_sk); std::complex* d_sk = this->hd_sk; // prepare ylm,size: (lmax+1)^2 * this->max_npw @@ -443,7 +443,7 @@ void Onsite_Proj_tools::cal_becp(int ik, if (this->device == base_device::GpuDevice) { std::complex* h_becp = nullptr; - resmem_complex_h_op()(this->cpu_ctx, h_becp, size_becp_act); + resmem_complex_h_op()(h_becp, size_becp_act); syncmem_complex_d2h_op()(this->cpu_ctx, this->ctx, h_becp, becp_tmp, size_becp_act); Parallel_Reduce::reduce_pool(h_becp, size_becp_act); syncmem_complex_h2d_op()(this->ctx, this->cpu_ctx, becp_tmp, h_becp, size_becp_act); @@ -474,7 +474,7 @@ void Onsite_Proj_tools::cal_dbecp_s(int ik, int npm, int ipol, i const int npm_npol = npm * npol; if (this->dbecp == nullptr) { - resmem_complex_op()(this->ctx, dbecp, size_becp); + resmem_complex_op()(dbecp, size_becp); } // prepare math tools @@ -613,8 +613,8 @@ void Onsite_Proj_tools::cal_dbecp_f(int ik, int npm, int ipol) // calculate gcarx, gcary/gcarx and gcarz/gcary, overwrite gcar if (this->pre_ik_f == -1) // if it is the very first run, we allocate { - resmem_var_op()(this->ctx, gcar, 3 * this->wfc_basis_->npwk_max); - resmem_int_op()(this->ctx, gcar_zero_indexes, 3 * this->wfc_basis_->npwk_max); + resmem_var_op()(gcar, 3 * this->wfc_basis_->npwk_max); + resmem_int_op()(gcar_zero_indexes, 3 * this->wfc_basis_->npwk_max); } // first refresh the value of gcar_zero_indexes, gcar_zero_counts if (this->pre_ik_f != ik) @@ -647,7 +647,7 @@ void Onsite_Proj_tools::cal_dbecp_f(int ik, int npm, int ipol) const int size_becp = this->nbands * npol * this->nkb; if (this->dbecp == nullptr) // if it is the very first run, we allocate { // why not judging whether dbecp == nullptr inside resmem_complex_op? - resmem_complex_op()(this->ctx, dbecp, 3 * size_becp); + resmem_complex_op()(dbecp, 3 * size_becp); } // do gemm to get dbecp and revert the ppcell_vkb for next ipol const std::complex* ppsi = &(this->psi_[0](ik, 0, 0)); @@ -799,7 +799,7 @@ void Onsite_Proj_tools::transfer_gcar(int npw, int npw_max, cons } // prepare the memory for vkb_save const int max_count = std::max(gcar_zero_counts[0], std::max(gcar_zero_counts[1], gcar_zero_counts[2])); - resmem_complex_op()(this->ctx, this->vkb_save, this->nkb * max_count); + resmem_complex_op()(this->vkb_save, this->nkb * max_count); // transfer the gcar and gcar_zero_indexes to the device syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, gcar, gcar_tmp.data(), 3 * npw_max); syncmem_int_h2d_op()(this->ctx, this->cpu_ctx, gcar_zero_indexes, gcar_zero_indexes_tmp.data(), 3 * npw_max); @@ -819,9 +819,9 @@ void Onsite_Proj_tools::cal_force_dftu(int ik, #if defined(__CUDA) || defined(__ROCM) if (this->device == base_device::GpuDevice) { - resmem_int_op()(this->ctx, orbital_corr_tmp, this->ucell_->ntype); + resmem_int_op()(orbital_corr_tmp, this->ucell_->ntype); syncmem_int_h2d_op()(this->ctx, cpu_ctx, orbital_corr_tmp, orbital_corr, this->ucell_->ntype); - resmem_complex_op()(this->ctx, vu_tmp, size_vu); + resmem_complex_op()(vu_tmp, size_vu); syncmem_complex_h2d_op()(this->ctx, cpu_ctx, vu_tmp, vu, size_vu); syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, d_wg, h_wg, this->nbands * (ik+1)); } @@ -877,7 +877,7 @@ void Onsite_Proj_tools::cal_force_dspin(int ik, #if defined(__CUDA) || defined(__ROCM) if (this->device == base_device::GpuDevice) { - resmem_var_op()(this->ctx, lambda_tmp, this->ucell_->nat * 3); + resmem_var_op()(lambda_tmp, this->ucell_->nat * 3); syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, lambda_tmp, lambda_array.data(), this->ucell_->nat * 3); syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, d_wg, h_wg, this->nbands * (ik+1)); } @@ -927,9 +927,9 @@ void Onsite_Proj_tools::cal_stress_dftu(int ik, #if defined(__CUDA) || defined(__ROCM) if (this->device == base_device::GpuDevice) { - resmem_int_op()(this->ctx, orbital_corr_tmp, this->ucell_->ntype); + resmem_int_op()(orbital_corr_tmp, this->ucell_->ntype); syncmem_int_h2d_op()(this->ctx, cpu_ctx, orbital_corr_tmp, orbital_corr, this->ucell_->ntype); - resmem_complex_op()(this->ctx, vu_tmp, size_vu); + resmem_complex_op()(vu_tmp, size_vu); syncmem_complex_h2d_op()(this->ctx, cpu_ctx, vu_tmp, vu, size_vu); syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, d_wg, h_wg, this->nbands * (ik+1)); } @@ -981,7 +981,7 @@ void Onsite_Proj_tools::cal_stress_dspin(int ik, #if defined(__CUDA) || defined(__ROCM) if (this->device == base_device::GpuDevice) { - resmem_var_op()(this->ctx, lambda_tmp, this->ucell_->nat * 3); + resmem_var_op()(lambda_tmp, this->ucell_->nat * 3); syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, lambda_tmp, lambda_array.data(), this->ucell_->nat * 3); syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, d_wg, h_wg, this->nbands * (ik+1)); } diff --git a/source/module_hamilt_pw/hamilt_pwdft/onsite_projector.cpp b/source/module_hamilt_pw/hamilt_pwdft/onsite_projector.cpp index 2bb69dc131..9e329f0869 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/onsite_projector.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/onsite_projector.cpp @@ -173,7 +173,7 @@ void projectors::OnsiteProjector::init(const std::string& orbital_dir this->tot_nproj = itiaiprojm2irow_.size(); this->npwx_ = this->pw_basis_->npwk_max; this->size_vproj = this->tot_nproj * this->npwx_; - resmem_complex_op()(this->ctx, this->tab_atomic_, this->size_vproj, "OnsiteP::tab_atomic_"); + resmem_complex_op()(this->tab_atomic_, this->size_vproj, "OnsiteP::tab_atomic_"); } delete this->fs_tools; // it is okay to delete nullptr @@ -390,10 +390,10 @@ void projectors::OnsiteProjector::overlap_proj_psi( if(this->becp == nullptr || this->size_becp < npm*this->tot_nproj) { this->size_becp = npm*this->tot_nproj; - resmem_complex_op()(this->ctx, this->becp, this->size_becp); + resmem_complex_op()(this->becp, this->size_becp); if(this->device == base_device::GpuDevice ) { - resmem_complex_h_op()(this->cpu_ctx, this->h_becp, this->size_becp); + resmem_complex_h_op()(this->h_becp, this->size_becp); } else { diff --git a/source/module_hamilt_pw/hamilt_pwdft/operator_pw/meta_pw.cpp b/source/module_hamilt_pw/hamilt_pwdft/operator_pw/meta_pw.cpp index b0372109dc..1f17e35313 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/operator_pw/meta_pw.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/operator_pw/meta_pw.cpp @@ -27,7 +27,7 @@ Meta>::Meta(Real tpiba_in, this->vk_row = vk_row; this->vk_col = vk_col; this->wfcpw = wfcpw_in; - resmem_complex_op()(this->ctx, this->porter, this->wfcpw->nmaxgr, "Meta::porter"); + resmem_complex_op()(this->porter, this->wfcpw->nmaxgr, "Meta::porter"); } diff --git a/source/module_hamilt_pw/hamilt_pwdft/operator_pw/nonlocal_pw.cpp b/source/module_hamilt_pw/hamilt_pwdft/operator_pw/nonlocal_pw.cpp index 563e9d23a0..eb40c2251e 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/operator_pw/nonlocal_pw.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/operator_pw/nonlocal_pw.cpp @@ -72,7 +72,7 @@ void Nonlocal>::add_nonlocal_pp(T *hpsi_in, const T *becp, // T *ps = new T[nkb * m]; // ModuleBase::GlobalFunc::ZEROS(ps, m * nkb); if (this->nkb_m < m * nkb) { - resmem_complex_op()(this->ctx, this->ps, nkb * m, "Nonlocal::ps"); + resmem_complex_op()(this->ps, nkb * m, "Nonlocal::ps"); this->nkb_m = m * nkb; } setmem_complex_op()(this->ctx, this->ps, 0, nkb * m); @@ -235,7 +235,7 @@ void Nonlocal>::act( // qianrui optimize 2021-3-31 int nkb = this->ppcell->nkb; if (this->nkb_m < nbands * nkb) { - resmem_complex_op()(this->ctx, this->becp, nbands * nkb, "Nonlocal::becp"); + resmem_complex_op()(this->becp, nbands * nkb, "Nonlocal::becp"); } // ModuleBase::ComplexMatrix becp(nbands, nkb, false); char transa = 'C'; diff --git a/source/module_hamilt_pw/hamilt_pwdft/operator_pw/onsite_proj_pw.cpp b/source/module_hamilt_pw/hamilt_pwdft/operator_pw/onsite_proj_pw.cpp index 39f0c1458a..6d328e017f 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/operator_pw/onsite_proj_pw.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/operator_pw/onsite_proj_pw.cpp @@ -127,7 +127,7 @@ void OnsiteProj>::cal_ps_delta_spin(const int npol, const // T *ps = new T[tnp * m]; // ModuleBase::GlobalFunc::ZEROS(ps, m * tnp); if (this->nkb_m < m * tnp) { - resmem_complex_op()(this->ctx, this->ps, tnp * m, "OnsiteProj::ps"); + resmem_complex_op()(this->ps, tnp * m, "OnsiteProj::ps"); this->nkb_m = m * tnp; } setmem_complex_op()(this->ctx, this->ps, 0, tnp * m); @@ -136,8 +136,8 @@ void OnsiteProj>::cal_ps_delta_spin(const int npol, const { this->init_delta_spin = true; //prepare ip_iat and lambda_coeff - resmem_int_op()(this->ctx, this->ip_iat, onsite_p->get_tot_nproj()); - resmem_complex_op()(this->ctx, this->lambda_coeff, this->ucell->nat * 4); + resmem_int_op()(this->ip_iat, onsite_p->get_tot_nproj()); + resmem_complex_op()(this->lambda_coeff, this->ucell->nat * 4); std::vector ip_iat0(onsite_p->get_tot_nproj()); int ip0 = 0; for(int iat=0;iatucell->nat;iat++) @@ -225,7 +225,7 @@ void OnsiteProj>::cal_ps_dftu(const int npol, const int m) // T *ps = new T[tnp * m]; // ModuleBase::GlobalFunc::ZEROS(ps, m * tnp); if (this->nkb_m < m * tnp) { - resmem_complex_op()(this->ctx, this->ps, tnp * m, "OnsiteProj::ps"); + resmem_complex_op()(this->ps, tnp * m, "OnsiteProj::ps"); this->nkb_m = m * tnp; } if(!this->has_delta_spin) @@ -237,11 +237,11 @@ void OnsiteProj>::cal_ps_dftu(const int npol, const int m) { this->init_dftu = true; //prepare orb_l_iat, ip_m, vu_begin_iat and vu_device - resmem_int_op()(this->ctx, this->orb_l_iat, this->ucell->nat); - resmem_int_op()(this->ctx, this->ip_m, onsite_p->get_tot_nproj()); - resmem_int_op()(this->ctx, this->vu_begin_iat, this->ucell->nat); + resmem_int_op()(this->orb_l_iat, this->ucell->nat); + resmem_int_op()(this->ip_m, onsite_p->get_tot_nproj()); + resmem_int_op()(this->vu_begin_iat, this->ucell->nat); // recal the ip_iat - resmem_int_op()(this->ctx, this->ip_iat, onsite_p->get_tot_nproj()); + resmem_int_op()(this->ip_iat, onsite_p->get_tot_nproj()); std::vector ip_iat0(onsite_p->get_tot_nproj()); std::vector ip_m0(onsite_p->get_tot_nproj()); std::vector vu_begin_iat0(this->ucell->nat); @@ -290,7 +290,7 @@ void OnsiteProj>::cal_ps_dftu(const int npol, const int m) syncmem_int_h2d_op()(this->ctx, this->cpu_ctx, this->ip_m, ip_m0.data(), onsite_p->get_tot_nproj()); syncmem_int_h2d_op()(this->ctx, this->cpu_ctx, this->vu_begin_iat, vu_begin_iat0.data(), this->ucell->nat); - resmem_complex_op()(this->ctx, this->vu_device, dftu->get_size_eff_pot_pw()); + resmem_complex_op()(this->vu_device, dftu->get_size_eff_pot_pw()); } syncmem_complex_h2d_op()(this->ctx, this->cpu_ctx, this->vu_device, dftu->get_eff_pot_pw(0), dftu->get_size_eff_pot_pw()); diff --git a/source/module_hamilt_pw/hamilt_pwdft/operator_pw/veff_pw.cpp b/source/module_hamilt_pw/hamilt_pwdft/operator_pw/veff_pw.cpp index 2343ee7ecb..53d40b1980 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/operator_pw/veff_pw.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/operator_pw/veff_pw.cpp @@ -23,8 +23,8 @@ Veff>::Veff(const int* isk_in, this->veff_row = veff_row; this->veff_col = veff_col; this->wfcpw = wfcpw_in; - resmem_complex_op()(this->ctx, this->porter, this->wfcpw->nmaxgr, "Veff::porter"); - resmem_complex_op()(this->ctx, this->porter1, this->wfcpw->nmaxgr, "Veff::porter1"); + resmem_complex_op()(this->porter, this->wfcpw->nmaxgr, "Veff::porter"); + resmem_complex_op()(this->porter1, this->wfcpw->nmaxgr, "Veff::porter1"); } @@ -124,8 +124,8 @@ hamilt::Veff>::Veff(const Veff this->veff_col = veff->get_veff_col(); this->veff_row = veff->get_veff_row(); this->wfcpw = veff->get_wfcpw(); - resmem_complex_op()(this->ctx, this->porter, this->wfcpw->nmaxgr); - resmem_complex_op()(this->ctx, this->porter1, this->wfcpw->nmaxgr); + resmem_complex_op()(this->porter, this->wfcpw->nmaxgr); + resmem_complex_op()(this->porter1, this->wfcpw->nmaxgr); this->veff = veff->get_veff(); if (this->isk == nullptr || this->veff == nullptr || this->wfcpw == nullptr) { ModuleBase::WARNING_QUIT("VeffPW", "Constuctor of Operator::VeffPW is failed, please check your code!"); diff --git a/source/module_hamilt_pw/hamilt_pwdft/stress_func_cc.cpp b/source/module_hamilt_pw/hamilt_pwdft/stress_func_cc.cpp index ab8d9b3fa1..8bfac873d2 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/stress_func_cc.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/stress_func_cc.cpp @@ -289,13 +289,13 @@ void Stress_Func::deriv_drhoc double *aux_d = nullptr; double *drhocg_d = nullptr; if(this->device == base_device::GpuDevice ) { - resmem_var_op()(this->ctx, r_d, mesh); - resmem_var_op()(this->ctx, rhoc_d, mesh); - resmem_var_op()(this->ctx, rab_d, mesh); + resmem_var_op()(r_d, mesh); + resmem_var_op()(rhoc_d, mesh); + resmem_var_op()(rab_d, mesh); - resmem_var_op()(this->ctx, aux_d, mesh); - resmem_var_op()(this->ctx, gx_arr_d, rho_basis->ngg); - resmem_var_op()(this->ctx, drhocg_d, rho_basis->ngg); + resmem_var_op()(aux_d, mesh); + resmem_var_op()(gx_arr_d, rho_basis->ngg); + resmem_var_op()(drhocg_d, rho_basis->ngg); syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, gx_arr_d, gx_arr.data(), rho_basis->ngg); syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, r_d, r, mesh); diff --git a/source/module_hamilt_pw/hamilt_pwdft/stress_func_loc.cpp b/source/module_hamilt_pw/hamilt_pwdft/stress_func_loc.cpp index 740d692c39..3b842496f1 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/stress_func_loc.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/stress_func_loc.cpp @@ -244,13 +244,13 @@ const UnitCell& ucell_in double *aux_d = nullptr; double *drhocg_d = nullptr; if (this->device == base_device::GpuDevice) { - resmem_var_op()(this->ctx, r_d, msh); - resmem_var_op()(this->ctx, rhoc_d, msh); - resmem_var_op()(this->ctx, rab_d, msh); + resmem_var_op()(r_d, msh); + resmem_var_op()(rhoc_d, msh); + resmem_var_op()(rab_d, msh); - resmem_var_op()(this->ctx, aux_d, msh); - resmem_var_op()(this->ctx, gx_arr_d, rho_basis->ngg+1); - resmem_var_op()(this->ctx, drhocg_d, rho_basis->ngg); + resmem_var_op()(aux_d, msh); + resmem_var_op()(gx_arr_d, rho_basis->ngg+1); + resmem_var_op()(drhocg_d, rho_basis->ngg); syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, diff --git a/source/module_hamilt_pw/hamilt_pwdft/stress_func_nl.cpp b/source/module_hamilt_pw/hamilt_pwdft/stress_func_nl.cpp index 73b9e08a82..fcd5356ee5 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/stress_func_nl.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/stress_func_nl.cpp @@ -30,7 +30,7 @@ void Stress_Func::stress_nl(ModuleBase::matrix& sigma, ModuleBase::timer::tick("Stress_Func", "stress_nl"); FPTYPE* stress_device = nullptr; - resmem_var_op()(this->ctx, stress_device, 9); + resmem_var_op()(stress_device, 9); setmem_var_op()(this->ctx, stress_device, 0, 9); std::vector sigmanlc(9, 0.0); diff --git a/source/module_hamilt_pw/hamilt_pwdft/stress_func_onsite.cpp b/source/module_hamilt_pw/hamilt_pwdft/stress_func_onsite.cpp index 8568821a10..4c61d0841d 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/stress_func_onsite.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/stress_func_onsite.cpp @@ -22,7 +22,7 @@ void Stress_Func::stress_onsite(ModuleBase::matrix& sigma, ModuleBase::timer::tick("Stress_Func", "stress_onsite"); FPTYPE* stress_device = nullptr; - resmem_var_op()(this->ctx, stress_device, 9); + resmem_var_op()(stress_device, 9); setmem_var_op()(this->ctx, stress_device, 0, 9); std::vector sigma_onsite(9, 0.0); diff --git a/source/module_hamilt_pw/hamilt_pwdft/structure_factor.cpp b/source/module_hamilt_pw/hamilt_pwdft/structure_factor.cpp index 24dcbe27ce..a278aea4dd 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/structure_factor.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/structure_factor.cpp @@ -151,25 +151,25 @@ void Structure_Factor::setup_structure_factor(const UnitCell* Ucell, const Paral } if (device == "gpu") { if (PARAM.inp.precision == "single") { - resmem_cd_op()(gpu_ctx, this->c_eigts1, Ucell->nat * (2 * rho_basis->nx + 1)); - resmem_cd_op()(gpu_ctx, this->c_eigts2, Ucell->nat * (2 * rho_basis->ny + 1)); - resmem_cd_op()(gpu_ctx, this->c_eigts3, Ucell->nat * (2 * rho_basis->nz + 1)); + resmem_cd_op()(this->c_eigts1, Ucell->nat * (2 * rho_basis->nx + 1)); + resmem_cd_op()(this->c_eigts2, Ucell->nat * (2 * rho_basis->ny + 1)); + resmem_cd_op()(this->c_eigts3, Ucell->nat * (2 * rho_basis->nz + 1)); castmem_z2c_h2d_op()(gpu_ctx, cpu_ctx, this->c_eigts1, this->eigts1.c, Ucell->nat * (2 * rho_basis->nx + 1)); castmem_z2c_h2d_op()(gpu_ctx, cpu_ctx, this->c_eigts2, this->eigts2.c, Ucell->nat * (2 * rho_basis->ny + 1)); castmem_z2c_h2d_op()(gpu_ctx, cpu_ctx, this->c_eigts3, this->eigts3.c, Ucell->nat * (2 * rho_basis->nz + 1)); } - resmem_zd_op()(gpu_ctx, this->z_eigts1, Ucell->nat * (2 * rho_basis->nx + 1)); - resmem_zd_op()(gpu_ctx, this->z_eigts2, Ucell->nat * (2 * rho_basis->ny + 1)); - resmem_zd_op()(gpu_ctx, this->z_eigts3, Ucell->nat * (2 * rho_basis->nz + 1)); + resmem_zd_op()(this->z_eigts1, Ucell->nat * (2 * rho_basis->nx + 1)); + resmem_zd_op()(this->z_eigts2, Ucell->nat * (2 * rho_basis->ny + 1)); + resmem_zd_op()(this->z_eigts3, Ucell->nat * (2 * rho_basis->nz + 1)); syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, this->z_eigts1, this->eigts1.c, Ucell->nat * (2 * rho_basis->nx + 1)); syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, this->z_eigts2, this->eigts2.c, Ucell->nat * (2 * rho_basis->ny + 1)); syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, this->z_eigts3, this->eigts3.c, Ucell->nat * (2 * rho_basis->nz + 1)); } else { if (PARAM.inp.precision == "single") { - resmem_ch_op()(cpu_ctx, this->c_eigts1, Ucell->nat * (2 * rho_basis->nx + 1)); - resmem_ch_op()(cpu_ctx, this->c_eigts2, Ucell->nat * (2 * rho_basis->ny + 1)); - resmem_ch_op()(cpu_ctx, this->c_eigts3, Ucell->nat * (2 * rho_basis->nz + 1)); + resmem_ch_op()(this->c_eigts1, Ucell->nat * (2 * rho_basis->nx + 1)); + resmem_ch_op()(this->c_eigts2, Ucell->nat * (2 * rho_basis->ny + 1)); + resmem_ch_op()(this->c_eigts3, Ucell->nat * (2 * rho_basis->nz + 1)); castmem_z2c_h2h_op()(cpu_ctx, cpu_ctx, this->c_eigts1, this->eigts1.c, Ucell->nat * (2 * rho_basis->nx + 1)); castmem_z2c_h2h_op()(cpu_ctx, cpu_ctx, this->c_eigts2, this->eigts2.c, Ucell->nat * (2 * rho_basis->ny + 1)); castmem_z2c_h2h_op()(cpu_ctx, cpu_ctx, this->c_eigts3, this->eigts3.c, Ucell->nat * (2 * rho_basis->nz + 1)); diff --git a/source/module_hamilt_pw/hamilt_pwdft/structure_factor_k.cpp b/source/module_hamilt_pw/hamilt_pwdft/structure_factor_k.cpp index add76f6fb3..2fc457153a 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/structure_factor_k.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/structure_factor_k.cpp @@ -91,10 +91,10 @@ void Structure_Factor::get_sk(Device* ctx, } if (device == base_device::GpuDevice) { - resmem_int_op()(ctx, atom_na, ucell->ntype); + resmem_int_op()(atom_na, ucell->ntype); syncmem_int_op()(ctx, cpu_ctx, atom_na, h_atom_na, ucell->ntype); - resmem_var_op()(ctx, atom_tau, ucell->nat * 3); + resmem_var_op()(atom_tau, ucell->nat * 3); syncmem_var_op()(ctx, cpu_ctx, atom_tau, h_atom_tau, ucell->nat * 3); igl2isz = wfc_basis->d_igl2isz_k; diff --git a/source/module_hamilt_pw/hamilt_stodft/sto_che.cpp b/source/module_hamilt_pw/hamilt_stodft/sto_che.cpp index 23a5a18926..8c50427459 100644 --- a/source/module_hamilt_pw/hamilt_stodft/sto_che.cpp +++ b/source/module_hamilt_pw/hamilt_stodft/sto_che.cpp @@ -20,12 +20,12 @@ StoChe::StoChe(const int& nche, const int& method, const REAL& ema p_che = new ModuleBase::Chebyshev(nche); if (method == 1) { - resmem_var_op()(this->ctx, spolyv, nche); + resmem_var_op()(spolyv, nche); spolyv_cpu = new REAL[nche]; } else { - resmem_var_op()(this->ctx, spolyv, nche * nche); + resmem_var_op()(spolyv, nche * nche); } this->emax_sto = emax_sto; diff --git a/source/module_hamilt_pw/hamilt_stodft/sto_che.h b/source/module_hamilt_pw/hamilt_stodft/sto_che.h index 3a7d2f0090..6fd099f0ea 100644 --- a/source/module_hamilt_pw/hamilt_stodft/sto_che.h +++ b/source/module_hamilt_pw/hamilt_stodft/sto_che.h @@ -50,11 +50,11 @@ REAL vTMv(const REAL* v, const REAL* M, const int n) const int inc = 1; const REAL zero = 0; REAL* y = nullptr; - base_device::memory::resize_memory_op()(ctx, y, n); + base_device::memory::resize_memory_op()(y, n); hsolver::gemv_op()(ctx, normal, n, n, &one, M, n, v, inc, &zero, y, inc); REAL result = 0; REAL* dot_device = nullptr; - base_device::memory::resize_memory_op()(ctx, dot_device, 1); + base_device::memory::resize_memory_op()(dot_device, 1); container::kernels::blas_dot()(n, y, 1, v, 1, dot_device); base_device::memory::synchronize_memory_op()(cpu_ctx, ctx, diff --git a/source/module_hamilt_pw/hamilt_stodft/sto_forces.cpp b/source/module_hamilt_pw/hamilt_stodft/sto_forces.cpp index db54e40db0..dd5bbf9198 100644 --- a/source/module_hamilt_pw/hamilt_stodft/sto_forces.cpp +++ b/source/module_hamilt_pw/hamilt_stodft/sto_forces.cpp @@ -217,7 +217,7 @@ void Sto_Forces::cal_sto_force_nl( // allocate memory for the force FPTYPE* force = nullptr; - resmem_var_op()(this->ctx, force, ucell.nat * 3); + resmem_var_op()(force, ucell.nat * 3); base_device::memory::set_memory_op()(this->ctx, force, 0.0, ucell.nat * 3); hamilt::FS_Nonlocal_tools nl_tools(&nlpp, &ucell, p_kv, wfc_basis, p_sf, wg, nullptr); diff --git a/source/module_hamilt_pw/hamilt_stodft/sto_iter.cpp b/source/module_hamilt_pw/hamilt_stodft/sto_iter.cpp index ec4aa26c1c..7258227602 100644 --- a/source/module_hamilt_pw/hamilt_stodft/sto_iter.cpp +++ b/source/module_hamilt_pw/hamilt_stodft/sto_iter.cpp @@ -27,7 +27,7 @@ template void Stochastic_Iter::dot(const int& n, const Real* x, const int& incx, const Real* y, const int& incy, Real& result) { Real* result_device = nullptr; - resmem_var_op()(this->ctx, result_device, 1); + resmem_var_op()(result_device, 1); container::kernels::blas_dot()(n, p_che->coef_real, 1, spolyv, 1, result_device); syncmem_var_d2h_op()(cpu_ctx, this->ctx, &result, result_device, 1); delmem_var_op()(this->ctx, result_device); @@ -73,7 +73,7 @@ void Stochastic_Iter::orthog(const int& ik, psi::Psi& psi, // orthogonal part T* sum = nullptr; - resmem_complex_op()(this->ctx, sum, PARAM.inp.nbands * nchipk); + resmem_complex_op()(sum, PARAM.inp.nbands * nchipk); char transC = 'C'; char transN = 'N'; @@ -539,7 +539,7 @@ void Stochastic_Iter::sum_stoeband(Stochastic_WF& stowf, const int npw = this->pkv->ngk[ik]; const double kweight = this->pkv->wk[ik]; T* hshchi = nullptr; - resmem_complex_op()(this->ctx, hshchi, nchip_ik * npwx); + resmem_complex_op()(hshchi, nchip_ik * npwx); T* tmpin = stowf.shchi->get_pointer(); T* tmpout = hshchi; p_hamilt_sto->hPsi(tmpin, tmpout, nchip_ik); @@ -573,7 +573,7 @@ void Stochastic_Iter::cal_storho(const UnitCell& ucell, const int nspin = PARAM.inp.nspin; T* porter = nullptr; - resmem_complex_op()(this->ctx, porter, nrxx); + resmem_complex_op()(porter, nrxx); std::vector sto_rho(nspin); for(int is = 0; is < nspin; ++is) @@ -735,7 +735,7 @@ void Stochastic_Iter::calTnchi_ik(const int& ik, Stochastic_WFnorder; T* coef_real = nullptr; - resmem_complex_op()(this->ctx, coef_real, N); + resmem_complex_op()(coef_real, N); castmem_d2z_op()(this->ctx, this->ctx, coef_real, p_che->coef_real, p_che->norder); gemv_op()(this->ctx, transa, M, N, &one, stowf.chiallorder[ik].get_pointer(), LDA, coef_real, inc, &zero, out, inc); // zgemv_(&transa, &M, &N, &one, stowf.chiallorder[ik].get_pointer(), &LDA, coef_real, &inc, &zero, out, &inc); diff --git a/source/module_hamilt_pw/hamilt_stodft/sto_stress_pw.cpp b/source/module_hamilt_pw/hamilt_stodft/sto_stress_pw.cpp index 5be294f2e7..adc3116109 100644 --- a/source/module_hamilt_pw/hamilt_stodft/sto_stress_pw.cpp +++ b/source/module_hamilt_pw/hamilt_stodft/sto_stress_pw.cpp @@ -167,7 +167,7 @@ void Sto_Stress_PW::sto_stress_nl(ModuleBase::matrix& sigma, // allocate memory for the stress FPTYPE* stress_device = nullptr; - resmem_var_op()(this->ctx, stress_device, 9); + resmem_var_op()(stress_device, 9); setmem_var_op()(this->ctx, stress_device, 0, 9); std::vector sigmanlc(9, 0.0); diff --git a/source/module_hsolver/diag_const_nums.cpp b/source/module_hsolver/diag_const_nums.cpp index 8b459cbf7c..4d9cb8fd83 100644 --- a/source/module_hsolver/diag_const_nums.cpp +++ b/source/module_hsolver/diag_const_nums.cpp @@ -11,14 +11,11 @@ template class const_nums>; template <> const_nums::const_nums() { - base_device::memory::resize_memory_op()( - this->cpu_ctx, this->zero, 1); + base_device::memory::resize_memory_op()(this->zero, 1); this->zero[0] = 0.0; - base_device::memory::resize_memory_op()( - this->cpu_ctx, this->one, 1); + base_device::memory::resize_memory_op()(this->one, 1); this->one[0] = 1.0; - base_device::memory::resize_memory_op()( - this->cpu_ctx, this->neg_one, 1); + base_device::memory::resize_memory_op()(this->neg_one, 1); this->neg_one[0] = -1.0; } @@ -26,14 +23,11 @@ const_nums::const_nums() template <> const_nums::const_nums() { - base_device::memory::resize_memory_op()( - this->cpu_ctx, this->zero, 1); + base_device::memory::resize_memory_op()(this->zero, 1); this->zero[0] = 0.0; - base_device::memory::resize_memory_op()( - this->cpu_ctx, this->one, 1); + base_device::memory::resize_memory_op()(this->one, 1); this->one[0] = 1.0; - base_device::memory::resize_memory_op()( - this->cpu_ctx, this->neg_one, 1); + base_device::memory::resize_memory_op()(this->neg_one, 1); this->neg_one[0] = -1.0; } @@ -41,14 +35,11 @@ const_nums::const_nums() template <> const_nums>::const_nums() { - base_device::memory::resize_memory_op, base_device::DEVICE_CPU>()( - this->cpu_ctx, this->zero, 1); + base_device::memory::resize_memory_op, base_device::DEVICE_CPU>()(this->zero, 1); this->zero[0] = std::complex(0.0, 0.0); - base_device::memory::resize_memory_op, base_device::DEVICE_CPU>()( - this->cpu_ctx, this->one, 1); + base_device::memory::resize_memory_op, base_device::DEVICE_CPU>()(this->one, 1); this->one[0] = std::complex(1.0, 0.0); - base_device::memory::resize_memory_op, base_device::DEVICE_CPU>()( - this->cpu_ctx, this->neg_one, 1); + base_device::memory::resize_memory_op, base_device::DEVICE_CPU>()(this->neg_one, 1); this->neg_one[0] = std::complex(-1.0, 0.0); } @@ -56,13 +47,10 @@ const_nums>::const_nums() template <> const_nums>::const_nums() { - base_device::memory::resize_memory_op, base_device::DEVICE_CPU>()( - this->cpu_ctx, this->zero, 1); + base_device::memory::resize_memory_op, base_device::DEVICE_CPU>()(this->zero, 1); this->zero[0] = std::complex(0.0, 0.0); - base_device::memory::resize_memory_op, base_device::DEVICE_CPU>()( - this->cpu_ctx, this->one, 1); + base_device::memory::resize_memory_op, base_device::DEVICE_CPU>()(this->one, 1); this->one[0] = std::complex(1.0, 0.0); - base_device::memory::resize_memory_op, base_device::DEVICE_CPU>()( - this->cpu_ctx, this->neg_one, 1); + base_device::memory::resize_memory_op, base_device::DEVICE_CPU>()(this->neg_one, 1); this->neg_one[0] = std::complex(-1.0, 0.0); } \ No newline at end of file diff --git a/source/module_hsolver/diago_dav_subspace.cpp b/source/module_hsolver/diago_dav_subspace.cpp index 82dadcb0d0..8486771167 100644 --- a/source/module_hsolver/diago_dav_subspace.cpp +++ b/source/module_hsolver/diago_dav_subspace.cpp @@ -46,30 +46,30 @@ Diago_DavSubspace::Diago_DavSubspace(const std::vector& precond // TODO: Added memory usage statistics //<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - resmem_complex_op()(this->ctx, this->psi_in_iter, this->nbase_x * this->dim, "DAV::psi_in_iter"); + resmem_complex_op()(this->psi_in_iter, this->nbase_x * this->dim, "DAV::psi_in_iter"); setmem_complex_op()(this->ctx, this->psi_in_iter, 0, this->nbase_x * this->dim); // the product of H and psi in the reduced psi set - resmem_complex_op()(this->ctx, this->hphi, this->nbase_x * this->dim, "DAV::hphi"); + resmem_complex_op()(this->hphi, this->nbase_x * this->dim, "DAV::hphi"); setmem_complex_op()(this->ctx, this->hphi, 0, this->nbase_x * this->dim); // Hamiltonian on the reduced psi set - resmem_complex_op()(this->ctx, this->hcc, this->nbase_x * this->nbase_x, "DAV::hcc"); + resmem_complex_op()(this->hcc, this->nbase_x * this->nbase_x, "DAV::hcc"); setmem_complex_op()(this->ctx, this->hcc, 0, this->nbase_x * this->nbase_x); // Overlap on the reduced psi set - resmem_complex_op()(this->ctx, this->scc, this->nbase_x * this->nbase_x, "DAV::scc"); + resmem_complex_op()(this->scc, this->nbase_x * this->nbase_x, "DAV::scc"); setmem_complex_op()(this->ctx, this->scc, 0, this->nbase_x * this->nbase_x); // Eigenvectors - resmem_complex_op()(this->ctx, this->vcc, this->nbase_x * this->nbase_x, "DAV::vcc"); + resmem_complex_op()(this->vcc, this->nbase_x * this->nbase_x, "DAV::vcc"); setmem_complex_op()(this->ctx, this->vcc, 0, this->nbase_x * this->nbase_x); //<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #if defined(__CUDA) || defined(__ROCM) if (this->device == base_device::GpuDevice) { - resmem_real_op()(this->ctx, this->d_precondition, nbasis_in); + resmem_real_op()(this->d_precondition, nbasis_in); // syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, this->d_precondition, this->precondition.data(), nbasis_in); } #endif @@ -303,7 +303,7 @@ void Diago_DavSubspace::cal_grad(const HPsiFunc& hpsi_func, if(this->device == base_device::GpuDevice) { e_temp_hd = nullptr; - resmem_real_op()(this->ctx, e_temp_hd, nbase); + resmem_real_op()(e_temp_hd, nbase); } for (int m = 0; m < notconv; m++) { @@ -544,16 +544,16 @@ void Diago_DavSubspace::diag_zhegvx(const int& nbase, if (this->diag_comm.rank == 0) { Real* eigenvalue_gpu = nullptr; - resmem_real_op()(this->ctx, eigenvalue_gpu, this->nbase_x); + resmem_real_op()(eigenvalue_gpu, this->nbase_x); syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, eigenvalue_gpu, (*eigenvalue_iter).data(), this->nbase_x); T* hcc_gpu = nullptr; T* scc_gpu = nullptr; T* vcc_gpu = nullptr; - base_device::memory::resize_memory_op()(this->ctx, hcc_gpu, nbase * nbase); - base_device::memory::resize_memory_op()(this->ctx, scc_gpu, nbase * nbase); - base_device::memory::resize_memory_op()(this->ctx, vcc_gpu, nbase * nbase); + base_device::memory::resize_memory_op()(hcc_gpu, nbase * nbase); + base_device::memory::resize_memory_op()(scc_gpu, nbase * nbase); + base_device::memory::resize_memory_op()(vcc_gpu, nbase * nbase); for(int i=0;i()(this->ctx, this->ctx, hcc_gpu + i * nbase, hcc + i * nbase_x, nbase); @@ -733,16 +733,13 @@ void Diago_DavSubspace::refresh(const int& dim, T* hcc_cpu = nullptr; T* scc_cpu = nullptr; T* vcc_cpu = nullptr; - base_device::memory::resize_memory_op()(this->cpu_ctx, - hcc_cpu, + base_device::memory::resize_memory_op()(hcc_cpu, this->nbase_x * this->nbase_x, "DAV::hcc"); - base_device::memory::resize_memory_op()(this->cpu_ctx, - scc_cpu, + base_device::memory::resize_memory_op()(scc_cpu, this->nbase_x * this->nbase_x, "DAV::scc"); - base_device::memory::resize_memory_op()(this->cpu_ctx, - vcc_cpu, + base_device::memory::resize_memory_op()(vcc_cpu, this->nbase_x * this->nbase_x, "DAV::vcc"); diff --git a/source/module_hsolver/diago_david.cpp b/source/module_hsolver/diago_david.cpp index b4805a82fa..fcf35472f4 100644 --- a/source/module_hsolver/diago_david.cpp +++ b/source/module_hsolver/diago_david.cpp @@ -59,26 +59,25 @@ DiagoDavid::DiagoDavid(const Real* precondition_in, /// - "basis" : number of occupied ks-orbitals(subscripts i,j) * number of unoccupied ks-orbitals(subscripts a,b), corresponding to "bands" of the ground state // the lowest N eigenvalues - base_device::memory::resize_memory_op()( - this->cpu_ctx, this->eigenvalue, nbase_x, "DAV::eig"); + base_device::memory::resize_memory_op()(this->eigenvalue, nbase_x, "DAV::eig"); base_device::memory::set_memory_op()( this->cpu_ctx, this->eigenvalue, 0, nbase_x); // basis(dim, nbase_x), leading dimension = dim - resmem_complex_op()(this->ctx, basis, nbase_x * dim, "DAV::basis"); + resmem_complex_op()(basis, nbase_x * dim, "DAV::basis"); setmem_complex_op()(this->ctx, basis, 0, nbase_x * dim); //<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< // hpsi(nbase_x, dim); // the product of H and psi in the reduced basis set - resmem_complex_op()(this->ctx, this->hpsi, nbase_x * dim, "DAV::hpsi"); + resmem_complex_op()(this->hpsi, nbase_x * dim, "DAV::hpsi"); setmem_complex_op()(this->ctx, this->hpsi, 0, nbase_x * dim); // spsi(nbase_x, dim); // the Product of S and psi in the reduced basis set - resmem_complex_op()(this->ctx, this->spsi, nbase_x * dim, "DAV::spsi"); + resmem_complex_op()(this->spsi, nbase_x * dim, "DAV::spsi"); setmem_complex_op()(this->ctx, this->spsi, 0, nbase_x * dim); // hcc(nbase_x, nbase_x); // Hamiltonian on the reduced basis - resmem_complex_op()(this->ctx, this->hcc, nbase_x * nbase_x, "DAV::hcc"); + resmem_complex_op()(this->hcc, nbase_x * nbase_x, "DAV::hcc"); setmem_complex_op()(this->ctx, this->hcc, 0, nbase_x * nbase_x); // scc(nbase_x, nbase_x); // Overlap on the reduced basis @@ -86,19 +85,19 @@ DiagoDavid::DiagoDavid(const Real* precondition_in, // setmem_complex_op()(this->ctx, this->scc, 0, nbase_x * nbase_x); // vcc(nbase_x, nbase_x); // Eigenvectors of hcc - resmem_complex_op()(this->ctx, this->vcc, nbase_x * nbase_x, "DAV::vcc"); + resmem_complex_op()(this->vcc, nbase_x * nbase_x, "DAV::vcc"); setmem_complex_op()(this->ctx, this->vcc, 0, nbase_x * nbase_x); //<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< // lagrange_matrix(nband, nband); // for orthogonalization - resmem_complex_op()(this->ctx, this->lagrange_matrix, nband * nband); + resmem_complex_op()(this->lagrange_matrix, nband * nband); setmem_complex_op()(this->ctx, this->lagrange_matrix, 0, nband * nband); #if defined(__CUDA) || defined(__ROCM) // device precondition array if (this->device == base_device::GpuDevice) { - resmem_var_op()(this->ctx, this->d_precondition, dim); + resmem_var_op()(this->d_precondition, dim); syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, this->d_precondition, this->precondition, dim); } #endif @@ -353,7 +352,7 @@ void DiagoDavid::cal_grad(const HPsiFunc& hpsi_func, // vc_ev_vector(notconv, nbase); // eigenvectors of unconverged index extracted from vcc T* vc_ev_vector = nullptr; - resmem_complex_op()(this->ctx, vc_ev_vector, notconv * nbase); + resmem_complex_op()(vc_ev_vector, notconv * nbase); setmem_complex_op()(this->ctx, vc_ev_vector, 0, notconv * nbase); //>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> @@ -419,7 +418,7 @@ void DiagoDavid::cal_grad(const HPsiFunc& hpsi_func, { #if defined(__CUDA) || defined(__ROCM) Real* e_temp_gpu = nullptr; - resmem_var_op()(this->ctx, e_temp_gpu, nbase); + resmem_var_op()(e_temp_gpu, nbase); syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, e_temp_gpu, e_temp_cpu.data(), nbase); vector_mul_vector_op()(this->ctx, nbase, @@ -499,7 +498,7 @@ void DiagoDavid::cal_grad(const HPsiFunc& hpsi_func, // there is a nbase to nbase + notconv band orthogonalise // plan for SchmidtOrth T* lagrange = nullptr; - resmem_complex_op()(this->ctx, lagrange, notconv * (nbase + notconv)); + resmem_complex_op()(lagrange, notconv * (nbase + notconv)); setmem_complex_op()(this->ctx, lagrange, 0, notconv * (nbase + notconv)); std::vector pre_matrix_mm_m(notconv, 0); @@ -700,7 +699,7 @@ void DiagoDavid::diag_zhegvx(const int& nbase, { #if defined(__CUDA) || defined(__ROCM) Real* eigenvalue_gpu = nullptr; - resmem_var_op()(this->ctx, eigenvalue_gpu, nbase_x); + resmem_var_op()(eigenvalue_gpu, nbase_x); syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, eigenvalue_gpu, this->eigenvalue, nbase_x); dnevx_op()(this->ctx, nbase, nbase_x, hcc, nband, eigenvalue_gpu, vcc); @@ -824,16 +823,14 @@ void DiagoDavid::refresh(const int& dim, T* hcc_cpu = nullptr; // T* scc_cpu = nullptr; T* vcc_cpu = nullptr; - base_device::memory::resize_memory_op()(this->cpu_ctx, - hcc_cpu, + base_device::memory::resize_memory_op()(hcc_cpu, nbase_x * nbase_x, "DAV::hcc"); // base_device::memory::resize_memory_op()(this->cpu_ctx, // scc_cpu, // nbase_x * nbase_x, // "DAV::scc"); - base_device::memory::resize_memory_op()(this->cpu_ctx, - vcc_cpu, + base_device::memory::resize_memory_op()(vcc_cpu, nbase_x * nbase_x, "DAV::vcc"); diff --git a/source/module_hsolver/diago_iter_assist.cpp b/source/module_hsolver/diago_iter_assist.cpp index c05ecdf8ec..d216ad02fa 100644 --- a/source/module_hsolver/diago_iter_assist.cpp +++ b/source/module_hsolver/diago_iter_assist.cpp @@ -42,9 +42,9 @@ void DiagoIterAssist::diagH_subspace(const hamilt::Hamilt* assert(n_band <= nstart); T *hcc = nullptr, *scc = nullptr, *vcc = nullptr; - resmem_complex_op()(ctx, hcc, nstart * nstart, "DiagSub::hcc"); - resmem_complex_op()(ctx, scc, nstart * nstart, "DiagSub::scc"); - resmem_complex_op()(ctx, vcc, nstart * nstart, "DiagSub::vcc"); + resmem_complex_op()(hcc, nstart * nstart, "DiagSub::hcc"); + resmem_complex_op()(scc, nstart * nstart, "DiagSub::scc"); + resmem_complex_op()(vcc, nstart * nstart, "DiagSub::vcc"); setmem_complex_op()(ctx, hcc, 0, nstart * nstart); setmem_complex_op()(ctx, scc, 0, nstart * nstart); setmem_complex_op()(ctx, vcc, 0, nstart * nstart); @@ -61,7 +61,7 @@ void DiagoIterAssist::diagH_subspace(const hamilt::Hamilt* } else { - resmem_complex_op()(ctx, temp, nstart * dmax, "DiagSub::temp"); + resmem_complex_op()(temp, nstart * dmax, "DiagSub::temp"); } { // code block to calculate hcc and scc @@ -192,9 +192,9 @@ void DiagoIterAssist::diagH_subspace_init(hamilt::Hamilt* // ModuleBase::ComplexMatrix sc(nstart, nstart); // ModuleBase::ComplexMatrix hvec(nstart, n_band); T *hcc = nullptr, *scc = nullptr, *vcc = nullptr; - resmem_complex_op()(ctx, hcc, nstart * nstart, "DiagSub::hcc"); - resmem_complex_op()(ctx, scc, nstart * nstart, "DiagSub::scc"); - resmem_complex_op()(ctx, vcc, nstart * nstart, "DiagSub::vcc"); + resmem_complex_op()(hcc, nstart * nstart, "DiagSub::hcc"); + resmem_complex_op()(scc, nstart * nstart, "DiagSub::scc"); + resmem_complex_op()(vcc, nstart * nstart, "DiagSub::vcc"); setmem_complex_op()(ctx, hcc, 0, nstart * nstart); setmem_complex_op()(ctx, scc, 0, nstart * nstart); setmem_complex_op()(ctx, vcc, 0, nstart * nstart); @@ -206,7 +206,7 @@ void DiagoIterAssist::diagH_subspace_init(hamilt::Hamilt* T* ppsi = psi_temp.get_pointer(); // hpsi and spsi share the temp space T* temp = nullptr; - resmem_complex_op()(ctx, temp, psi_nc, "DiagSub::temp"); + resmem_complex_op()(temp, psi_nc, "DiagSub::temp"); setmem_complex_op()(ctx, temp, 0, psi_nc); T* hpsi = temp; @@ -255,7 +255,7 @@ void DiagoIterAssist::diagH_subspace_init(hamilt::Hamilt* syncmem_complex_op()(ctx, ctx, ppsi, psi, psi_temp.size()); // hpsi and spsi share the temp space T* temp = nullptr; - resmem_complex_op()(ctx, temp, nstart * psi_nc, "DiagSub::temp"); + resmem_complex_op()(temp, nstart * psi_nc, "DiagSub::temp"); setmem_complex_op()(ctx, temp, 0, nstart * psi_nc); T* hpsi = temp; @@ -377,7 +377,7 @@ void DiagoIterAssist::diagH_LAPACK(const int nstart, ModuleBase::timer::tick("DiagoIterAssist", "diagH_LAPACK"); Real* eigenvalues = nullptr; - resmem_var_op()(ctx, eigenvalues, nstart); + resmem_var_op()(eigenvalues, nstart); setmem_var_op()(ctx, eigenvalues, 0, nstart); dngvd_op()(ctx, nstart, ldh, hcc, scc, eigenvalues, vcc); @@ -430,7 +430,7 @@ void DiagoIterAssist::cal_hs_subspace(const hamilt::Hamilt const int dmax = psi.get_nbasis(); T* temp = nullptr; - resmem_complex_op()(ctx, temp, nstart * dmax, "DiagSub::temp"); + resmem_complex_op()(temp, nstart * dmax, "DiagSub::temp"); setmem_complex_op()(ctx, temp, 0, nstart * dmax); { // code block to calculate hcc and scc @@ -502,7 +502,7 @@ void DiagoIterAssist::diag_responce( const T* hcc, const int nstart = nbands; T *vcc = nullptr; - resmem_complex_op()(ctx, vcc, nstart * nstart, "DiagSub::vcc"); + resmem_complex_op()(vcc, nstart * nstart, "DiagSub::vcc"); setmem_complex_op()(ctx, vcc, 0, nstart * nstart); // after generation of H and S matrix, diag them @@ -545,7 +545,7 @@ void DiagoIterAssist::diag_subspace_psi(const T* hcc, const int n_band = evc.get_nbands(); T *vcc = nullptr; - resmem_complex_op()(ctx, vcc, nstart * nstart, "DiagSub::vcc"); + resmem_complex_op()(vcc, nstart * nstart, "DiagSub::vcc"); setmem_complex_op()(ctx, vcc, 0, nstart * nstart); // after generation of H and S matrix, diag them @@ -555,7 +555,7 @@ void DiagoIterAssist::diag_subspace_psi(const T* hcc, const int dmin = evc.get_current_ngk(); const int dmax = evc.get_nbasis(); T* temp = nullptr; - resmem_complex_op()(ctx, temp, nstart * dmax, "DiagSub::temp"); + resmem_complex_op()(temp, nstart * dmax, "DiagSub::temp"); setmem_complex_op()(ctx, temp, 0, nstart * dmax); gemm_op()(ctx, 'N', diff --git a/source/module_hsolver/kernels/cuda/math_kernel_op.cu b/source/module_hsolver/kernels/cuda/math_kernel_op.cu index 70ed5ebf0b..2318e14d57 100644 --- a/source/module_hsolver/kernels/cuda/math_kernel_op.cu +++ b/source/module_hsolver/kernels/cuda/math_kernel_op.cu @@ -887,7 +887,7 @@ void matrixTranspose_op::operator()(const base_ double* output_matrix) { double* device_temp = nullptr; - base_device::memory::resize_memory_op()(d, device_temp, row * col); + base_device::memory::resize_memory_op()(device_temp, row * col); if (row == col) { @@ -924,7 +924,7 @@ void matrixTranspose_op, base_device::DEVICE_GPU>::operator( std::complex* output_matrix) { std::complex* device_temp = nullptr; - base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(d, device_temp, row * col); + base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(device_temp, row * col); if (row == col) { @@ -968,7 +968,7 @@ void matrixTranspose_op, base_device::DEVICE_GPU>::operator std::complex* output_matrix) { std::complex* device_temp = nullptr; - base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(d, device_temp, row * col); + base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(device_temp, row * col); if (row == col) { diff --git a/source/module_hsolver/kernels/math_kernel_op.cpp b/source/module_hsolver/kernels/math_kernel_op.cpp index 3a752c3659..b0930e02d9 100644 --- a/source/module_hsolver/kernels/math_kernel_op.cpp +++ b/source/module_hsolver/kernels/math_kernel_op.cpp @@ -323,7 +323,7 @@ struct matrixTranspose_op T* output_matrix) { T* temp = nullptr; - base_device::memory::resize_memory_op()(d, temp, row * col, "MTransOp"); + base_device::memory::resize_memory_op()(temp, row * col, "MTransOp"); #ifdef _OPENMP #pragma omp parallel for collapse(2) schedule(static, 8192 / sizeof(T)) #endif diff --git a/source/module_hsolver/kernels/rocm/math_kernel_op.hip.cu b/source/module_hsolver/kernels/rocm/math_kernel_op.hip.cu index ef5a1c1ece..0b9b11970e 100644 --- a/source/module_hsolver/kernels/rocm/math_kernel_op.hip.cu +++ b/source/module_hsolver/kernels/rocm/math_kernel_op.hip.cu @@ -806,7 +806,7 @@ void matrixTranspose_op::operator()(const base_ double* output_matrix) { double* device_temp = nullptr; - base_device::memory::resize_memory_op()(d, device_temp, row * col); + base_device::memory::resize_memory_op()(device_temp, row * col); if (row == col) { @@ -841,7 +841,7 @@ void matrixTranspose_op, base_device::DEVICE_GPU>::operator( std::complex* output_matrix) { std::complex* device_temp = nullptr; - base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(d, device_temp, row * col); + base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(device_temp, row * col); if (row == col) { @@ -881,7 +881,7 @@ void matrixTranspose_op, base_device::DEVICE_GPU>::operator std::complex* output_matrix) { std::complex* device_temp = nullptr; - base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(d, device_temp, row * col); + base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(device_temp, row * col); if (row == col) { diff --git a/source/module_hsolver/kernels/test/math_dngvd_test.cpp b/source/module_hsolver/kernels/test/math_dngvd_test.cpp index 8b614ae9a0..71a41073f0 100644 --- a/source/module_hsolver/kernels/test/math_dngvd_test.cpp +++ b/source/module_hsolver/kernels/test/math_dngvd_test.cpp @@ -140,7 +140,7 @@ TEST_F(TestModuleHsolverMathDngvd, transpose_gpu) // {-0.351417,-1.73472}, {-8.32667,2.3744}, {4.16334,3.64292} }; std::complex* device_transpose = nullptr; - resize_memory_op_Z()(gpu_ctx, device_transpose, matrix_size); + resize_memory_op_Z()(device_transpose, matrix_size); synchronize_memory_op_C2G_Z()(gpu_ctx, cpu_ctx, device_transpose, transpose.data(), transpose.size()); // run diff --git a/source/module_hsolver/kernels/test/math_kernel_test.cpp b/source/module_hsolver/kernels/test/math_kernel_test.cpp index c2c66fb936..ca5116a310 100644 --- a/source/module_hsolver/kernels/test/math_kernel_test.cpp +++ b/source/module_hsolver/kernels/test/math_kernel_test.cpp @@ -371,8 +371,8 @@ TEST_F(TestModuleHsolverMathKernel, gemv_op_cpu) TEST_F(TestModuleHsolverMathKernel, zdot_real_op_gpu) { std::complex*psi_L_dev = NULL, *psi_R_dev = NULL; - resize_memory_op()(gpu_ctx, psi_L_dev, psi_L.size()); - resize_memory_op()(gpu_ctx, psi_R_dev, psi_R.size()); + resize_memory_op()(psi_L_dev, psi_L.size()); + resize_memory_op()(psi_R_dev, psi_R.size()); synchronize_memory_op()(gpu_ctx, cpu_ctx, psi_L_dev, psi_L.data(), psi_L.size()); synchronize_memory_op()(gpu_ctx, cpu_ctx, psi_R_dev, psi_R.data(), psi_R.size()); hsolver::createGpuBlasHandle(); @@ -390,8 +390,8 @@ TEST_F(TestModuleHsolverMathKernel, vector_div_constant_op_gpu) // in GPU std::complex* input_dev = NULL; std::complex* output_dev = NULL; - resize_memory_op()(gpu_ctx, input_dev, input.size()); - resize_memory_op()(gpu_ctx, output_dev, input.size()); + resize_memory_op()(input_dev, input.size()); + resize_memory_op()(output_dev, input.size()); // syn the input data in CPU to GPU synchronize_memory_op()(gpu_ctx, cpu_ctx, input_dev, input.data(), input.size()); // run @@ -419,9 +419,9 @@ TEST_F(TestModuleHsolverMathKernel, vector_mul_vector_op_gpu) std::complex* output_dev = NULL; // resize memory for values - resize_memory_op()(gpu_ctx, input_dev, input.size()); - resize_memory_op_double()(gpu_ctx, input_double_dev, input.size()); - resize_memory_op()(gpu_ctx, output_dev, input.size()); + resize_memory_op()(input_dev, input.size()); + resize_memory_op_double()(input_double_dev, input.size()); + resize_memory_op()(output_dev, input.size()); // syn the input data in CPU to GPU synchronize_memory_op()(gpu_ctx, cpu_ctx, input_dev, input.data(), input.size()); @@ -455,9 +455,9 @@ TEST_F(TestModuleHsolverMathKernel, vector_div_vector_op_gpu) std::complex* output_dev = NULL; // resize memory for values in GPU - resize_memory_op()(gpu_ctx, input_dev, input.size()); - resize_memory_op_double()(gpu_ctx, input_double_dev, input.size()); - resize_memory_op()(gpu_ctx, output_dev, input.size()); + resize_memory_op()(input_dev, input.size()); + resize_memory_op_double()(input_double_dev, input.size()); + resize_memory_op()(output_dev, input.size()); // syn the input data in CPU to GPU synchronize_memory_op()(gpu_ctx, cpu_ctx, input_dev, input.data(), input.size()); @@ -491,9 +491,9 @@ TEST_F(TestModuleHsolverMathKernel, constantvector_addORsub_constantVector_op_gp std::complex* output_dev = NULL; // resize memory for values in GPU - resize_memory_op()(gpu_ctx, input1_dev, input.size()); - resize_memory_op()(gpu_ctx, input2_dev, input.size()); - resize_memory_op()(gpu_ctx, output_dev, input.size()); + resize_memory_op()(input1_dev, input.size()); + resize_memory_op()(input2_dev, input.size()); + resize_memory_op()(output_dev, input.size()); // syn the input data in CPU to GPU synchronize_memory_op()(gpu_ctx, cpu_ctx, input1_dev, input1.data(), input.size()); @@ -529,8 +529,8 @@ TEST_F(TestModuleHsolverMathKernel, axpy_op_gpu) std::complex* Y_axpy_dev = NULL; // resize memory for values in GPU - resize_memory_op()(gpu_ctx, X_axpy_dev, X_axpy.size()); - resize_memory_op()(gpu_ctx, Y_axpy_dev, Y_axpy.size()); + resize_memory_op()(X_axpy_dev, X_axpy.size()); + resize_memory_op()(Y_axpy_dev, Y_axpy.size()); // syn the input data in CPU to GPU synchronize_memory_op()(gpu_ctx, cpu_ctx, X_axpy_dev, X_axpy.data(), X_axpy.size()); @@ -560,7 +560,7 @@ TEST_F(TestModuleHsolverMathKernel, scal_op_gpu) std::complex* X_scal_dev = NULL; // resize memory for values in GPU - resize_memory_op()(gpu_ctx, X_scal_dev, X_scal.size()); + resize_memory_op()(X_scal_dev, X_scal.size()); // syn the input data in CPU to GPU synchronize_memory_op()(gpu_ctx, cpu_ctx, X_scal_dev, X_scal.data(), X_scal.size()); @@ -589,9 +589,9 @@ TEST_F(TestModuleHsolverMathKernel, gemv_op_gpu) std::complex* Y_gemv_dev = NULL; // resize memory for values in GPU - resize_memory_op()(gpu_ctx, A_gemv_dev, A_gemv.size()); - resize_memory_op()(gpu_ctx, X_gemv_dev, X_gemv.size()); - resize_memory_op()(gpu_ctx, Y_gemv_dev, Y_gemv.size()); + resize_memory_op()(A_gemv_dev, A_gemv.size()); + resize_memory_op()(X_gemv_dev, X_gemv.size()); + resize_memory_op()(Y_gemv_dev, Y_gemv.size()); // syn the input data in CPU to GPU synchronize_memory_op()(gpu_ctx, cpu_ctx, A_gemv_dev, A_gemv.data(), A_gemv.size()); @@ -654,7 +654,7 @@ TEST_F(TestModuleHsolverMathKernel, matrixSetToAnother_op_gpu) int LDB = 4; std::complex* device_A = nullptr; - base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(gpu_ctx, device_A, A.size()); + base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(device_A, A.size()); base_device::memory:: synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>()(gpu_ctx, cpu_ctx, @@ -663,7 +663,7 @@ TEST_F(TestModuleHsolverMathKernel, matrixSetToAnother_op_gpu) A.size()); std::complex* device_B = nullptr; - base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(gpu_ctx, device_B, B.size()); + base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(device_B, B.size()); base_device::memory:: synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>()(gpu_ctx, cpu_ctx, diff --git a/source/module_hsolver/kernels/test/perf_math_kernel.cpp b/source/module_hsolver/kernels/test/perf_math_kernel.cpp index 173ef8b40b..3ea380ba13 100644 --- a/source/module_hsolver/kernels/test/perf_math_kernel.cpp +++ b/source/module_hsolver/kernels/test/perf_math_kernel.cpp @@ -105,12 +105,12 @@ class PerfModuleHsolverMathKernel : public benchmark::Fixture { zconstant_a = std::complex{(double)rand()+(double)rand()/(RAND_MAX+1.0),(double)rand()+(double)rand()/(RAND_MAX+1.0)}; #if __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM - resize_memory_op()(gpu_ctx, test_zvector_a_gpu, dim_vector); - resize_memory_op()(gpu_ctx, test_zvector_b_gpu, dim_vector); + resize_memory_op()(test_zvector_a_gpu, dim_vector); + resize_memory_op()(test_zvector_b_gpu, dim_vector); synchronize_memory_op()(gpu_ctx, cpu_ctx, test_zvector_a_gpu, test_zvector_a, dim_vector); synchronize_memory_op()(gpu_ctx, cpu_ctx, test_zvector_b_gpu, test_zvector_b, dim_vector); - resize_memory_op()(gpu_ctx, result_zvector_gpu, dim_vector); + resize_memory_op()(result_zvector_gpu, dim_vector); resize_memory_op_double()(gpu_ctx, test_dvector_a_gpu, dim_vector); synchronize_memory_op_double()(gpu_ctx, cpu_ctx, test_dvector_a_gpu, test_dvector_a, dim_vector); diff --git a/source/module_psi/psi.cpp b/source/module_psi/psi.cpp index 7942b412c9..56d470e8a0 100644 --- a/source/module_psi/psi.cpp +++ b/source/module_psi/psi.cpp @@ -58,7 +58,7 @@ Psi::Psi(const int nk_in, const int nbd_in, const int nbs_in, const i this->ngk = ngk_in; // modify later // This function will delete the psi array first(if psi exist), then malloc a new memory for it. - resize_memory_op()(this->ctx, this->psi, nk_in * static_cast(nbd_in) * nbs_in, "no_record"); + resize_memory_op()(this->psi, nk_in * static_cast(nbd_in) * nbs_in, "no_record"); this->nk = nk_in; this->nbands = nbd_in; @@ -96,7 +96,7 @@ Psi::Psi(const int nk_in, this->ngk = ngk_in.data(); // modify later // This function will delete the psi array first(if psi exist), then malloc a new memory for it. - resize_memory_op()(this->ctx, this->psi, nk_in * static_cast(nbd_in) * nbs_in, "no_record"); + resize_memory_op()(this->psi, nk_in * static_cast(nbd_in) * nbs_in, "no_record"); this->nk = nk_in; this->nbands = nbd_in; @@ -166,7 +166,7 @@ Psi::Psi(const int nk_in, this->ngk = nullptr; assert(nk_in > 0 && nbd_in >= 0 && nbs_in > 0); - resize_memory_op()(this->ctx, this->psi, nk_in * static_cast(nbd_in) * nbs_in, "no_record"); + resize_memory_op()(this->psi, nk_in * static_cast(nbd_in) * nbs_in, "no_record"); this->nk = nk_in; this->nbands = nbd_in; @@ -278,7 +278,7 @@ void Psi::resize(const int nks_in, const int nbands_in, const int nba assert(nks_in > 0 && nbands_in >= 0 && nbasis_in > 0); // This function will delete the psi array first(if psi exist), then malloc a new memory for it. - resize_memory_op()(this->ctx, this->psi, nks_in * static_cast(nbands_in) * nbasis_in, "no_record"); + resize_memory_op()(this->psi, nks_in * static_cast(nbands_in) * nbasis_in, "no_record"); // this->zero_out(); From eb7c145b3501b22b7a6fdc31b0d45f6737cac1c1 Mon Sep 17 00:00:00 2001 From: critsium-xy Date: Thu, 16 Jan 2025 11:09:01 +0800 Subject: [PATCH 2/7] Small bug fix --- source/module_base/module_device/memory_op.cpp | 6 +++--- source/module_base/module_device/memory_op.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/source/module_base/module_device/memory_op.cpp b/source/module_base/module_device/memory_op.cpp index 0e8afc6fe8..5433f84d48 100644 --- a/source/module_base/module_device/memory_op.cpp +++ b/source/module_base/module_device/memory_op.cpp @@ -400,13 +400,13 @@ template struct delete_memory_op_mt, base_device::DEVICE_CP #endif template -void resize_memory(FPTYPE* arr, base_device::AbacusDevice_t device_type) +void resize_memory(FPTYPE* arr, const size_t size, base_device::AbacusDevice_t device_type) { if (device_type == base_device::AbacusDevice_t::CpuDevice){ - resize_memory_op()(arr); + resize_memory_op()(arr, size); } else if (device_type == base_device::AbacusDevice_t::GpuDevice){ - resize_memory_op()(arr); + resize_memory_op()(arr, size); } } diff --git a/source/module_base/module_device/memory_op.h b/source/module_base/module_device/memory_op.h index af7b2e21a0..5804b58829 100644 --- a/source/module_base/module_device/memory_op.h +++ b/source/module_base/module_device/memory_op.h @@ -93,7 +93,7 @@ struct delete_memory_op }; template -void resize_memory(FPTYPE* arr, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); +void resize_memory(FPTYPE* arr, const size_t size, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); template void set_memory(FPTYPE* arr, const int var, const size_t size, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); From 851d8bf46af59e052b1e23e3ce71ab85fabb3b28 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci-lite[bot]" <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com> Date: Thu, 16 Jan 2025 04:00:14 +0000 Subject: [PATCH 3/7] [pre-commit.ci lite] apply automatic fixes --- .../module_basis/module_pw/pw_basis_sup.cpp | 53 ++++++++++++------- .../module_basis/module_pw/pw_distributeg.cpp | 30 +++++++---- 2 files changed, 55 insertions(+), 28 deletions(-) diff --git a/source/module_basis/module_pw/pw_basis_sup.cpp b/source/module_basis/module_pw/pw_basis_sup.cpp index 3965db2404..33ccc4b7e7 100644 --- a/source/module_basis/module_pw/pw_basis_sup.cpp +++ b/source/module_basis/module_pw/pw_basis_sup.cpp @@ -100,8 +100,9 @@ void PW_Basis_Sup::distribution_method3(const ModulePW::PW_Basis* pw_rho) this->npw_per = new int[this->poolnproc]; // number of planewaves on each core. delete[] this->fftixy2ip; this->fftixy2ip = new int[this->fftnxy]; // ip of core which contains the stick on (x, y). - for (int ixy = 0; ixy < this->fftnxy; ++ixy) + for (int ixy = 0; ixy < this->fftnxy; ++ixy) { this->fftixy2ip[ixy] = -1; // meaning this stick has not been distributed or there is no stick on (x, y). +} if (poolrank == 0) { // (1) Count the total number of planewaves (tot_npw) and sticks (this->nstot). @@ -212,10 +213,11 @@ void PW_Basis_Sup::divide_sticks_3( int fftnx_s = nx_s; if (this->gamma_only) { - if (this->xprime) + if (this->xprime) { fftnx_s = int(nx_s / 2) + 1; - else + } else { fftny_s = int(ny_s / 2) + 1; +} } int fftnxy_s = fftnx_s * fftny_s; @@ -225,15 +227,19 @@ void PW_Basis_Sup::divide_sticks_3( { int ix = ixy / fftny_s; int iy = ixy % fftny_s; - if (ix >= int(nx_s / 2) + 1) + if (ix >= int(nx_s / 2) + 1) { ix -= nx_s; - if (iy >= int(ny_s / 2) + 1) +} + if (iy >= int(ny_s / 2) + 1) { iy -= ny_s; +} - if (ix < 0) + if (ix < 0) { ix += nx; - if (iy < 0) +} + if (iy < 0) { iy += ny; +} int index = ix * this->fftny + iy; int ip = fftixy2ip_s[ixy]; if (ip >= 0) @@ -349,8 +355,9 @@ void PW_Basis_Sup::get_ig2isz_is2fftixy( fftixy2is[ixy] = st_move; st_move++; } - if (st_move == this->nst) + if (st_move == this->nst) { break; +} } // distribute planewaves in the same order as smooth grids first. @@ -363,19 +370,25 @@ void PW_Basis_Sup::get_ig2isz_is2fftixy( int ixy = pw_rho->is2fftixy[is]; int ix = ixy / pw_rho->fftny; int iy = ixy % pw_rho->fftny; - if (ix >= int(pw_rho->nx / 2) + 1) + if (ix >= int(pw_rho->nx / 2) + 1) { ix -= pw_rho->nx; - if (iy >= int(pw_rho->ny / 2) + 1) +} + if (iy >= int(pw_rho->ny / 2) + 1) { iy -= pw_rho->ny; - if (iz >= int(pw_rho->nz / 2) + 1) +} + if (iz >= int(pw_rho->nz / 2) + 1) { iz -= pw_rho->nz; +} - if (ix < 0) + if (ix < 0) { ix += this->nx; - if (iy < 0) +} + if (iy < 0) { iy += this->ny; - if (iz < 0) +} + if (iz < 0) { iz += this->nz; +} int ixy_now = ix * this->fftny + iy; int index = ixy_now * this->nz + iz; int is_now = fftixy2is[ixy_now]; @@ -383,8 +396,9 @@ void PW_Basis_Sup::get_ig2isz_is2fftixy( this->ig2isz[ig] = isz_now; pw_filled++; found[index] = true; - if (xprime && ix == 0) + if (xprime && ix == 0) { ng_xeq0++; +} } assert(pw_filled == pw_rho->npw); @@ -397,21 +411,24 @@ void PW_Basis_Sup::get_ig2isz_is2fftixy( for (int iz = zstart; iz < zstart + st_length2D[ixy]; ++iz) { int z = iz; - if (z < 0) + if (z < 0) { z += this->nz; +} if (!found[ixy * this->nz + z]) { found[ixy * this->nz + z] = true; int is = fftixy2is[ixy]; this->ig2isz[pw_filled] = is * this->nz + z; pw_filled++; - if (xprime && ixy / fftny == 0) + if (xprime && ixy / fftny == 0) { ng_xeq0++; +} } } } - if (pw_filled == this->npw) + if (pw_filled == this->npw) { break; +} } delete[] fftixy2is; diff --git a/source/module_basis/module_pw/pw_distributeg.cpp b/source/module_basis/module_pw/pw_distributeg.cpp index 5c2584642b..0f1b29554a 100644 --- a/source/module_basis/module_pw/pw_distributeg.cpp +++ b/source/module_basis/module_pw/pw_distributeg.cpp @@ -101,8 +101,10 @@ void PW_Basis::count_pw_st( // so that its index in st_length and st_bottom is 9 * 10 + 2 = 92. int x = ix; int y = iy; - if (x < 0) x += this->nx; - if (y < 0) y += this->ny; + if (x < 0) { x += this->nx; +} + if (y < 0) { y += this->ny; +} int index = x * this->fftny + y; int length = 0; // number of planewave on stick (x, y). @@ -114,13 +116,18 @@ void PW_Basis::count_pw_st( double modulus = f * (this->GGT * f); if (modulus <= this->ggecut || this->full_pw) { - if (length == 0) st_bottom2D[index] = iz; // length == 0 means this point is the bottom of stick (x, y). + if (length == 0) { st_bottom2D[index] = iz; // length == 0 means this point is the bottom of stick (x, y). +} ++this->npwtot; ++length; - if(iy < this->riy) this->riy = iy; - if(iy > this->liy) this->liy = iy; - if(ix < this->rix) this->rix = ix; - if(ix > this->lix) this->lix = ix; + if(iy < this->riy) { this->riy = iy; +} + if(iy > this->liy) { this->liy = iy; +} + if(ix < this->rix) { this->rix = ix; +} + if(ix > this->lix) { this->lix = ix; +} } } if (length > 0) @@ -182,15 +189,18 @@ void PW_Basis::get_ig2isz_is2fftixy( for (int iz = zstart; iz < zstart + st_length2D[ixy]; ++iz) { int z = iz; - if (z < 0) z += this->nz; + if (z < 0) { z += this->nz; +} this->ig2isz[pw_filled] = st_move * this->nz + z; pw_filled++; } this->is2fftixy[st_move] = ixy; st_move++; - if(xprime && ixy/fftny == 0) ng_xeq0 = pw_filled; + if(xprime && ixy/fftny == 0) { ng_xeq0 = pw_filled; +} } - if (st_move == this->nst && pw_filled == this->npw) break; + if (st_move == this->nst && pw_filled == this->npw) { break; +} } #if defined(__CUDA) || defined(__ROCM) if (this->device == "gpu") { From d485c505b387c490a130cbe7725318a9eac96150 Mon Sep 17 00:00:00 2001 From: critsium-xy Date: Thu, 16 Jan 2025 15:30:42 +0800 Subject: [PATCH 4/7] Remove all ctx parameters in set_memory_op --- source/module_base/math_chebyshev.cpp | 4 +-- .../module_device/cuda/memory_op.cu | 5 ++- .../module_base/module_device/memory_op.cpp | 4 +-- source/module_base/module_device/memory_op.h | 5 ++- .../module_device/rocm/memory_op.hip.cu | 3 +- .../module_device/test/memory_test.cpp | 8 ++--- .../module_basis/module_pw/pw_transform_k.cpp | 2 -- source/module_elecstate/elecstate_pw.cpp | 10 +++--- .../module_elecstate/elecstate_pw_cal_tau.cpp | 4 +-- source/module_elecstate/elecstate_pw_sdft.cpp | 2 +- .../hamilt_pwdft/VNL_in_pw.cpp | 2 +- .../hamilt_pwdft/forces_nl.cpp | 2 +- .../hamilt_pwdft/forces_onsite.cpp | 2 +- .../hamilt_pwdft/hamilt_pw.cpp | 2 +- .../hamilt_pwdft/operator_pw/meta_pw.cpp | 2 +- .../hamilt_pwdft/operator_pw/nonlocal_pw.cpp | 4 +-- .../operator_pw/onsite_proj_pw.cpp | 4 +-- .../hamilt_pwdft/operator_pw/veff_pw.cpp | 2 +- .../hamilt_pwdft/stress_func_nl.cpp | 2 +- .../hamilt_pwdft/stress_func_onsite.cpp | 2 +- .../hamilt_stodft/sto_forces.cpp | 2 +- .../hamilt_stodft/sto_iter.cpp | 4 +-- .../hamilt_stodft/sto_stress_pw.cpp | 2 +- .../module_hamilt_pw/hamilt_stodft/sto_wf.cpp | 2 +- source/module_hsolver/diago_dav_subspace.cpp | 18 +++++----- source/module_hsolver/diago_david.cpp | 27 +++++++-------- source/module_hsolver/diago_iter_assist.cpp | 34 +++++++++---------- source/module_psi/psi.cpp | 2 +- 28 files changed, 78 insertions(+), 84 deletions(-) diff --git a/source/module_base/math_chebyshev.cpp b/source/module_base/math_chebyshev.cpp index 6a074c7e71..a5190ba34a 100644 --- a/source/module_base/math_chebyshev.cpp +++ b/source/module_base/math_chebyshev.cpp @@ -437,7 +437,7 @@ void Chebyshev::calfinalvec_real( funA(arrayn_1, arrayn, m); // 0- & 1-st order - setmem_complex_op()(this->ctx, waveout, 0, ndmxt); + setmem_complex_op()(waveout, 0, ndmxt); std::complex coef0 = std::complex(coefr_cpu[0], 0); container::kernels::blas_axpy, ct_Device>()(ndmxt, &coef0, arrayn_1, 1, waveout, 1); std::complex coef1 = std::complex(coefr_cpu[1], 0); @@ -505,7 +505,7 @@ void Chebyshev::calfinalvec_complex( funA(arrayn_1, arrayn, m); // 0- & 1-st order - setmem_complex_op()(this->ctx, waveout, 0, ndmxt); + setmem_complex_op()(waveout, 0, ndmxt); container::kernels::blas_axpy, ct_Device>()(ndmxt, &coefc_cpu[0], arrayn_1, 1, waveout, 1); container::kernels::blas_axpy, ct_Device>()(ndmxt, &coefc_cpu[1], arrayn, 1, waveout, 1); // for (int i = 0; i < ndmxt; ++i) diff --git a/source/module_base/module_device/cuda/memory_op.cu b/source/module_base/module_device/cuda/memory_op.cu index 97445db1f7..de481fe90b 100644 --- a/source/module_base/module_device/cuda/memory_op.cu +++ b/source/module_base/module_device/cuda/memory_op.cu @@ -53,7 +53,7 @@ __global__ void cast_memory(std::complex* out, const FPTYPE_in* in, template void resize_memory_op::operator()(FPTYPE*& arr, - const size_t size, + const size_t size, const char* record_in) { if (arr != nullptr) @@ -78,8 +78,7 @@ void resize_memory_op::operator()(FPTYPE*& arr, } template -void set_memory_op::operator()(const base_device::DEVICE_GPU* dev, - FPTYPE* arr, +void set_memory_op::operator()(FPTYPE* arr, const int var, const size_t size) { diff --git a/source/module_base/module_device/memory_op.cpp b/source/module_base/module_device/memory_op.cpp index 5433f84d48..b18e2cea39 100644 --- a/source/module_base/module_device/memory_op.cpp +++ b/source/module_base/module_device/memory_op.cpp @@ -45,7 +45,7 @@ struct resize_memory_op template struct set_memory_op { - void operator()(const base_device::DEVICE_CPU* dev, FPTYPE* arr, const int var, const size_t size) + void operator()(FPTYPE* arr, const int var, const size_t size) { ModuleBase::OMP_PARALLEL([&](int num_thread, int thread_id) { int beg = 0, len = 0; @@ -166,7 +166,7 @@ struct resize_memory_op template struct set_memory_op { - void operator()(const base_device::DEVICE_GPU* dev, FPTYPE* arr, const int var, const size_t size) + void operator()(FPTYPE* arr, const int var, const size_t size) { } }; diff --git a/source/module_base/module_device/memory_op.h b/source/module_base/module_device/memory_op.h index 5804b58829..11f9fd7a9e 100644 --- a/source/module_base/module_device/memory_op.h +++ b/source/module_base/module_device/memory_op.h @@ -32,13 +32,12 @@ struct set_memory_op /// @brief memset for multi-device /// /// Input Parameters - /// \param dev : the type of computing device /// \param var : the specified constant value /// \param size : array size /// /// Output Parameters /// \param arr : output array initialized by the input value - void operator()(const Device* dev, FPTYPE* arr, const int var, const size_t size); + void operator()(FPTYPE* arr, const int var, const size_t size); }; template @@ -120,7 +119,7 @@ struct resize_memory_op template struct set_memory_op { - void operator()(const base_device::DEVICE_GPU* dev, FPTYPE* arr, const int var, const size_t size); + void operator()(FPTYPE* arr, const int var, const size_t size); }; template diff --git a/source/module_base/module_device/rocm/memory_op.hip.cu b/source/module_base/module_device/rocm/memory_op.hip.cu index 1909cfb771..96fb12a35c 100644 --- a/source/module_base/module_device/rocm/memory_op.hip.cu +++ b/source/module_base/module_device/rocm/memory_op.hip.cu @@ -39,8 +39,7 @@ __global__ void cast_memory(std::complex* out, const std::complex -void resize_memory_op::operator()(const base_device::DEVICE_GPU* dev, - FPTYPE*& arr, +void resize_memory_op::operator()(FPTYPE*& arr, const size_t size, const char* record_in) { diff --git a/source/module_base/module_device/test/memory_test.cpp b/source/module_base/module_device/test/memory_test.cpp index ac7d0af22c..a2d08f1f5b 100644 --- a/source/module_base/module_device/test/memory_test.cpp +++ b/source/module_base/module_device/test/memory_test.cpp @@ -91,7 +91,7 @@ class TestModulePsiMemory : public ::testing::Test TEST_F(TestModulePsiMemory, set_memory_op_double_cpu) { std::vector v_xx = xx; - set_memory_double_cpu_op()(cpu_ctx, v_xx.data(), 0, xx.size()); + set_memory_double_cpu_op()(v_xx.data(), 0, xx.size()); for (int ii = 0; ii < xx.size(); ii++) { EXPECT_EQ(v_xx[ii], 0.0); @@ -101,7 +101,7 @@ TEST_F(TestModulePsiMemory, set_memory_op_double_cpu) TEST_F(TestModulePsiMemory, set_memory_op_complex_double_cpu) { std::vector> vz_xx = z_xx; - set_memory_complex_double_cpu_op()(cpu_ctx, vz_xx.data(), 0, z_xx.size()); + set_memory_complex_double_cpu_op()(vz_xx.data(), 0, z_xx.size()); for (int ii = 0; ii < z_xx.size(); ii++) { EXPECT_EQ(vz_xx[ii], std::complex(0.0, 0.0)); @@ -175,7 +175,7 @@ TEST_F(TestModulePsiMemory, set_memory_op_double_gpu) { thrust::device_ptr d_xx = thrust::device_malloc(xx.size()); thrust::copy(xx.begin(), xx.end(), d_xx); - set_memory_double_gpu_op()(gpu_ctx, thrust::raw_pointer_cast(d_xx), 0, xx.size()); + set_memory_double_gpu_op()(thrust::raw_pointer_cast(d_xx), 0, xx.size()); thrust::host_vector h_xx(xx.size()); thrust::copy(d_xx, d_xx + xx.size(), h_xx.begin()); for (int ii = 0; ii < xx.size(); ii++) @@ -188,7 +188,7 @@ TEST_F(TestModulePsiMemory, set_memory_op_complex_double_gpu) { thrust::device_ptr> dz_xx = thrust::device_malloc>(z_xx.size()); thrust::copy(z_xx.begin(), z_xx.end(), dz_xx); - set_memory_complex_double_gpu_op()(gpu_ctx, thrust::raw_pointer_cast(dz_xx), 0, z_xx.size()); + set_memory_complex_double_gpu_op()(thrust::raw_pointer_cast(dz_xx), 0, z_xx.size()); thrust::host_vector> h_xx(z_xx.size()); thrust::copy(dz_xx, dz_xx + z_xx.size(), h_xx.begin()); for (int ii = 0; ii < z_xx.size(); ii++) diff --git a/source/module_basis/module_pw/pw_transform_k.cpp b/source/module_basis/module_pw/pw_transform_k.cpp index 5e3780eef4..16661f7664 100644 --- a/source/module_basis/module_pw/pw_transform_k.cpp +++ b/source/module_basis/module_pw/pw_transform_k.cpp @@ -413,7 +413,6 @@ void PW_Basis_K::recip_to_real(const base_device::DEVICE_GPU* ctx, assert(this->poolnproc == 1); // ModuleBase::GlobalFunc::ZEROS(fft_bundle.get_auxr_3d_data(), this->nxyz); base_device::memory::set_memory_op, base_device::DEVICE_GPU>()( - ctx, this->fft_bundle.get_auxr_3d_data(), 0, this->nxyz); @@ -450,7 +449,6 @@ void PW_Basis_K::recip_to_real(const base_device::DEVICE_GPU* ctx, assert(this->poolnproc == 1); // ModuleBase::GlobalFunc::ZEROS(fft_bundle.get_auxr_3d_data(), this->nxyz); base_device::memory::set_memory_op, base_device::DEVICE_GPU>()( - ctx, this->fft_bundle.get_auxr_3d_data(), 0, this->nxyz); diff --git a/source/module_elecstate/elecstate_pw.cpp b/source/module_elecstate/elecstate_pw.cpp index 24fdedebd6..abe0d4d43d 100644 --- a/source/module_elecstate/elecstate_pw.cpp +++ b/source/module_elecstate/elecstate_pw.cpp @@ -118,15 +118,15 @@ void ElecStatePW::psiToRho(const psi::Psi& psi) { // denghui replaced at 20221110 // ModuleBase::GlobalFunc::ZEROS(this->rho[is], this->charge->nrxx); - setmem_var_op()(this->ctx, this->rho[is], 0, this->charge->nrxx); + setmem_var_op()(this->rho[is], 0, this->charge->nrxx); if (get_xc_func_type() == 3) { // ModuleBase::GlobalFunc::ZEROS(this->charge->kin_r[is], this->charge->nrxx); - setmem_var_op()(this->ctx, this->kin_r[is], 0, this->charge->nrxx); + setmem_var_op()(this->kin_r[is], 0, this->charge->nrxx); } if (PARAM.globalv.double_grid || PARAM.globalv.use_uspp) { - setmem_complex_op()(this->ctx, this->rhog[is], 0, this->charge->rhopw->npw); + setmem_complex_op()(this->rhog[is], 0, this->charge->rhopw->npw); } } @@ -244,7 +244,7 @@ void ElecStatePW::rhoBandK(const psi::Psi& psi) { for (int j = 0; j < 3; j++) { - setmem_complex_op()(this->ctx, this->wfcr, 0, this->charge->nrxx); + setmem_complex_op()(this->wfcr, 0, this->charge->nrxx); meta_op()(this->ctx, ik, @@ -280,7 +280,7 @@ void ElecStatePW::cal_becsum(const psi::Psi& psi) resmem_complex_op()(becp, nbands * nkb, "ElecState::becp"); const int nh_tot = this->ppcell->nhm * (this->ppcell->nhm + 1) / 2; resmem_var_op()(becsum, nh_tot * ucell->nat * PARAM.inp.nspin, "ElecState::becsum"); - setmem_var_op()(this->ctx, becsum, 0, nh_tot * ucell->nat * PARAM.inp.nspin); + setmem_var_op()(becsum, 0, nh_tot * ucell->nat * PARAM.inp.nspin); for (int ik = 0; ik < psi.get_nk(); ++ik) { diff --git a/source/module_elecstate/elecstate_pw_cal_tau.cpp b/source/module_elecstate/elecstate_pw_cal_tau.cpp index ad8c9ce42f..98c9a24860 100644 --- a/source/module_elecstate/elecstate_pw_cal_tau.cpp +++ b/source/module_elecstate/elecstate_pw_cal_tau.cpp @@ -9,7 +9,7 @@ void ElecStatePW::cal_tau(const psi::Psi& psi) ModuleBase::TITLE("ElecStatePW", "cal_tau"); for(int is=0; isctx, this->kin_r[is], 0, this->charge->nrxx); + setmem_var_op()(this->kin_r[is], 0, this->charge->nrxx); } for (int ik = 0; ik < psi.get_nk(); ++ik) @@ -31,7 +31,7 @@ void ElecStatePW::cal_tau(const psi::Psi& psi) // kinetic energy density for (int j = 0; j < 3; j++) { - setmem_complex_op()(this->ctx, this->wfcr, 0, this->charge->nrxx); + setmem_complex_op()(this->wfcr, 0, this->charge->nrxx); meta_op()(this->ctx, ik, diff --git a/source/module_elecstate/elecstate_pw_sdft.cpp b/source/module_elecstate/elecstate_pw_sdft.cpp index ad6f98c3c3..ea9e4463c8 100644 --- a/source/module_elecstate/elecstate_pw_sdft.cpp +++ b/source/module_elecstate/elecstate_pw_sdft.cpp @@ -16,7 +16,7 @@ void ElecStatePW_SDFT::psiToRho(const psi::Psi& psi) const int nspin = PARAM.inp.nspin; for (int is = 0; is < nspin; is++) { - setmem_var_op()(this->ctx, this->rho[is], 0, this->charge->nrxx); + setmem_var_op()(this->rho[is], 0, this->charge->nrxx); } if (GlobalV::MY_STOGROUP == 0) diff --git a/source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp b/source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp index 0f7a6127aa..8acf0f4896 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp @@ -1080,7 +1080,7 @@ void pseudopot_cell_vnl::radial_fft_q(Device* ctx, const int ivl = nhtolm(itype, ih); const int jvl = nhtolm(itype, jh); - setmem_complex_op()(ctx, qg, 0, ng); + setmem_complex_op()(qg, 0, ng); const double* qnorm_double = reinterpret_cast(qnorm); diff --git a/source/module_hamilt_pw/hamilt_pwdft/forces_nl.cpp b/source/module_hamilt_pw/hamilt_pwdft/forces_nl.cpp index c02dc4789a..b3891b7f12 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/forces_nl.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/forces_nl.cpp @@ -28,7 +28,7 @@ void Forces::cal_force_nl(ModuleBase::matrix& forcenl, // allocate memory for the force FPTYPE* force = nullptr; resmem_var_op()(force, ucell_in.nat * 3); - base_device::memory::set_memory_op()(this->ctx, force, 0.0, ucell_in.nat * 3); + base_device::memory::set_memory_op()(force, 0.0, ucell_in.nat * 3); hamilt::FS_Nonlocal_tools nl_tools(&nlpp, &ucell_in, p_kv, wfc_basis, p_sf, wg, &ekb); diff --git a/source/module_hamilt_pw/hamilt_pwdft/forces_onsite.cpp b/source/module_hamilt_pw/hamilt_pwdft/forces_onsite.cpp index bbcf883056..703f8fe141 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/forces_onsite.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/forces_onsite.cpp @@ -24,7 +24,7 @@ void Forces::cal_force_onsite(ModuleBase::matrix& force_onsite, // allocate memory for the force FPTYPE* force = nullptr; resmem_var_op()(force, ucell_in.nat * 3); - base_device::memory::set_memory_op()(this->ctx, force, 0.0, ucell_in.nat * 3); + base_device::memory::set_memory_op()(force, 0.0, ucell_in.nat * 3); auto* onsite_p = projectors::OnsiteProjector::get_instance(); diff --git a/source/module_hamilt_pw/hamilt_pwdft/hamilt_pw.cpp b/source/module_hamilt_pw/hamilt_pwdft/hamilt_pw.cpp index 25590fdc20..1b0992309b 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/hamilt_pw.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/hamilt_pw.cpp @@ -295,7 +295,7 @@ void HamiltPW::sPsi(const T* psi_in, // psi } resmem_complex_op()(ps, this->ppcell->nkb * nbands, "Hamilt::ps"); - setmem_complex_op()(this->ctx, ps, 0, this->ppcell->nkb * nbands); + setmem_complex_op()(ps, 0, this->ppcell->nkb * nbands); // spsi = psi + sum qq |beta> if (PARAM.inp.noncolin) diff --git a/source/module_hamilt_pw/hamilt_pwdft/operator_pw/meta_pw.cpp b/source/module_hamilt_pw/hamilt_pwdft/operator_pw/meta_pw.cpp index 1f17e35313..83db1d98fa 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/operator_pw/meta_pw.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/operator_pw/meta_pw.cpp @@ -55,7 +55,7 @@ void Meta>::act( ModuleBase::timer::tick("Operator", "MetaPW"); if(is_first_node) { - setmem_complex_op()(this->ctx, tmhpsi, 0, nbasis*nbands/npol); + setmem_complex_op()(tmhpsi, 0, nbasis*nbands/npol); } const int current_spin = this->isk[this->ik]; diff --git a/source/module_hamilt_pw/hamilt_pwdft/operator_pw/nonlocal_pw.cpp b/source/module_hamilt_pw/hamilt_pwdft/operator_pw/nonlocal_pw.cpp index eb40c2251e..e2ec876872 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/operator_pw/nonlocal_pw.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/operator_pw/nonlocal_pw.cpp @@ -75,7 +75,7 @@ void Nonlocal>::add_nonlocal_pp(T *hpsi_in, const T *becp, resmem_complex_op()(this->ps, nkb * m, "Nonlocal::ps"); this->nkb_m = m * nkb; } - setmem_complex_op()(this->ctx, this->ps, 0, nkb * m); + setmem_complex_op()(this->ps, 0, nkb * m); int sum = 0; int iat = 0; @@ -221,7 +221,7 @@ void Nonlocal>::act( ModuleBase::timer::tick("Operator", "NonlocalPW"); if(is_first_node) { - setmem_complex_op()(this->ctx, tmhpsi, 0, nbasis*nbands/npol); + setmem_complex_op()(tmhpsi, 0, nbasis*nbands/npol); } if(!PARAM.inp.use_paw) { diff --git a/source/module_hamilt_pw/hamilt_pwdft/operator_pw/onsite_proj_pw.cpp b/source/module_hamilt_pw/hamilt_pwdft/operator_pw/onsite_proj_pw.cpp index 6d328e017f..7787b315dc 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/operator_pw/onsite_proj_pw.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/operator_pw/onsite_proj_pw.cpp @@ -130,7 +130,7 @@ void OnsiteProj>::cal_ps_delta_spin(const int npol, const resmem_complex_op()(this->ps, tnp * m, "OnsiteProj::ps"); this->nkb_m = m * tnp; } - setmem_complex_op()(this->ctx, this->ps, 0, tnp * m); + setmem_complex_op()(this->ps, 0, tnp * m); if(!this->init_delta_spin) { @@ -230,7 +230,7 @@ void OnsiteProj>::cal_ps_dftu(const int npol, const int m) } if(!this->has_delta_spin) { - setmem_complex_op()(this->ctx, this->ps, 0, tnp * m); + setmem_complex_op()(this->ps, 0, tnp * m); } if(!this->init_dftu) diff --git a/source/module_hamilt_pw/hamilt_pwdft/operator_pw/veff_pw.cpp b/source/module_hamilt_pw/hamilt_pwdft/operator_pw/veff_pw.cpp index 53d40b1980..ab7dce9e7a 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/operator_pw/veff_pw.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/operator_pw/veff_pw.cpp @@ -48,7 +48,7 @@ void Veff>::act( ModuleBase::timer::tick("Operator", "VeffPW"); if(is_first_node) { - setmem_complex_op()(this->ctx, tmhpsi, 0, nbasis*nbands/npol); + setmem_complex_op()(tmhpsi, 0, nbasis*nbands/npol); } int max_npw = nbasis / npol; diff --git a/source/module_hamilt_pw/hamilt_pwdft/stress_func_nl.cpp b/source/module_hamilt_pw/hamilt_pwdft/stress_func_nl.cpp index fcd5356ee5..383a1666ac 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/stress_func_nl.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/stress_func_nl.cpp @@ -31,7 +31,7 @@ void Stress_Func::stress_nl(ModuleBase::matrix& sigma, FPTYPE* stress_device = nullptr; resmem_var_op()(stress_device, 9); - setmem_var_op()(this->ctx, stress_device, 0, 9); + setmem_var_op()(stress_device, 0, 9); std::vector sigmanlc(9, 0.0); hamilt::FS_Nonlocal_tools nl_tools(&nlpp_in, &ucell_in, p_kv, wfc_basis, p_sf, wg, &ekb); diff --git a/source/module_hamilt_pw/hamilt_pwdft/stress_func_onsite.cpp b/source/module_hamilt_pw/hamilt_pwdft/stress_func_onsite.cpp index 4c61d0841d..919be07ea3 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/stress_func_onsite.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/stress_func_onsite.cpp @@ -23,7 +23,7 @@ void Stress_Func::stress_onsite(ModuleBase::matrix& sigma, FPTYPE* stress_device = nullptr; resmem_var_op()(stress_device, 9); - setmem_var_op()(this->ctx, stress_device, 0, 9); + setmem_var_op()(stress_device, 0, 9); std::vector sigma_onsite(9, 0.0); auto* onsite_p = projectors::OnsiteProjector::get_instance(); diff --git a/source/module_hamilt_pw/hamilt_stodft/sto_forces.cpp b/source/module_hamilt_pw/hamilt_stodft/sto_forces.cpp index dd5bbf9198..a0b8543eff 100644 --- a/source/module_hamilt_pw/hamilt_stodft/sto_forces.cpp +++ b/source/module_hamilt_pw/hamilt_stodft/sto_forces.cpp @@ -218,7 +218,7 @@ void Sto_Forces::cal_sto_force_nl( // allocate memory for the force FPTYPE* force = nullptr; resmem_var_op()(force, ucell.nat * 3); - base_device::memory::set_memory_op()(this->ctx, force, 0.0, ucell.nat * 3); + base_device::memory::set_memory_op()(force, 0.0, ucell.nat * 3); hamilt::FS_Nonlocal_tools nl_tools(&nlpp, &ucell, p_kv, wfc_basis, p_sf, wg, nullptr); diff --git a/source/module_hamilt_pw/hamilt_stodft/sto_iter.cpp b/source/module_hamilt_pw/hamilt_stodft/sto_iter.cpp index 7258227602..adca009460 100644 --- a/source/module_hamilt_pw/hamilt_stodft/sto_iter.cpp +++ b/source/module_hamilt_pw/hamilt_stodft/sto_iter.cpp @@ -362,7 +362,7 @@ void Stochastic_Iter::calPn(const int& ik, Stochastic_WF& } else { - setmem_var_op()(this->ctx, spolyv, 0, norder * norder); + setmem_var_op()(spolyv, 0, norder * norder); } } T* pchi; @@ -597,7 +597,7 @@ void Stochastic_Iter::cal_storho(const UnitCell& ucell, } for (int is = 0; is < nspin; is++) { - setmem_var_op()(this->ctx, pes->rho[is], 0, nrxx); + setmem_var_op()(pes->rho[is], 0, nrxx); } for (int ik = 0; ik < this->pkv->get_nks(); ++ik) { diff --git a/source/module_hamilt_pw/hamilt_stodft/sto_stress_pw.cpp b/source/module_hamilt_pw/hamilt_stodft/sto_stress_pw.cpp index adc3116109..770b7319dd 100644 --- a/source/module_hamilt_pw/hamilt_stodft/sto_stress_pw.cpp +++ b/source/module_hamilt_pw/hamilt_stodft/sto_stress_pw.cpp @@ -168,7 +168,7 @@ void Sto_Stress_PW::sto_stress_nl(ModuleBase::matrix& sigma, // allocate memory for the stress FPTYPE* stress_device = nullptr; resmem_var_op()(stress_device, 9); - setmem_var_op()(this->ctx, stress_device, 0, 9); + setmem_var_op()(stress_device, 0, 9); std::vector sigmanlc(9, 0.0); hamilt::FS_Nonlocal_tools nl_tools(&nlpp, &ucell, p_kv, wfc_basis, p_sf, wg, nullptr); diff --git a/source/module_hamilt_pw/hamilt_stodft/sto_wf.cpp b/source/module_hamilt_pw/hamilt_stodft/sto_wf.cpp index 8a76daa9e9..dfec4f3a05 100644 --- a/source/module_hamilt_pw/hamilt_stodft/sto_wf.cpp +++ b/source/module_hamilt_pw/hamilt_stodft/sto_wf.cpp @@ -52,7 +52,7 @@ void Stochastic_WF::allocate_chiallorder(const int& norder) for (int ik = 0; ik < this->nks; ++ik) { chiallorder[ik].resize(1, this->nchip[ik] * this->npwx, norder); - setmem_complex_op()(chiallorder[ik].get_device(), chiallorder[ik].get_pointer(), 0, chiallorder[ik].size()); + setmem_complex_op()(chiallorder[ik].get_pointer(), 0, chiallorder[ik].size()); } } diff --git a/source/module_hsolver/diago_dav_subspace.cpp b/source/module_hsolver/diago_dav_subspace.cpp index 8486771167..8d156035ae 100644 --- a/source/module_hsolver/diago_dav_subspace.cpp +++ b/source/module_hsolver/diago_dav_subspace.cpp @@ -47,23 +47,23 @@ Diago_DavSubspace::Diago_DavSubspace(const std::vector& precond //<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< resmem_complex_op()(this->psi_in_iter, this->nbase_x * this->dim, "DAV::psi_in_iter"); - setmem_complex_op()(this->ctx, this->psi_in_iter, 0, this->nbase_x * this->dim); + setmem_complex_op()(this->psi_in_iter, 0, this->nbase_x * this->dim); // the product of H and psi in the reduced psi set resmem_complex_op()(this->hphi, this->nbase_x * this->dim, "DAV::hphi"); - setmem_complex_op()(this->ctx, this->hphi, 0, this->nbase_x * this->dim); + setmem_complex_op()(this->hphi, 0, this->nbase_x * this->dim); // Hamiltonian on the reduced psi set resmem_complex_op()(this->hcc, this->nbase_x * this->nbase_x, "DAV::hcc"); - setmem_complex_op()(this->ctx, this->hcc, 0, this->nbase_x * this->nbase_x); + setmem_complex_op()(this->hcc, 0, this->nbase_x * this->nbase_x); // Overlap on the reduced psi set resmem_complex_op()(this->scc, this->nbase_x * this->nbase_x, "DAV::scc"); - setmem_complex_op()(this->ctx, this->scc, 0, this->nbase_x * this->nbase_x); + setmem_complex_op()(this->scc, 0, this->nbase_x * this->nbase_x); // Eigenvectors resmem_complex_op()(this->vcc, this->nbase_x * this->nbase_x, "DAV::vcc"); - setmem_complex_op()(this->ctx, this->vcc, 0, this->nbase_x * this->nbase_x); + setmem_complex_op()(this->vcc, 0, this->nbase_x * this->nbase_x); //<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #if defined(__CUDA) || defined(__ROCM) @@ -190,7 +190,7 @@ int Diago_DavSubspace::diag_once(const HPsiFunc& hpsi_func, ModuleBase::timer::tick("Diago_DavSubspace", "last"); // updata eigenvectors of Hamiltonian - setmem_complex_op()(this->ctx, psi_in, 0, n_band * psi_in_dmax); + setmem_complex_op()(psi_in, 0, n_band * psi_in_dmax); #ifdef __DSP gemm_op_mt() // In order to not coding another whole template, using this method to minimize the code change. @@ -722,9 +722,9 @@ void Diago_DavSubspace::refresh(const int& dim, // set hcc/scc/vcc to 0 for (size_t i = 0; i < nbase; i++) { - setmem_complex_op()(this->ctx, &hcc[this->nbase_x * i], 0, nbase); - setmem_complex_op()(this->ctx, &scc[this->nbase_x * i], 0, nbase); - setmem_complex_op()(this->ctx, &vcc[this->nbase_x * i], 0, nbase); + setmem_complex_op()(&hcc[this->nbase_x * i], 0, nbase); + setmem_complex_op()(&scc[this->nbase_x * i], 0, nbase); + setmem_complex_op()(&vcc[this->nbase_x * i], 0, nbase); } if (this->device == base_device::GpuDevice) diff --git a/source/module_hsolver/diago_david.cpp b/source/module_hsolver/diago_david.cpp index fcf35472f4..24d4a40429 100644 --- a/source/module_hsolver/diago_david.cpp +++ b/source/module_hsolver/diago_david.cpp @@ -60,25 +60,24 @@ DiagoDavid::DiagoDavid(const Real* precondition_in, // the lowest N eigenvalues base_device::memory::resize_memory_op()(this->eigenvalue, nbase_x, "DAV::eig"); - base_device::memory::set_memory_op()( - this->cpu_ctx, this->eigenvalue, 0, nbase_x); + base_device::memory::set_memory_op()(this->eigenvalue, 0, nbase_x); // basis(dim, nbase_x), leading dimension = dim resmem_complex_op()(basis, nbase_x * dim, "DAV::basis"); - setmem_complex_op()(this->ctx, basis, 0, nbase_x * dim); + setmem_complex_op()(basis, 0, nbase_x * dim); //<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< // hpsi(nbase_x, dim); // the product of H and psi in the reduced basis set resmem_complex_op()(this->hpsi, nbase_x * dim, "DAV::hpsi"); - setmem_complex_op()(this->ctx, this->hpsi, 0, nbase_x * dim); + setmem_complex_op()(this->hpsi, 0, nbase_x * dim); // spsi(nbase_x, dim); // the Product of S and psi in the reduced basis set resmem_complex_op()(this->spsi, nbase_x * dim, "DAV::spsi"); - setmem_complex_op()(this->ctx, this->spsi, 0, nbase_x * dim); + setmem_complex_op()(this->spsi, 0, nbase_x * dim); // hcc(nbase_x, nbase_x); // Hamiltonian on the reduced basis resmem_complex_op()(this->hcc, nbase_x * nbase_x, "DAV::hcc"); - setmem_complex_op()(this->ctx, this->hcc, 0, nbase_x * nbase_x); + setmem_complex_op()(this->hcc, 0, nbase_x * nbase_x); // scc(nbase_x, nbase_x); // Overlap on the reduced basis // resmem_complex_op()(this->ctx, this->scc, nbase_x * nbase_x, "DAV::scc"); @@ -86,12 +85,12 @@ DiagoDavid::DiagoDavid(const Real* precondition_in, // vcc(nbase_x, nbase_x); // Eigenvectors of hcc resmem_complex_op()(this->vcc, nbase_x * nbase_x, "DAV::vcc"); - setmem_complex_op()(this->ctx, this->vcc, 0, nbase_x * nbase_x); + setmem_complex_op()(this->vcc, 0, nbase_x * nbase_x); //<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< // lagrange_matrix(nband, nband); // for orthogonalization resmem_complex_op()(this->lagrange_matrix, nband * nband); - setmem_complex_op()(this->ctx, this->lagrange_matrix, 0, nband * nband); + setmem_complex_op()(this->lagrange_matrix, 0, nband * nband); #if defined(__CUDA) || defined(__ROCM) // device precondition array @@ -265,7 +264,7 @@ int DiagoDavid::diag_once(const HPsiFunc& hpsi_func, // update eigenvectors of Hamiltonian - setmem_complex_op()(this->ctx, psi_in, 0, nband * ld_psi); + setmem_complex_op()(psi_in, 0, nband * ld_psi); //<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< gemm_op()(this->ctx, 'N', @@ -353,7 +352,7 @@ void DiagoDavid::cal_grad(const HPsiFunc& hpsi_func, // eigenvectors of unconverged index extracted from vcc T* vc_ev_vector = nullptr; resmem_complex_op()(vc_ev_vector, notconv * nbase); - setmem_complex_op()(this->ctx, vc_ev_vector, 0, notconv * nbase); + setmem_complex_op()(vc_ev_vector, 0, notconv * nbase); //>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> // for (int m = 0; m < notconv; m++) @@ -499,7 +498,7 @@ void DiagoDavid::cal_grad(const HPsiFunc& hpsi_func, // plan for SchmidtOrth T* lagrange = nullptr; resmem_complex_op()(lagrange, notconv * (nbase + notconv)); - setmem_complex_op()(this->ctx, lagrange, 0, notconv * (nbase + notconv)); + setmem_complex_op()(lagrange, 0, notconv * (nbase + notconv)); std::vector pre_matrix_mm_m(notconv, 0); std::vector pre_matrix_mv_m(notconv, 1); @@ -751,7 +750,7 @@ void DiagoDavid::refresh(const int& dim, ModuleBase::timer::tick("DiagoDavid", "refresh"); // update hp,sp - setmem_complex_op()(this->ctx, basis , 0, nbase_x * dim); + setmem_complex_op()(basis , 0, nbase_x * dim); // basis(dim, nband) = hpsi(dim, nbase) * vcc(nbase, nband) gemm_op()(this->ctx, @@ -800,7 +799,7 @@ void DiagoDavid::refresh(const int& dim, }*/ // update basis - setmem_complex_op()(this->ctx, basis , 0, nbase_x * dim); + setmem_complex_op()(basis , 0, nbase_x * dim); for (int m = 0; m < nband; m++) { @@ -813,7 +812,7 @@ void DiagoDavid::refresh(const int& dim, // basis set size reset to nband nbase = nband; - setmem_complex_op()(this->ctx, hcc, 0, nbase_x * nbase_x); + setmem_complex_op()(hcc, 0, nbase_x * nbase_x); // setmem_complex_op()(this->ctx, scc, 0, nbase_x * nbase_x); diff --git a/source/module_hsolver/diago_iter_assist.cpp b/source/module_hsolver/diago_iter_assist.cpp index d216ad02fa..4f4aabd26c 100644 --- a/source/module_hsolver/diago_iter_assist.cpp +++ b/source/module_hsolver/diago_iter_assist.cpp @@ -45,9 +45,9 @@ void DiagoIterAssist::diagH_subspace(const hamilt::Hamilt* resmem_complex_op()(hcc, nstart * nstart, "DiagSub::hcc"); resmem_complex_op()(scc, nstart * nstart, "DiagSub::scc"); resmem_complex_op()(vcc, nstart * nstart, "DiagSub::vcc"); - setmem_complex_op()(ctx, hcc, 0, nstart * nstart); - setmem_complex_op()(ctx, scc, 0, nstart * nstart); - setmem_complex_op()(ctx, vcc, 0, nstart * nstart); + setmem_complex_op()(hcc, 0, nstart * nstart); + setmem_complex_op()(scc, 0, nstart * nstart); + setmem_complex_op()(vcc, 0, nstart * nstart); const int dmin = psi.get_current_ngk(); const int dmax = psi.get_nbasis(); @@ -65,7 +65,7 @@ void DiagoIterAssist::diagH_subspace(const hamilt::Hamilt* } { // code block to calculate hcc and scc - setmem_complex_op()(ctx, temp, 0, nstart * dmax); + setmem_complex_op()(temp, 0, nstart * dmax); T* hphi = temp; // do hPsi for all bands @@ -195,9 +195,9 @@ void DiagoIterAssist::diagH_subspace_init(hamilt::Hamilt* resmem_complex_op()(hcc, nstart * nstart, "DiagSub::hcc"); resmem_complex_op()(scc, nstart * nstart, "DiagSub::scc"); resmem_complex_op()(vcc, nstart * nstart, "DiagSub::vcc"); - setmem_complex_op()(ctx, hcc, 0, nstart * nstart); - setmem_complex_op()(ctx, scc, 0, nstart * nstart); - setmem_complex_op()(ctx, vcc, 0, nstart * nstart); + setmem_complex_op()(hcc, 0, nstart * nstart); + setmem_complex_op()(scc, 0, nstart * nstart); + setmem_complex_op()(vcc, 0, nstart * nstart); if (base_device::get_device_type(ctx) == base_device::GpuDevice) { @@ -207,7 +207,7 @@ void DiagoIterAssist::diagH_subspace_init(hamilt::Hamilt* // hpsi and spsi share the temp space T* temp = nullptr; resmem_complex_op()(temp, psi_nc, "DiagSub::temp"); - setmem_complex_op()(ctx, temp, 0, psi_nc); + setmem_complex_op()(temp, 0, psi_nc); T* hpsi = temp; // do hPsi band by band @@ -256,7 +256,7 @@ void DiagoIterAssist::diagH_subspace_init(hamilt::Hamilt* // hpsi and spsi share the temp space T* temp = nullptr; resmem_complex_op()(temp, nstart * psi_nc, "DiagSub::temp"); - setmem_complex_op()(ctx, temp, 0, nstart * psi_nc); + setmem_complex_op()(temp, 0, nstart * psi_nc); T* hpsi = temp; // do hPsi for all bands @@ -378,7 +378,7 @@ void DiagoIterAssist::diagH_LAPACK(const int nstart, Real* eigenvalues = nullptr; resmem_var_op()(eigenvalues, nstart); - setmem_var_op()(ctx, eigenvalues, 0, nstart); + setmem_var_op()(eigenvalues, 0, nstart); dngvd_op()(ctx, nstart, ldh, hcc, scc, eigenvalues, vcc); @@ -423,18 +423,18 @@ void DiagoIterAssist::cal_hs_subspace(const hamilt::Hamilt { const int nstart = psi.get_nbands(); - setmem_complex_op()(ctx, hcc, 0, nstart * nstart); - setmem_complex_op()(ctx, scc, 0, nstart * nstart); + setmem_complex_op()(hcc, 0, nstart * nstart); + setmem_complex_op()(scc, 0, nstart * nstart); const int dmin = psi.get_current_ngk(); const int dmax = psi.get_nbasis(); T* temp = nullptr; resmem_complex_op()(temp, nstart * dmax, "DiagSub::temp"); - setmem_complex_op()(ctx, temp, 0, nstart * dmax); + setmem_complex_op()(temp, 0, nstart * dmax); { // code block to calculate hcc and scc - setmem_complex_op()(ctx, temp, 0, nstart * dmax); + setmem_complex_op()(temp, 0, nstart * dmax); T* hphi = temp; // do hPsi for all bands @@ -503,7 +503,7 @@ void DiagoIterAssist::diag_responce( const T* hcc, T *vcc = nullptr; resmem_complex_op()(vcc, nstart * nstart, "DiagSub::vcc"); - setmem_complex_op()(ctx, vcc, 0, nstart * nstart); + setmem_complex_op()(vcc, 0, nstart * nstart); // after generation of H and S matrix, diag them DiagoIterAssist::diagH_LAPACK(nstart, nstart, hcc, scc, nstart, en, vcc); @@ -546,7 +546,7 @@ void DiagoIterAssist::diag_subspace_psi(const T* hcc, T *vcc = nullptr; resmem_complex_op()(vcc, nstart * nstart, "DiagSub::vcc"); - setmem_complex_op()(ctx, vcc, 0, nstart * nstart); + setmem_complex_op()(vcc, 0, nstart * nstart); // after generation of H and S matrix, diag them DiagoIterAssist::diagH_LAPACK(nstart, nstart, hcc, scc, nstart, en, vcc); @@ -556,7 +556,7 @@ void DiagoIterAssist::diag_subspace_psi(const T* hcc, const int dmax = evc.get_nbasis(); T* temp = nullptr; resmem_complex_op()(temp, nstart * dmax, "DiagSub::temp"); - setmem_complex_op()(ctx, temp, 0, nstart * dmax); + setmem_complex_op()(temp, 0, nstart * dmax); gemm_op()(ctx, 'N', 'N', diff --git a/source/module_psi/psi.cpp b/source/module_psi/psi.cpp index 56d470e8a0..0fdbe5d742 100644 --- a/source/module_psi/psi.cpp +++ b/source/module_psi/psi.cpp @@ -496,7 +496,7 @@ template void Psi::zero_out() { // this->psi.assign(this->psi.size(), T(0)); - set_memory_op()(this->ctx, this->psi, 0, this->size()); + set_memory_op()(this->psi, 0, this->size()); } template From 97aa0c8c3bd5f7307d82babeaf67839874a57899 Mon Sep 17 00:00:00 2001 From: critsium-xy Date: Thu, 16 Jan 2025 17:55:44 +0800 Subject: [PATCH 5/7] Remove ctx parameters in sync_memory_op --- python/pyabacus/src/hsolver/py_diago_cg.hpp | 2 - .../pyabacus/src/hsolver/py_diago_david.hpp | 2 +- .../module_base/kernels/dsp/dsp_connector.h | 8 +-- .../module_base/kernels/test/math_op_test.cpp | 8 +-- source/module_base/math_chebyshev.cpp | 20 +++---- .../module_device/cuda/memory_op.cu | 14 +---- .../module_base/module_device/memory_op.cpp | 16 ++---- source/module_base/module_device/memory_op.h | 18 ++---- .../module_device/rocm/memory_op.hip.cu | 9 +-- .../module_device/test/memory_test.cpp | 26 +++------ source/module_base/parallel_device.h | 8 +-- .../module_base/test/blas_connector_test.cpp | 18 +++--- .../module_pw/kernels/test/pw_op_test.cpp | 22 ++++---- source/module_basis/module_pw/pw_basis_k.cpp | 10 ++-- .../module_basis/module_pw/pw_basis_sup.cpp | 2 +- .../module_basis/module_pw/pw_distributeg.cpp | 2 +- .../module_basis/module_pw/pw_transform_k.cpp | 6 +- .../kernels/test/elecstate_op_test.cpp | 14 ++--- .../potentials/potential_new.cpp | 8 +-- source/module_hamilt_general/hamilt.h | 2 +- source/module_hamilt_general/operator.cpp | 2 +- .../module_deltaspin/cal_mw_from_lambda.cpp | 16 +++--- .../hamilt_pwdft/VNL_in_pw.cpp | 26 ++++----- .../hamilt_pwdft/forces_cc.cpp | 24 ++++---- .../hamilt_pwdft/forces_nl.cpp | 2 +- .../hamilt_pwdft/forces_onsite.cpp | 2 +- .../hamilt_pwdft/forces_scc.cpp | 12 ++-- .../hamilt_pwdft/fs_kin_tools.cpp | 4 +- .../hamilt_pwdft/fs_nonlocal_tools.cpp | 26 ++++----- .../hamilt_pwdft/hamilt_pw.cpp | 2 +- .../kernels/test/ekinetic_op_test.cpp | 8 +-- .../kernels/test/force_op_test.cpp | 28 +++++----- .../kernels/test/meta_op_test.cpp | 10 ++-- .../kernels/test/nonlocal_op_test.cpp | 16 +++--- .../kernels/test/stress_op_test.cpp | 38 ++++++------- .../kernels/test/veff_op_test.cpp | 16 +++--- .../hamilt_pwdft/kernels/test/vnl_op_test.cpp | 26 ++++----- .../hamilt_pwdft/kernels/test/wf_op_test.cpp | 20 +++---- .../hamilt_pwdft/nonlocal_maths.hpp | 4 +- .../hamilt_pwdft/onsite_proj_tools.cpp | 48 ++++++++-------- .../hamilt_pwdft/onsite_projector.cpp | 2 +- .../operator_pw/onsite_proj_pw.cpp | 14 ++--- .../hamilt_pwdft/stress_func_cc.cpp | 10 ++-- .../hamilt_pwdft/stress_func_loc.cpp | 12 ++-- .../hamilt_pwdft/stress_func_nl.cpp | 2 +- .../hamilt_pwdft/stress_func_onsite.cpp | 2 +- .../hamilt_pwdft/structure_factor.cpp | 6 +- .../hamilt_pwdft/structure_factor_k.cpp | 4 +- .../module_hamilt_pw/hamilt_stodft/sto_che.h | 4 +- .../hamilt_stodft/sto_forces.cpp | 2 +- .../hamilt_stodft/sto_iter.cpp | 14 ++--- .../hamilt_stodft/sto_stress_pw.cpp | 2 +- .../module_hamilt_pw/hamilt_stodft/sto_wf.cpp | 4 +- source/module_hsolver/diago_dav_subspace.cpp | 42 +++++++------- source/module_hsolver/diago_david.cpp | 32 +++++------ source/module_hsolver/diago_iter_assist.cpp | 10 ++-- source/module_hsolver/hsolver_pw.cpp | 2 - .../kernels/cuda/math_kernel_op.cu | 6 +- .../kernels/rocm/math_kernel_op.hip.cu | 6 +- .../kernels/test/math_dngvd_test.cpp | 4 +- .../kernels/test/math_kernel_test.cpp | 56 +++++++++---------- .../kernels/test/perf_math_kernel.cpp | 8 +-- source/module_psi/psi.cpp | 10 +--- source/module_psi/psi_init.cpp | 4 +- 64 files changed, 355 insertions(+), 448 deletions(-) diff --git a/python/pyabacus/src/hsolver/py_diago_cg.hpp b/python/pyabacus/src/hsolver/py_diago_cg.hpp index f1f84e9a77..f907e2e764 100644 --- a/python/pyabacus/src/hsolver/py_diago_cg.hpp +++ b/python/pyabacus/src/hsolver/py_diago_cg.hpp @@ -153,8 +153,6 @@ class PyDiagoCG const int nrow = ndim == 1 ? psi_in.NumElements() : psi_in.shape().dim_size(1); const int nbands = ndim == 1 ? 1 : psi_in.shape().dim_size(0); syncmem_z2z_h2h_op()( - this->ctx, - this->ctx, spsi_out.data>(), psi_in.data>(), static_cast(nrow * nbands) diff --git a/python/pyabacus/src/hsolver/py_diago_david.hpp b/python/pyabacus/src/hsolver/py_diago_david.hpp index 8a8d2c727e..7087af632e 100644 --- a/python/pyabacus/src/hsolver/py_diago_david.hpp +++ b/python/pyabacus/src/hsolver/py_diago_david.hpp @@ -135,7 +135,7 @@ class PyDiagoDavid const int nrow, const int nbands ) { - syncmem_op()(this->ctx, this->ctx, spsi_out, psi_in, static_cast(nbands * nrow)); + syncmem_op()(spsi_out, psi_in, static_cast(nbands * nrow)); }; obj = std::make_unique, base_device::DEVICE_CPU>>( diff --git a/source/module_base/kernels/dsp/dsp_connector.h b/source/module_base/kernels/dsp/dsp_connector.h index b51c67663e..ea0d17749e 100644 --- a/source/module_base/kernels/dsp/dsp_connector.h +++ b/source/module_base/kernels/dsp/dsp_connector.h @@ -75,7 +75,7 @@ void dsp_dav_subspace_reduce(T* hcc, T* scc, int nbase, int nbase_x, int notconv auto* swap = new T[notconv * nbase_x]; auto* target = new T[notconv * nbase_x]; - syncmem_complex_op()(cpu_ctx, cpu_ctx, swap, hcc + nbase * nbase_x, notconv * nbase_x); + syncmem_complex_op()(swap, hcc + nbase * nbase_x, notconv * nbase_x); if (base_device::get_current_precision(swap) == "single") { MPI_Reduce(swap, @@ -97,8 +97,8 @@ void dsp_dav_subspace_reduce(T* hcc, T* scc, int nbase, int nbase_x, int notconv diag_comm); } - syncmem_complex_op()(cpu_ctx, cpu_ctx, hcc + nbase * nbase_x, target, notconv * nbase_x); - syncmem_complex_op()(cpu_ctx, cpu_ctx, swap, scc + nbase * nbase_x, notconv * nbase_x); + syncmem_complex_op()(hcc + nbase * nbase_x, target, notconv * nbase_x); + syncmem_complex_op()(swap, scc + nbase * nbase_x, notconv * nbase_x); if (base_device::get_current_precision(swap) == "single") { @@ -121,7 +121,7 @@ void dsp_dav_subspace_reduce(T* hcc, T* scc, int nbase, int nbase_x, int notconv diag_comm); } - syncmem_complex_op()(cpu_ctx, cpu_ctx, scc + nbase * nbase_x, target, notconv * nbase_x); + syncmem_complex_op()(scc + nbase * nbase_x, target, notconv * nbase_x); delete[] swap; delete[] target; } diff --git a/source/module_base/kernels/test/math_op_test.cpp b/source/module_base/kernels/test/math_op_test.cpp index 17ea191656..6d3aa8d10a 100644 --- a/source/module_base/kernels/test/math_op_test.cpp +++ b/source/module_base/kernels/test/math_op_test.cpp @@ -310,9 +310,9 @@ TEST_F(TestModuleBaseMathMultiDevice, cal_ylm_real_op_gpu) resmem_var_op()(d_p, p.size()); resmem_var_op()(d_ylm, ylm.size()); - syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_g, g.data(), g.size()); - syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_p, p.data(), p.size()); - syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_ylm, ylm.data(), ylm.size()); + syncmem_var_h2d_op()(d_g, g.data(), g.size()); + syncmem_var_h2d_op()(d_p, p.data(), p.size()); + syncmem_var_h2d_op()(d_ylm, ylm.data(), ylm.size()); ModuleBase::cal_ylm_real_op()(gpu_ctx, ng, @@ -326,7 +326,7 @@ TEST_F(TestModuleBaseMathMultiDevice, cal_ylm_real_op_gpu) d_p, d_ylm); - syncmem_var_d2h_op()(cpu_ctx, gpu_ctx, ylm.data(), d_ylm, ylm.size()); + syncmem_var_d2h_op()(ylm.data(), d_ylm, ylm.size()); for (int ii = 0; ii < ylm.size(); ii++) { EXPECT_LT(fabs(ylm[ii] - expected_ylm[ii]), 6e-5); diff --git a/source/module_base/math_chebyshev.cpp b/source/module_base/math_chebyshev.cpp index a5190ba34a..a152d20505 100644 --- a/source/module_base/math_chebyshev.cpp +++ b/source/module_base/math_chebyshev.cpp @@ -131,7 +131,7 @@ REAL Chebyshev::ddot_real(const std::complex* psi_L, REAL* dot_device = nullptr; resmem_var_op()(dot_device, 1); container::kernels::blas_dot()(dim2, pL, 1, pR, 1, dot_device); - syncmem_var_d2h_op()(cpu_ctx, this->ctx, &result, dot_device, 1); + syncmem_var_d2h_op()(&result, dot_device, 1); delmem_var_op()(this->ctx, dot_device); } else @@ -146,7 +146,7 @@ REAL Chebyshev::ddot_real(const std::complex* psi_L, int dim2 = 2 * N; container::kernels::blas_dot()(dim2, pL, 1, pR, 1, dot_device); REAL result_temp = 0; - syncmem_var_d2h_op()(cpu_ctx, this->ctx, &result_temp, dot_device, 1); + syncmem_var_d2h_op()(&result_temp, dot_device, 1); result += result_temp; pL += 2 * LDA; pR += 2 * LDA; @@ -211,7 +211,7 @@ void Chebyshev::calcoef_real(std::function fun) if (base_device::get_device_type(this->ctx) == base_device::GpuDevice) { - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, coef_real, coefr_cpu, norder); + syncmem_var_h2d_op()(coef_real, coefr_cpu, norder); } getcoef_real = true; @@ -301,7 +301,7 @@ void Chebyshev::calcoef_complex(std::function(s } if (base_device::get_device_type(this->ctx) == base_device::GpuDevice) { - syncmem_complex_h2d_op()(this->ctx, this->cpu_ctx, coef_complex, coefc_cpu, norder); + syncmem_complex_h2d_op()(coef_complex, coefc_cpu, norder); } getcoef_complex = true; @@ -392,7 +392,7 @@ void Chebyshev::calcoef_pair(std::function fun1, std:: if (base_device::get_device_type(this->ctx) == base_device::GpuDevice) { - syncmem_complex_h2d_op()(this->ctx, this->cpu_ctx, coef_complex, coefc_cpu, norder); + syncmem_complex_h2d_op()(coef_complex, coefc_cpu, norder); } getcoef_complex = true; @@ -431,7 +431,7 @@ void Chebyshev::calfinalvec_real( resmem_complex_op()(arrayn, ndmxt); resmem_complex_op()(arrayn_1, ndmxt); - memcpy_complex_op()(this->ctx, this->ctx, arrayn_1, wavein, ndmxt); + memcpy_complex_op()(arrayn_1, wavein, ndmxt); // ModuleBase::GlobalFunc::DCOPY(wavein, arrayn_1, ndmxt); funA(arrayn_1, arrayn, m); @@ -500,7 +500,7 @@ void Chebyshev::calfinalvec_complex( resmem_complex_op()(arrayn, ndmxt); resmem_complex_op()(arrayn_1, ndmxt); - memcpy_complex_op()(this->ctx, this->ctx, arrayn_1, wavein, ndmxt); + memcpy_complex_op()(arrayn_1, wavein, ndmxt); funA(arrayn_1, arrayn, m); @@ -553,7 +553,7 @@ void Chebyshev::calpolyvec_complex( std::complex*tmpin = wavein, *tmpout = arrayn_1; for (int i = 0; i < m; ++i) { - memcpy_complex_op()(this->ctx, this->ctx, tmpout, tmpin, N); + memcpy_complex_op()(tmpout, tmpin, N); // ModuleBase::GlobalFunc::DCOPY(tmpin, tmpout, N); tmpin += LDA; tmpout += LDA; @@ -599,7 +599,7 @@ void Chebyshev::tracepolyA( resmem_complex_op()(arrayn, ndmxt); resmem_complex_op()(arrayn_1, ndmxt); - memcpy_complex_op()(this->ctx, this->ctx, arrayn_1, wavein, ndmxt); + memcpy_complex_op()(arrayn_1, wavein, ndmxt); // ModuleBase::GlobalFunc::DCOPY(wavein, arrayn_1, ndmxt); funA(arrayn_1, arrayn, m); @@ -673,7 +673,7 @@ bool Chebyshev::checkconverge( resmem_complex_op()(arrayn, LDA); resmem_complex_op()(arrayn_1, LDA); - memcpy_complex_op()(this->ctx, this->ctx, arrayn_1, wavein, N); + memcpy_complex_op()(arrayn_1, wavein, N); // ModuleBase::GlobalFunc::DCOPY(wavein, arrayn_1, N); if (tmin == tmax) diff --git a/source/module_base/module_device/cuda/memory_op.cu b/source/module_base/module_device/cuda/memory_op.cu index de481fe90b..1fecb1b66e 100644 --- a/source/module_base/module_device/cuda/memory_op.cu +++ b/source/module_base/module_device/cuda/memory_op.cu @@ -87,8 +87,6 @@ void set_memory_op::operator()(FPTYPE* arr, template void synchronize_memory_op::operator()( - const base_device::DEVICE_CPU* dev_out, - const base_device::DEVICE_GPU* dev_in, FPTYPE* arr_out, const FPTYPE* arr_in, const size_t size) @@ -98,8 +96,6 @@ void synchronize_memory_op void synchronize_memory_op::operator()( - const base_device::DEVICE_GPU* dev_out, - const base_device::DEVICE_CPU* dev_in, FPTYPE* arr_out, const FPTYPE* arr_in, const size_t size) @@ -109,8 +105,6 @@ void synchronize_memory_op void synchronize_memory_op::operator()( - const base_device::DEVICE_GPU* dev_out, - const base_device::DEVICE_GPU* dev_in, FPTYPE* arr_out, const FPTYPE* arr_in, const size_t size) @@ -150,9 +144,7 @@ struct cast_memory_op::value) { - synchronize_memory_op()(dev_out, - dev_in, - arr_out, + synchronize_memory_op()(arr_out, reinterpret_cast(arr_in), size); return; @@ -178,9 +170,7 @@ struct cast_memory_op::value) { - synchronize_memory_op()(dev_out, - dev_in, - arr_out, + synchronize_memory_op()(arr_out, reinterpret_cast(arr_in), size); return; diff --git a/source/module_base/module_device/memory_op.cpp b/source/module_base/module_device/memory_op.cpp index b18e2cea39..2365467b70 100644 --- a/source/module_base/module_device/memory_op.cpp +++ b/source/module_base/module_device/memory_op.cpp @@ -58,9 +58,7 @@ struct set_memory_op template struct synchronize_memory_op { - void operator()(const base_device::DEVICE_CPU* dev_out, - const base_device::DEVICE_CPU* dev_in, - FPTYPE* arr_out, + void operator()(FPTYPE* arr_out, const FPTYPE* arr_in, const size_t size) { @@ -174,9 +172,7 @@ struct set_memory_op template struct synchronize_memory_op { - void operator()(const base_device::DEVICE_GPU* dev_out, - const base_device::DEVICE_GPU* dev_in, - FPTYPE* arr_out, + void operator()(FPTYPE* arr_out, const FPTYPE* arr_in, const size_t size) { @@ -186,9 +182,7 @@ struct synchronize_memory_op struct synchronize_memory_op { - void operator()(const base_device::DEVICE_GPU* dev_out, - const base_device::DEVICE_CPU* dev_in, - FPTYPE* arr_out, + void operator()(FPTYPE* arr_out, const FPTYPE* arr_in, const size_t size) { @@ -198,9 +192,7 @@ struct synchronize_memory_op struct synchronize_memory_op { - void operator()(const base_device::DEVICE_CPU* dev_out, - const base_device::DEVICE_GPU* dev_in, - FPTYPE* arr_out, + void operator()(FPTYPE* arr_out, const FPTYPE* arr_in, const size_t size) { diff --git a/source/module_base/module_device/memory_op.h b/source/module_base/module_device/memory_op.h index 11f9fd7a9e..ca3457d28b 100644 --- a/source/module_base/module_device/memory_op.h +++ b/source/module_base/module_device/memory_op.h @@ -46,16 +46,12 @@ struct synchronize_memory_op /// @brief memcpy for multi-device /// /// Input Parameters - /// \param dev_out : the type of computing device of arr_out - /// \param dev_in : the type of computing device of arr_in /// \param arr_in : input array /// \param size : array size /// /// Output Parameters /// \param arr_out : output array initialized by the input array - void operator()(const Device_out* dev_out, - const Device_in* dev_in, - FPTYPE* arr_out, + void operator()(FPTYPE* arr_out, const FPTYPE* arr_in, const size_t size); }; @@ -125,27 +121,21 @@ struct set_memory_op template struct synchronize_memory_op { - void operator()(const base_device::DEVICE_CPU* dev_out, - const base_device::DEVICE_GPU* dev_in, - FPTYPE* arr_out, + void operator()(FPTYPE* arr_out, const FPTYPE* arr_in, const size_t size); }; template struct synchronize_memory_op { - void operator()(const base_device::DEVICE_GPU* dev_out, - const base_device::DEVICE_CPU* dev_in, - FPTYPE* arr_out, + void operator()(FPTYPE* arr_out, const FPTYPE* arr_in, const size_t size); }; template struct synchronize_memory_op { - void operator()(const base_device::DEVICE_GPU* dev_out, - const base_device::DEVICE_GPU* dev_in, - FPTYPE* arr_out, + void operator()(FPTYPE* arr_out, const FPTYPE* arr_in, const size_t size); }; diff --git a/source/module_base/module_device/rocm/memory_op.hip.cu b/source/module_base/module_device/rocm/memory_op.hip.cu index 96fb12a35c..b51257d28d 100644 --- a/source/module_base/module_device/rocm/memory_op.hip.cu +++ b/source/module_base/module_device/rocm/memory_op.hip.cu @@ -51,8 +51,7 @@ void resize_memory_op::operator()(FPTYPE*& arr, } template -void set_memory_op::operator()(const base_device::DEVICE_GPU* dev, - FPTYPE* arr, +void set_memory_op::operator()(FPTYPE* arr, const int var, const size_t size) { @@ -61,8 +60,6 @@ void set_memory_op::operator()(const base_devic template void synchronize_memory_op::operator()( - const base_device::DEVICE_CPU* dev_out, - const base_device::DEVICE_GPU* dev_in, FPTYPE* arr_out, const FPTYPE* arr_in, const size_t size) @@ -72,8 +69,6 @@ void synchronize_memory_op void synchronize_memory_op::operator()( - const base_device::DEVICE_GPU* dev_out, - const base_device::DEVICE_CPU* dev_in, FPTYPE* arr_out, const FPTYPE* arr_in, const size_t size) @@ -83,8 +78,6 @@ void synchronize_memory_op void synchronize_memory_op::operator()( - const base_device::DEVICE_GPU* dev_out, - const base_device::DEVICE_GPU* dev_in, FPTYPE* arr_out, const FPTYPE* arr_in, const size_t size) diff --git a/source/module_base/module_device/test/memory_test.cpp b/source/module_base/module_device/test/memory_test.cpp index a2d08f1f5b..fced574e5c 100644 --- a/source/module_base/module_device/test/memory_test.cpp +++ b/source/module_base/module_device/test/memory_test.cpp @@ -141,7 +141,7 @@ TEST_F(TestModulePsiMemory, resize_memory_op_comlex_double_cpu) TEST_F(TestModulePsiMemory, synchronize_memory_op_double_cpu_to_cpu) { std::vector h_xx(xx.size(), 0); - synchronize_memory_double_cpu_to_cpu_op()(cpu_ctx, cpu_ctx, h_xx.data(), xx.data(), xx.size()); + synchronize_memory_double_cpu_to_cpu_op()(h_xx.data(), xx.data(), xx.size()); for (int ii = 0; ii < z_xx.size(); ii++) { EXPECT_EQ(h_xx[ii], xx[ii]); @@ -151,7 +151,7 @@ TEST_F(TestModulePsiMemory, synchronize_memory_op_double_cpu_to_cpu) TEST_F(TestModulePsiMemory, synchronize_memory_op_complex_double_cpu_to_cpu) { std::vector> hz_xx(z_xx.size(), std::complex(0, 0)); - synchronize_memory_complex_double_cpu_to_cpu_op()(cpu_ctx, cpu_ctx, hz_xx.data(), z_xx.data(), z_xx.size()); + synchronize_memory_complex_double_cpu_to_cpu_op()(hz_xx.data(), z_xx.data(), z_xx.size()); for (int ii = 0; ii < z_xx.size(); ii++) { EXPECT_EQ(hz_xx[ii], z_xx[ii]); @@ -236,7 +236,7 @@ TEST_F(TestModulePsiMemory, synchronize_memory_op_double_cpu_to_gpu) thrust::device_ptr d_xx = thrust::device_malloc(xx.size()); std::vector hv_xx(xx.size(), 0); thrust::copy(hv_xx.begin(), hv_xx.end(), d_xx); - synchronize_memory_double_cpu_to_gpu_op()(gpu_ctx, cpu_ctx, thrust::raw_pointer_cast(d_xx), xx.data(), xx.size()); + synchronize_memory_double_cpu_to_gpu_op()(thrust::raw_pointer_cast(d_xx), xx.data(), xx.size()); thrust::host_vector h_xx(xx.size()); thrust::copy(d_xx, d_xx + xx.size(), h_xx.begin()); @@ -252,9 +252,7 @@ TEST_F(TestModulePsiMemory, synchronize_memory_op_double_gpu_to_cpu) thrust::device_ptr d_xx = thrust::device_malloc(xx.size()); thrust::copy(xx.begin(), xx.end(), d_xx); thrust::host_vector h_xx(xx.size()); - synchronize_memory_double_gpu_to_cpu_op()(cpu_ctx, - gpu_ctx, - thrust::raw_pointer_cast(h_xx.data()), + synchronize_memory_double_gpu_to_cpu_op()(thrust::raw_pointer_cast(h_xx.data()), thrust::raw_pointer_cast(d_xx), xx.size()); @@ -270,9 +268,7 @@ TEST_F(TestModulePsiMemory, synchronize_memory_op_double_gpu_to_gpu) thrust::device_ptr d1_xx = thrust::device_malloc(xx.size()); thrust::device_ptr d2_xx = thrust::device_malloc(xx.size()); thrust::copy(xx.begin(), xx.end(), d1_xx); - synchronize_memory_double_gpu_to_gpu_op()(gpu_ctx, - gpu_ctx, - thrust::raw_pointer_cast(d2_xx), + synchronize_memory_double_gpu_to_gpu_op()(thrust::raw_pointer_cast(d2_xx), thrust::raw_pointer_cast(d1_xx), xx.size()); @@ -291,9 +287,7 @@ TEST_F(TestModulePsiMemory, synchronize_memory_op_complex_double_cpu_to_gpu) thrust::device_ptr> dz_xx = thrust::device_malloc>(z_xx.size()); std::vector> hvz_xx(z_xx.size(), 0); thrust::copy(hvz_xx.begin(), hvz_xx.end(), dz_xx); - synchronize_memory_complex_double_cpu_to_gpu_op()(gpu_ctx, - cpu_ctx, - thrust::raw_pointer_cast(dz_xx), + synchronize_memory_complex_double_cpu_to_gpu_op()(thrust::raw_pointer_cast(dz_xx), z_xx.data(), z_xx.size()); @@ -311,9 +305,7 @@ TEST_F(TestModulePsiMemory, synchronize_memory_op_complex_double_gpu_to_cpu) thrust::device_ptr> dz_xx = thrust::device_malloc>(z_xx.size()); thrust::copy(z_xx.begin(), z_xx.end(), dz_xx); thrust::host_vector> hz_xx(z_xx.size()); - synchronize_memory_complex_double_gpu_to_cpu_op()(cpu_ctx, - gpu_ctx, - thrust::raw_pointer_cast(hz_xx.data()), + synchronize_memory_complex_double_gpu_to_cpu_op()(thrust::raw_pointer_cast(hz_xx.data()), thrust::raw_pointer_cast(dz_xx), z_xx.size()); @@ -329,9 +321,7 @@ TEST_F(TestModulePsiMemory, synchronize_memory_op_complex_double_gpu_to_gpu) thrust::device_ptr> dz1_xx = thrust::device_malloc>(z_xx.size()); thrust::device_ptr> dz2_xx = thrust::device_malloc>(z_xx.size()); thrust::copy(z_xx.begin(), z_xx.end(), dz1_xx); - synchronize_memory_complex_double_gpu_to_gpu_op()(gpu_ctx, - gpu_ctx, - thrust::raw_pointer_cast(dz2_xx), + synchronize_memory_complex_double_gpu_to_gpu_op()(thrust::raw_pointer_cast(dz2_xx), thrust::raw_pointer_cast(dz1_xx), z_xx.size()); diff --git a/source/module_base/parallel_device.h b/source/module_base/parallel_device.h index 51a6320bff..9b43d668b4 100644 --- a/source/module_base/parallel_device.h +++ b/source/module_base/parallel_device.h @@ -44,7 +44,7 @@ void bcast_dev(const Device* ctx, T* object, const int& n, const MPI_Comm& comm, { object_cpu = tmp_space; } - base_device::memory::synchronize_memory_op()(cpu_ctx, ctx, object_cpu, object, n); + base_device::memory::synchronize_memory_op()(object_cpu, object, n); } else { @@ -55,7 +55,7 @@ void bcast_dev(const Device* ctx, T* object, const int& n, const MPI_Comm& comm, if (base_device::get_device_type(ctx) == base_device::GpuDevice) { - base_device::memory::synchronize_memory_op()(ctx, cpu_ctx, object, object_cpu, n); + base_device::memory::synchronize_memory_op()(object, object_cpu, n); if(alloc) { base_device::memory::delete_memory_op()(cpu_ctx, object_cpu); @@ -81,7 +81,7 @@ void reduce_dev(const Device* ctx, T* object, const int& n, const MPI_Comm& comm { object_cpu = tmp_space; } - base_device::memory::synchronize_memory_op()(cpu_ctx, ctx, object_cpu, object, n); + base_device::memory::synchronize_memory_op()(object_cpu, object, n); } else { @@ -92,7 +92,7 @@ void reduce_dev(const Device* ctx, T* object, const int& n, const MPI_Comm& comm if (base_device::get_device_type(ctx) == base_device::GpuDevice) { - base_device::memory::synchronize_memory_op()(ctx, cpu_ctx, object, object_cpu, n); + base_device::memory::synchronize_memory_op()(object, object_cpu, n); if(alloc) { base_device::memory::delete_memory_op()(cpu_ctx, object_cpu); diff --git a/source/module_base/test/blas_connector_test.cpp b/source/module_base/test/blas_connector_test.cpp index 938a1290cb..b4a7107fa5 100644 --- a/source/module_base/test/blas_connector_test.cpp +++ b/source/module_base/test/blas_connector_test.cpp @@ -108,9 +108,9 @@ TEST(blas_connector, ScalGpu) { }; for (int i = 0; i < size; i++) answer[i] = result[i] * scale; - syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, result_gpu, result, sizeof(std::complex) * 8); + syncmem_z2z_h2d_op()(result_gpu, result, sizeof(std::complex) * 8); BlasConnector::scal(size,scale,result_gpu,incx,base_device::AbacusDevice_t::GpuDevice); - syncmem_z2z_d2h_op()(cpu_ctx, gpu_ctx, result, result_gpu, sizeof(std::complex) * 8); + syncmem_z2z_d2h_op()(result, result_gpu, sizeof(std::complex) * 8); delmem_zd_op()(gpu_ctx, result_gpu); // incx is the spacing between elements if result for (int i = 0; i < size; i++) { @@ -210,10 +210,10 @@ TEST(blas_connector, AxpyGpu) { }); for (int i = 0; i < size; i++) answer[i] = x_const[i] * scale + result[i]; - syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, result_gpu, result.data(), sizeof(std::complex) * size); - syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, x_gpu, x_const.data(), sizeof(std::complex) * size); + syncmem_z2z_h2d_op()(result_gpu, result.data(), sizeof(std::complex) * size); + syncmem_z2z_h2d_op()(x_gpu, x_const.data(), sizeof(std::complex) * size); BlasConnector::axpy(size, scale, x_gpu, incx, result_gpu, incy, base_device::AbacusDevice_t::GpuDevice); - syncmem_z2z_d2h_op()(cpu_ctx, gpu_ctx, result.data(), result_gpu, sizeof(std::complex) * size); + syncmem_z2z_d2h_op()(result.data(), result_gpu, sizeof(std::complex) * size); delmem_zd_op()(gpu_ctx, result_gpu); delmem_zd_op()(gpu_ctx, x_gpu); for (int i = 0; i < size; i++) { @@ -665,13 +665,13 @@ TEST(blas_connector, GemmGpu) { beta_const * result[i + j * ldc]; } } - syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, a_gpu, a_const.data(), sizeof(std::complex) * size_k * lda); - syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, b_gpu, b_const.data(), sizeof(std::complex) * size_n * ldb); - syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, result_gpu, result.data(), sizeof(std::complex) * size_n * ldc); + syncmem_z2z_h2d_op()(a_gpu, a_const.data(), sizeof(std::complex) * size_k * lda); + syncmem_z2z_h2d_op()(b_gpu, b_const.data(), sizeof(std::complex) * size_n * ldb); + syncmem_z2z_h2d_op()(result_gpu, result.data(), sizeof(std::complex) * size_n * ldc); BlasConnector::gemm_cm(transa_m, transb_m, size_m, size_n, size_k, alpha_const, a_gpu, lda, b_gpu, ldb, beta_const, result_gpu, ldc, base_device::AbacusDevice_t::GpuDevice); - syncmem_z2z_d2h_op()(cpu_ctx, gpu_ctx, result.data(), result_gpu, sizeof(std::complex) * size_n * ldc); + syncmem_z2z_d2h_op()(result.data(), result_gpu, sizeof(std::complex) * size_n * ldc); delmem_zd_op()(gpu_ctx, result_gpu); delmem_zd_op()(gpu_ctx, a_gpu); delmem_zd_op()(gpu_ctx, b_gpu); diff --git a/source/module_basis/module_pw/kernels/test/pw_op_test.cpp b/source/module_basis/module_pw/kernels/test/pw_op_test.cpp index aeead153ac..4b62b5f8fb 100644 --- a/source/module_basis/module_pw/kernels/test/pw_op_test.cpp +++ b/source/module_basis/module_pw/kernels/test/pw_op_test.cpp @@ -105,13 +105,13 @@ TEST_F(TestModulePWPWMultiDevice, set_3d_fft_box_op_gpu) resize_memory_int_gpu_op()(d_box_index, box_index.size()); resize_memory_complex_gpu_op()(d_res, res.size()); resize_memory_complex_gpu_op()(d_in_1, in_1.size()); - synchronize_memory_int_h2d_op()(gpu_ctx, cpu_ctx, d_box_index, box_index.data(), box_index.size()); - synchronize_memory_complex_h2d_op()(gpu_ctx, cpu_ctx, d_res, res.data(), res.size()); - synchronize_memory_complex_h2d_op()(gpu_ctx, cpu_ctx, d_in_1, in_1.data(), in_1.size()); + synchronize_memory_int_h2d_op()(d_box_index, box_index.data(), box_index.size()); + synchronize_memory_complex_h2d_op()(d_res, res.data(), res.size()); + synchronize_memory_complex_h2d_op()(d_in_1, in_1.data(), in_1.size()); set_3d_fft_box_gpu_op()(gpu_ctx, this->npwk, d_box_index, d_in_1, d_res); - synchronize_memory_complex_d2h_op()(cpu_ctx, gpu_ctx, res.data(), d_res, res.size()); + synchronize_memory_complex_d2h_op()(res.data(), d_res, res.size()); for (int ii = 0; ii < this->nxyz; ii++) { EXPECT_LT(fabs(res[ii] - out_1[ii]), 1e-12); @@ -127,12 +127,12 @@ TEST_F(TestModulePWPWMultiDevice, set_recip_to_real_output_op_gpu) std::complex* d_res = NULL, * d_in_2 = NULL; resize_memory_complex_gpu_op()(d_res, res.size()); resize_memory_complex_gpu_op()(d_in_2, in_2.size()); - synchronize_memory_complex_h2d_op()(gpu_ctx, cpu_ctx, d_res, res.data(), res.size()); - synchronize_memory_complex_h2d_op()(gpu_ctx, cpu_ctx, d_in_2, in_2.data(), in_2.size()); + synchronize_memory_complex_h2d_op()(d_res, res.data(), res.size()); + synchronize_memory_complex_h2d_op()(d_in_2, in_2.data(), in_2.size()); set_recip_to_real_output_gpu_op()(gpu_ctx, this->nxyz, this->add, this->factor, d_in_2, d_res); - synchronize_memory_complex_d2h_op()(cpu_ctx, gpu_ctx, res.data(), d_res, res.size()); + synchronize_memory_complex_d2h_op()(res.data(), d_res, res.size()); for (int ii = 0; ii < this->nxyz; ii++) { EXPECT_LT(fabs(res[ii] - out_2[ii]), 1e-12); @@ -149,13 +149,13 @@ TEST_F(TestModulePWPWMultiDevice, set_real_to_recip_output_op_gpu) resize_memory_int_gpu_op()(d_box_index, box_index.size()); resize_memory_complex_gpu_op()(d_res, res.size()); resize_memory_complex_gpu_op()(d_in_3, in_3.size()); - synchronize_memory_int_h2d_op()(gpu_ctx, cpu_ctx, d_box_index, box_index.data(), box_index.size()); - synchronize_memory_complex_h2d_op()(gpu_ctx, cpu_ctx, d_res, res.data(), res.size()); - synchronize_memory_complex_h2d_op()(gpu_ctx, cpu_ctx, d_in_3, in_3.data(), in_3.size()); + synchronize_memory_int_h2d_op()(d_box_index, box_index.data(), box_index.size()); + synchronize_memory_complex_h2d_op()(d_res, res.data(), res.size()); + synchronize_memory_complex_h2d_op()(d_in_3, in_3.data(), in_3.size()); set_real_to_recip_output_gpu_op()(gpu_ctx, this->npwk, this->nxyz, true, this->factor, d_box_index, d_in_3, d_res); - synchronize_memory_complex_d2h_op()(cpu_ctx, gpu_ctx, res.data(), d_res, res.size()); + synchronize_memory_complex_d2h_op()(res.data(), d_res, res.size()); for (int ii = 0; ii < out_3.size(); ii++) { EXPECT_LT(fabs(res[ii] - out_3[ii]), 5e-6); diff --git a/source/module_basis/module_pw/pw_basis_k.cpp b/source/module_basis/module_pw/pw_basis_k.cpp index 4e452bd87f..980490dc81 100644 --- a/source/module_basis/module_pw/pw_basis_k.cpp +++ b/source/module_basis/module_pw/pw_basis_k.cpp @@ -103,7 +103,7 @@ void PW_Basis_K:: initparameters( castmem_d2s_h2d_op()(gpu_ctx, cpu_ctx, this->s_kvec_c, reinterpret_cast(&this->kvec_c[0][0]), this->nks * 3); } resmem_dd_op()(this->d_kvec_c, this->nks * 3); - syncmem_d2d_h2d_op()(gpu_ctx, cpu_ctx, this->d_kvec_c, reinterpret_cast(&this->kvec_c[0][0]), this->nks * 3); + syncmem_d2d_h2d_op()(this->d_kvec_c, reinterpret_cast(&this->kvec_c[0][0]), this->nks * 3); } else { #endif @@ -165,7 +165,7 @@ void PW_Basis_K::setupIndGk() #if defined(__CUDA) || defined(__ROCM) if (this->device == "gpu") { resmem_int_op()(this->d_igl2isz_k, this->npwk_max * this->nks); - syncmem_int_h2d_op()(gpu_ctx, cpu_ctx, this->d_igl2isz_k, this->igl2isz_k, this->npwk_max * this->nks); + syncmem_int_h2d_op()(this->d_igl2isz_k, this->igl2isz_k, this->npwk_max * this->nks); } #endif this->get_ig2ixyz_k(); @@ -255,8 +255,8 @@ void PW_Basis_K::collect_local_pw(const double& erf_ecut_in, const double& erf_h else { resmem_dd_op()(this->d_gk2, this->npwk_max * this->nks); resmem_dd_op()(this->d_gcar, this->npwk_max * this->nks * 3); - syncmem_d2d_h2d_op()(gpu_ctx, cpu_ctx, this->d_gk2, this->gk2, this->npwk_max * this->nks); - syncmem_d2d_h2d_op()(gpu_ctx, cpu_ctx, this->d_gcar, reinterpret_cast(&this->gcar[0][0]), this->npwk_max * this->nks * 3); + syncmem_d2d_h2d_op()(this->d_gk2, this->gk2, this->npwk_max * this->nks); + syncmem_d2d_h2d_op()(this->d_gcar, reinterpret_cast(&this->gcar[0][0]), this->npwk_max * this->nks * 3); } } else { @@ -356,7 +356,7 @@ void PW_Basis_K::get_ig2ixyz_k() } } resmem_int_op()(ig2ixyz_k, this->npwk_max * this->nks); - syncmem_int_h2d_op()(gpu_ctx, cpu_ctx, this->ig2ixyz_k, ig2ixyz_k_cpu, this->npwk_max * this->nks); + syncmem_int_h2d_op()(this->ig2ixyz_k, ig2ixyz_k_cpu, this->npwk_max * this->nks); delete[] ig2ixyz_k_cpu; } diff --git a/source/module_basis/module_pw/pw_basis_sup.cpp b/source/module_basis/module_pw/pw_basis_sup.cpp index 33ccc4b7e7..97fe4cd525 100644 --- a/source/module_basis/module_pw/pw_basis_sup.cpp +++ b/source/module_basis/module_pw/pw_basis_sup.cpp @@ -438,7 +438,7 @@ void PW_Basis_Sup::get_ig2isz_is2fftixy( if (this->device == "gpu") { resmem_int_op()(d_is2fftixy, this->nst); - syncmem_int_h2d_op()(gpu_ctx, cpu_ctx, this->d_is2fftixy, this->is2fftixy, this->nst); + syncmem_int_h2d_op()(this->d_is2fftixy, this->is2fftixy, this->nst); } #endif return; diff --git a/source/module_basis/module_pw/pw_distributeg.cpp b/source/module_basis/module_pw/pw_distributeg.cpp index 0f1b29554a..5e32c58963 100644 --- a/source/module_basis/module_pw/pw_distributeg.cpp +++ b/source/module_basis/module_pw/pw_distributeg.cpp @@ -205,7 +205,7 @@ void PW_Basis::get_ig2isz_is2fftixy( #if defined(__CUDA) || defined(__ROCM) if (this->device == "gpu") { resmem_int_op()(d_is2fftixy, this->nst); - syncmem_int_h2d_op()(gpu_ctx, cpu_ctx, this->d_is2fftixy, this->is2fftixy, this->nst); + syncmem_int_h2d_op()(this->d_is2fftixy, this->is2fftixy, this->nst); } #endif return; diff --git a/source/module_basis/module_pw/pw_transform_k.cpp b/source/module_basis/module_pw/pw_transform_k.cpp index 16661f7664..e230066c8f 100644 --- a/source/module_basis/module_pw/pw_transform_k.cpp +++ b/source/module_basis/module_pw/pw_transform_k.cpp @@ -345,8 +345,6 @@ void PW_Basis_K::real_to_recip(const base_device::DEVICE_GPU* ctx, assert(this->poolnproc == 1); base_device::memory::synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU>()( - ctx, - ctx, this->fft_bundle.get_auxr_3d_data(), in, this->nrxx); @@ -379,9 +377,7 @@ void PW_Basis_K::real_to_recip(const base_device::DEVICE_GPU* ctx, base_device::memory::synchronize_memory_op, base_device::DEVICE_GPU, - base_device::DEVICE_GPU>()(ctx, - ctx, - this->fft_bundle.get_auxr_3d_data(), + base_device::DEVICE_GPU>()(this->fft_bundle.get_auxr_3d_data(), in, this->nrxx); diff --git a/source/module_elecstate/kernels/test/elecstate_op_test.cpp b/source/module_elecstate/kernels/test/elecstate_op_test.cpp index 499a11e811..973df83cea 100644 --- a/source/module_elecstate/kernels/test/elecstate_op_test.cpp +++ b/source/module_elecstate/kernels/test/elecstate_op_test.cpp @@ -109,8 +109,8 @@ TEST_F(TestModuleElecstateMultiDevice, elecstate_pw_op_gpu) std::complex* d_wfcr = NULL; resize_memory_var_op()(d_rho_data, rho_data.size()); resize_memory_complex_op()(d_wfcr, wfcr.size()); - syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_rho_data, rho_data.data(), rho_data.size()); - syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_wfcr, wfcr.data(), wfcr.size()); + syncmem_var_h2d_op()(d_rho_data, rho_data.data(), rho_data.size()); + syncmem_complex_h2d_op()(d_wfcr, wfcr.data(), wfcr.size()); double ** rho = new double* [1]; rho[0] = d_rho_data; elecstate_gpu_op()( @@ -120,7 +120,7 @@ TEST_F(TestModuleElecstateMultiDevice, elecstate_pw_op_gpu) rho, d_wfcr); - syncmem_var_d2h_op()(cpu_ctx, gpu_ctx, rho_data.data(), d_rho_data, rho_data.size()); + syncmem_var_d2h_op()(rho_data.data(), d_rho_data, rho_data.size()); // check the result for (int ii = 0; ii < rho_data.size(); ii++) { EXPECT_LT(fabs(rho_data[ii] - expected_rho[ii]), 6e-5); @@ -139,9 +139,9 @@ TEST_F(TestModuleElecstateMultiDevice, elecstate_pw_spin_op_gpu) resize_memory_var_op()(d_rho_data_2, rho_data_2.size()); resize_memory_complex_op()(d_wfcr_2, wfcr_2.size()); resize_memory_complex_op()(d_wfcr_another_spin_2, wfcr_another_spin_2.size()); - syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_rho_data_2, rho_data_2.data(), rho_data_2.size()); - syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_wfcr_2, wfcr_2.data(), wfcr_2.size()); - syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_wfcr_another_spin_2, wfcr_another_spin_2.data(), wfcr_another_spin_2.size()); + syncmem_var_h2d_op()(d_rho_data_2, rho_data_2.data(), rho_data_2.size()); + syncmem_complex_h2d_op()(d_wfcr_2, wfcr_2.data(), wfcr_2.size()); + syncmem_complex_h2d_op()(d_wfcr_another_spin_2, wfcr_another_spin_2.data(), wfcr_another_spin_2.size()); double ** rho = new double* [4]; rho[0] = d_rho_data_2; rho[1] = d_rho_data_2 + this->nrxx; @@ -158,7 +158,7 @@ TEST_F(TestModuleElecstateMultiDevice, elecstate_pw_spin_op_gpu) d_wfcr_2, d_wfcr_another_spin_2); - syncmem_var_d2h_op()(cpu_ctx, gpu_ctx, rho_data_2.data(), d_rho_data_2, rho_data_2.size()); + syncmem_var_d2h_op()(rho_data_2.data(), d_rho_data_2, rho_data_2.size()); // check the result for (int ii = 0; ii < rho_data_2.size(); ii++) { EXPECT_LT(fabs(rho_data_2[ii] - expected_rho_2[ii]), 5e-4); diff --git a/source/module_elecstate/potentials/potential_new.cpp b/source/module_elecstate/potentials/potential_new.cpp index 8b62f695f7..56c8446138 100644 --- a/source/module_elecstate/potentials/potential_new.cpp +++ b/source/module_elecstate/potentials/potential_new.cpp @@ -193,14 +193,10 @@ void Potential::update_from_charge(const Charge*const chg, const UnitCell*const this->vofk_smooth.nr * this->vofk_smooth.nc); } else { - syncmem_d2d_h2d_op()(gpu_ctx, - cpu_ctx, - d_veff_smooth, + syncmem_d2d_h2d_op()(d_veff_smooth, this->veff_smooth.c, this->veff_smooth.nr * this->veff_smooth.nc); - syncmem_d2d_h2d_op()(gpu_ctx, - cpu_ctx, - d_vofk_smooth, + syncmem_d2d_h2d_op()(d_vofk_smooth, this->vofk_smooth.c, this->vofk_smooth.nr * this->vofk_smooth.nc); } diff --git a/source/module_hamilt_general/hamilt.h b/source/module_hamilt_general/hamilt.h index 70dcd1b20a..cb204cc298 100644 --- a/source/module_hamilt_general/hamilt.h +++ b/source/module_hamilt_general/hamilt.h @@ -39,7 +39,7 @@ class Hamilt const int nbands // number of bands ) const { - syncmem_op()(this->ctx, this->ctx, spsi, psi_in, static_cast(nbands * nrow)); + syncmem_op()(spsi, psi_in, static_cast(nbands * nrow)); } /// core function: return H(k) and S(k) matrixs for direct solving eigenvalues. diff --git a/source/module_hamilt_general/operator.cpp b/source/module_hamilt_general/operator.cpp index 008d5e30e3..e9020866e6 100644 --- a/source/module_hamilt_general/operator.cpp +++ b/source/module_hamilt_general/operator.cpp @@ -59,7 +59,7 @@ typename Operator::hpsi_info Operator::hPsi(hpsi_info& inp if (this->in_place) { // ModuleBase::GlobalFunc::COPYARRAY(this->hpsi->get_pointer(), hpsi_pointer, this->hpsi->size()); - syncmem_op()(this->ctx, this->ctx, hpsi_pointer, this->hpsi->get_pointer(), this->hpsi->size()); + syncmem_op()(hpsi_pointer, this->hpsi->get_pointer(), this->hpsi->size()); delete this->hpsi; this->hpsi = new psi::Psi(hpsi_pointer, 1, diff --git a/source/module_hamilt_lcao/module_deltaspin/cal_mw_from_lambda.cpp b/source/module_hamilt_lcao/module_deltaspin/cal_mw_from_lambda.cpp index 65853aa0d8..8ee4ce9c08 100644 --- a/source/module_hamilt_lcao/module_deltaspin/cal_mw_from_lambda.cpp +++ b/source/module_hamilt_lcao/module_deltaspin/cal_mw_from_lambda.cpp @@ -28,7 +28,7 @@ void spinconstrain::SpinConstrain>::calculate_delta_hcc(std base_device::DEVICE_GPU* ctx = {}; base_device::DEVICE_CPU* cpu_ctx = {}; base_device::memory::resize_memory_op, base_device::DEVICE_CPU>()(becp_cpu, size_ps); - base_device::memory::synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>()(cpu_ctx, ctx, becp_cpu, becp_k, size_ps); + base_device::memory::synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>()(becp_cpu, becp_k, size_ps); #endif } else if (PARAM.inp.device == "cpu") @@ -69,7 +69,7 @@ void spinconstrain::SpinConstrain>::calculate_delta_hcc(std base_device::DEVICE_GPU* ctx = {}; base_device::DEVICE_CPU* cpu_ctx = {}; base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(ps_pointer, size_ps); - base_device::memory::synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>()(ctx, cpu_ctx, ps_pointer, ps.data(), size_ps); + base_device::memory::synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>()(ps_pointer, ps.data(), size_ps); #endif } else if (PARAM.inp.device == "cpu") @@ -286,10 +286,10 @@ void spinconstrain::SpinConstrain>::cal_mw_from_lambda(int /// update H(k) for each k point hamilt_t->updateHk(ik); hsolver::DiagoIterAssist, base_device::DEVICE_GPU>::cal_hs_subspace(hamilt_t, psi_t[0], h_k, s_k); - base_device::memory::synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU>()(ctx, ctx, becp_k, onsite_p->get_becp(), size_becp); + base_device::memory::synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU>()(becp_k, onsite_p->get_becp(), size_becp); } - base_device::memory::synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU>()(ctx, ctx, h_tmp, h_k, nbands * nbands); - base_device::memory::synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU>()(ctx, ctx, s_tmp, s_k, nbands * nbands); + base_device::memory::synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU>()(h_tmp, h_k, nbands * nbands); + base_device::memory::synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU>()(s_tmp, s_k, nbands * nbands); // update h_tmp by delta_lambda if (i_step != -1) this->calculate_delta_hcc(h_tmp, becp_k, delta_lambda, nbands, nkb, nh_iat); @@ -301,7 +301,7 @@ void spinconstrain::SpinConstrain>::cal_mw_from_lambda(int nkb * npol, &this->pelec->ekb(ik, 0)); // copy becp_pointer from GPU to CPU - base_device::memory::synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>()(cpu_ctx, ctx, &becp_tmp[ik * size_becp], becp_pointer, size_becp); + base_device::memory::synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>()(&becp_tmp[ik * size_becp], becp_pointer, size_becp); } // free memory for becp_pointer in GPU device @@ -474,8 +474,8 @@ void spinconstrain::SpinConstrain>::update_psi_charge(const std::complex* becp_k = this->becp_save + ik * size_becp; psi_t->fix_k(ik); - base_device::memory::synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU>()(ctx, ctx, h_tmp, h_k, nbands * nbands); - base_device::memory::synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU>()(ctx, ctx, s_tmp, s_k, nbands * nbands); + base_device::memory::synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU>()(h_tmp, h_k, nbands * nbands); + base_device::memory::synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU>()(s_tmp, s_k, nbands * nbands); this->calculate_delta_hcc(h_tmp, becp_k, delta_lambda, nbands, nkb, nh_iat); hsolver::DiagoIterAssist, base_device::DEVICE_GPU>::diag_subspace_psi(h_tmp, s_tmp, diff --git a/source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp b/source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp index 8acf0f4896..a77c8fc5f8 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp @@ -471,9 +471,9 @@ void pseudopot_cell_vnl::getvnl(Device* ctx, resmem_int_op()(atom_nh, ucell.ntype); resmem_int_op()(atom_nb, ucell.ntype); resmem_int_op()(atom_na, ucell.ntype); - syncmem_int_op()(ctx, cpu_ctx, atom_nh, h_atom_nh, ucell.ntype); - syncmem_int_op()(ctx, cpu_ctx, atom_nb, h_atom_nb, ucell.ntype); - syncmem_int_op()(ctx, cpu_ctx, atom_na, h_atom_na, ucell.ntype); + syncmem_int_op()(atom_nh, h_atom_nh, ucell.ntype); + syncmem_int_op()(atom_nb, h_atom_nb, ucell.ntype); + syncmem_int_op()(atom_na, h_atom_na, ucell.ntype); resmem_var_op()(gk, npw * 3); castmem_var_h2d_op()(ctx, cpu_ctx, gk, reinterpret_cast(_gk), npw * 3); @@ -881,16 +881,16 @@ void pseudopot_cell_vnl::init_vnl(UnitCell& cell, const ModulePW::PW_Basis* rho_ } else { - syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, this->z_qq_so, this->qq_so.ptr, this->qq_so.getSize()); + syncmem_z2z_h2d_op()(this->z_qq_so, this->qq_so.ptr, this->qq_so.getSize()); } // Even when the single precision flag is enabled, // these variables are utilized in the Force/Stress calculation as well. // modified by denghuilu at 2023-05-15 - syncmem_d2d_h2d_op()(gpu_ctx, cpu_ctx, this->d_indv, this->indv.c, this->indv.nr * this->indv.nc); - syncmem_d2d_h2d_op()(gpu_ctx, cpu_ctx, this->d_nhtol, this->nhtol.c, this->nhtol.nr * this->nhtol.nc); - syncmem_d2d_h2d_op()(gpu_ctx, cpu_ctx, this->d_nhtolm, this->nhtolm.c, this->nhtolm.nr * this->nhtolm.nc); - syncmem_d2d_h2d_op()(gpu_ctx, cpu_ctx, this->d_tab, this->tab.ptr, this->tab.getSize()); - syncmem_d2d_h2d_op()(gpu_ctx, cpu_ctx, this->d_qq_nt, this->qq_nt.ptr, this->qq_nt.getSize()); + syncmem_d2d_h2d_op()(this->d_indv, this->indv.c, this->indv.nr * this->indv.nc); + syncmem_d2d_h2d_op()(this->d_nhtol, this->nhtol.c, this->nhtol.nr * this->nhtol.nc); + syncmem_d2d_h2d_op()(this->d_nhtolm, this->nhtolm.c, this->nhtolm.nr * this->nhtolm.nc); + syncmem_d2d_h2d_op()(this->d_tab, this->tab.ptr, this->tab.getSize()); + syncmem_d2d_h2d_op()(this->d_qq_nt, this->qq_nt.ptr, this->qq_nt.getSize()); } else { @@ -1503,15 +1503,11 @@ void pseudopot_cell_vnl::cal_effective_D(const ModuleBase::matrix& veff, } else { - syncmem_z2z_h2d_op()(gpu_ctx, - cpu_ctx, - this->z_deeq_nc, + syncmem_z2z_h2d_op()(this->z_deeq_nc, this->deeq_nc.ptr, PARAM.inp.nspin * cell.nat * this->nhm * this->nhm); } - syncmem_d2d_h2d_op()(gpu_ctx, - cpu_ctx, - this->d_deeq, + syncmem_d2d_h2d_op()(this->d_deeq, this->deeq.ptr, PARAM.inp.nspin * cell.nat * this->nhm * this->nhm); } diff --git a/source/module_hamilt_pw/hamilt_pwdft/forces_cc.cpp b/source/module_hamilt_pw/hamilt_pwdft/forces_cc.cpp index f055463e91..8f659fd729 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/forces_cc.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/forces_cc.cpp @@ -141,10 +141,10 @@ void Forces::cal_force_cc(ModuleBase::matrix& forcecc, resmem_complex_op()(psiv_d, rho_basis->nmaxgr); resmem_var_op()(force_d, 3); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, gv_x_d, gv_x.data(), rho_basis->npw); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, gv_y_d, gv_y.data(), rho_basis->npw); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, gv_z_d, gv_z.data(), rho_basis->npw); - syncmem_complex_h2d_op()(this->ctx, this->cpu_ctx, psiv_d, psiv, rho_basis->nmaxgr); + syncmem_var_h2d_op()(gv_x_d, gv_x.data(), rho_basis->npw); + syncmem_var_h2d_op()(gv_y_d, gv_y.data(), rho_basis->npw); + syncmem_var_h2d_op()(gv_z_d, gv_z.data(), rho_basis->npw); + syncmem_complex_h2d_op()(psiv_d, psiv, rho_basis->nmaxgr); } @@ -178,7 +178,7 @@ void Forces::cal_force_cc(ModuleBase::matrix& forcecc, } if(this->device == base_device::GpuDevice ) { - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, rhocgigg_vec_d, rhocgigg_vec.data(), rho_basis->npw); + syncmem_var_h2d_op()(rhocgigg_vec_d, rhocgigg_vec.data(), rho_basis->npw); } for (int ia = 0; ia < ucell_in.atoms[it].na; ++ia) { @@ -188,12 +188,12 @@ void Forces::cal_force_cc(ModuleBase::matrix& forcecc, double force[3] = {0, 0, 0}; if(this->device == base_device::GpuDevice ) { - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, force_d, force, 3); + syncmem_var_h2d_op()(force_d, force, 3); hamilt::cal_force_npw_op()( psiv_d, gv_x_d, gv_y_d, gv_z_d, rhocgigg_vec_d, force_d, pos.x, pos.y, pos.z, rho_basis->npw, ucell_in.omega, ucell_in.tpiba ); - syncmem_var_d2h_op()(this->cpu_ctx, this->ctx, force, force_d, 3); + syncmem_var_d2h_op()(force, force_d, 3); } else { hamilt::cal_force_npw_op()( @@ -316,16 +316,16 @@ void Forces::deriv_drhoc resmem_var_op()(gx_arr_d, rho_basis->ngg); resmem_var_op()(drhocg_d, rho_basis->ngg); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, gx_arr_d, gx_arr.data(), rho_basis->ngg); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, r_d, r, mesh); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, rab_d, rab, mesh); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, rhoc_d, rhoc, mesh); + syncmem_var_h2d_op()(gx_arr_d, gx_arr.data(), rho_basis->ngg); + syncmem_var_h2d_op()(r_d, r, mesh); + syncmem_var_h2d_op()(rab_d, rab, mesh); + syncmem_var_h2d_op()(rhoc_d, rhoc, mesh); } if(this->device == base_device::GpuDevice) { hamilt::cal_stress_drhoc_aux_op()( r_d,rhoc_d,gx_arr_d+igl0,rab_d,drhocg_d+igl0,mesh,igl0,rho_basis->ngg-igl0,ucell_in.omega,type); - syncmem_var_d2h_op()(this->cpu_ctx, this->ctx, drhocg+igl0, drhocg_d+igl0, rho_basis->ngg-igl0); + syncmem_var_d2h_op()(drhocg+igl0, drhocg_d+igl0, rho_basis->ngg-igl0); diff --git a/source/module_hamilt_pw/hamilt_pwdft/forces_nl.cpp b/source/module_hamilt_pw/hamilt_pwdft/forces_nl.cpp index b3891b7f12..3ab586f7f9 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/forces_nl.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/forces_nl.cpp @@ -62,7 +62,7 @@ void Forces::cal_force_nl(ModuleBase::matrix& forcenl, nl_tools.cal_force(ik, max_nbands, npm, true, force); } // end ik - syncmem_var_d2h_op()(this->cpu_ctx, this->ctx, forcenl.c, force, forcenl.nr * forcenl.nc); + syncmem_var_d2h_op()(forcenl.c, force, forcenl.nr * forcenl.nc); delmem_var_op()(this->ctx, force); // sum up forcenl from all processors Parallel_Reduce::reduce_all(forcenl.c, forcenl.nr * forcenl.nc); diff --git a/source/module_hamilt_pw/hamilt_pwdft/forces_onsite.cpp b/source/module_hamilt_pw/hamilt_pwdft/forces_onsite.cpp index 703f8fe141..b216533e15 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/forces_onsite.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/forces_onsite.cpp @@ -65,7 +65,7 @@ void Forces::cal_force_onsite(ModuleBase::matrix& force_onsite, } // end ik - syncmem_var_d2h_op()(this->cpu_ctx, this->ctx, force_onsite.c, force, force_onsite.nr * force_onsite.nc); + syncmem_var_d2h_op()(force_onsite.c, force, force_onsite.nr * force_onsite.nc); delmem_var_op()(this->ctx, force); // sum up force_onsite from all processors Parallel_Reduce::reduce_all(force_onsite.c, force_onsite.nr * force_onsite.nc); diff --git a/source/module_hamilt_pw/hamilt_pwdft/forces_scc.cpp b/source/module_hamilt_pw/hamilt_pwdft/forces_scc.cpp index 347212221e..e36df1bb23 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/forces_scc.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/forces_scc.cpp @@ -198,20 +198,18 @@ void Forces::deriv_drhoc_scc(const bool& numeric, resmem_var_op()(gx_arr_d, rho_basis->ngg); resmem_var_op()(drhocg_d, rho_basis->ngg); - syncmem_var_h2d_op()(this->ctx, - this->cpu_ctx, - gx_arr_d, + syncmem_var_h2d_op()(gx_arr_d, gx_arr.data(), rho_basis->ngg); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, r_d, r, mesh); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, rab_d, rab, mesh); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, rhoc_d, rhoc, mesh); + syncmem_var_h2d_op()(r_d, r, mesh); + syncmem_var_h2d_op()(rab_d, rab, mesh); + syncmem_var_h2d_op()(rhoc_d, rhoc, mesh); } if(this->device == base_device::GpuDevice) { hamilt::cal_stress_drhoc_aux_op()( r_d,rhoc_d,gx_arr_d+igl0,rab_d,drhocg_d+igl0,mesh,igl0,rho_basis->ngg-igl0,ucell_in.omega,2); - syncmem_var_d2h_op()(this->cpu_ctx, this->ctx, drhocg+igl0, drhocg_d+igl0, rho_basis->ngg-igl0); + syncmem_var_d2h_op()(drhocg+igl0, drhocg_d+igl0, rho_basis->ngg-igl0); } else { hamilt::cal_stress_drhoc_aux_op()( diff --git a/source/module_hamilt_pw/hamilt_pwdft/fs_kin_tools.cpp b/source/module_hamilt_pw/hamilt_pwdft/fs_kin_tools.cpp index 77c59c8644..c26598d99d 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/fs_kin_tools.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/fs_kin_tools.cpp @@ -72,8 +72,8 @@ void FS_Kin_tools::cal_gk(const int& ik) } if (this->device == base_device::GpuDevice) { - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, d_gk, gk[0], 3 * npwk_max); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, d_kfac, kfac.data(), npwk_max); + syncmem_var_h2d_op()(d_gk, gk[0], 3 * npwk_max); + syncmem_var_h2d_op()(d_kfac, kfac.data(), npwk_max); } } diff --git a/source/module_hamilt_pw/hamilt_pwdft/fs_nonlocal_tools.cpp b/source/module_hamilt_pw/hamilt_pwdft/fs_nonlocal_tools.cpp index 8fdaa402e7..96888d306a 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/fs_nonlocal_tools.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/fs_nonlocal_tools.cpp @@ -83,21 +83,21 @@ void FS_Nonlocal_tools::allocate_memory(const ModuleBase::matrix resmem_var_op()(this->hd_ylm_deri, 3 * (_lmax + 1) * (_lmax + 1) * max_npw); const int nks = this->kv_->get_nks(); resmem_var_op()(d_wk, nks); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, d_wk, this->kv_->wk.data(), nks); + syncmem_var_h2d_op()(d_wk, this->kv_->wk.data(), nks); if (this->device == base_device::GpuDevice) { resmem_var_op()(d_wg, wg.nr * wg.nc); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, d_wg, wg.c, wg.nr * wg.nc); + syncmem_var_h2d_op()(d_wg, wg.c, wg.nr * wg.nc); if (p_ekb != nullptr) { resmem_var_op()(d_ekb, p_ekb->nr * p_ekb->nc); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, d_ekb, p_ekb->c, p_ekb->nr * p_ekb->nc); + syncmem_var_h2d_op()(d_ekb, p_ekb->c, p_ekb->nr * p_ekb->nc); } resmem_int_op()(atom_nh, this->ntype); resmem_int_op()(atom_na, this->ntype); - syncmem_int_h2d_op()(this->ctx, this->cpu_ctx, atom_nh, h_atom_nh.data(), this->ntype); - syncmem_int_h2d_op()(this->ctx, this->cpu_ctx, atom_na, h_atom_na.data(), this->ntype); + syncmem_int_h2d_op()(atom_nh, h_atom_nh.data(), this->ntype); + syncmem_int_h2d_op()(atom_na, h_atom_na.data(), this->ntype); resmem_var_op()(d_g_plus_k, max_npw * 5); resmem_var_op()(d_pref, max_nh); @@ -191,8 +191,8 @@ void FS_Nonlocal_tools::cal_vkb(const int& ik, const int& nbdall maths.cal_ylm(lmax_, npw, g_plus_k.data(), hd_ylm); if (this->device == base_device::GpuDevice) { - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, d_g_plus_k, g_plus_k.data(), g_plus_k.size()); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, d_vq_tab, this->nlpp_->tab.ptr, this->nlpp_->tab.getSize()); + syncmem_var_h2d_op()(d_g_plus_k, g_plus_k.data(), g_plus_k.size()); + syncmem_var_h2d_op()(d_vq_tab, this->nlpp_->tab.ptr, this->nlpp_->tab.getSize()); gk = d_g_plus_k; vq_tb = d_vq_tab; } @@ -229,8 +229,8 @@ void FS_Nonlocal_tools::cal_vkb(const int& ik, const int& nbdall this->dvkb_indexes.data()); if (this->device == base_device::GpuDevice) { - syncmem_int_h2d_op()(this->ctx, this->cpu_ctx, d_dvkb_indexes, dvkb_indexes.data(), nh * 4); - syncmem_complex_h2d_op()(this->ctx, this->cpu_ctx, d_pref_in, pref.data(), nh); + syncmem_int_h2d_op()(d_dvkb_indexes, dvkb_indexes.data(), nh * 4); + syncmem_complex_h2d_op()(d_pref_in, pref.data(), nh); } for (int ia = 0; ia < h_atom_na[it]; ia++) @@ -383,8 +383,8 @@ void FS_Nonlocal_tools::cal_vkb_deri_s(const int& ik, this->dvkb_indexes.data()); if (this->device == base_device::GpuDevice) { - syncmem_int_h2d_op()(this->ctx, this->cpu_ctx, d_dvkb_indexes, dvkb_indexes.data(), nh * 4); - syncmem_complex_h2d_op()(this->ctx, this->cpu_ctx, d_pref_in, pref.data(), nh); + syncmem_int_h2d_op()(d_dvkb_indexes, dvkb_indexes.data(), nh * 4); + syncmem_complex_h2d_op()(d_pref_in, pref.data(), nh); } for (int ia = 0; ia < h_atom_na[it]; ia++) { @@ -732,8 +732,8 @@ void FS_Nonlocal_tools::transfer_gcar(const int& npw, const int& const int max_count = std::max(gcar_zero_counts[0], std::max(gcar_zero_counts[1], gcar_zero_counts[2])); resmem_complex_op()(this->vkb_save, this->nkb * max_count); // transfer the gcar and gcar_zero_indexes to the device - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, gcar, gcar_tmp.data(), 3 * npw_max); - syncmem_int_h2d_op()(this->ctx, this->cpu_ctx, gcar_zero_indexes, gcar_zero_indexes_tmp.data(), 3 * npw_max); + syncmem_var_h2d_op()(gcar, gcar_tmp.data(), 3 * npw_max); + syncmem_int_h2d_op()(gcar_zero_indexes, gcar_zero_indexes_tmp.data(), 3 * npw_max); } // cal_force diff --git a/source/module_hamilt_pw/hamilt_pwdft/hamilt_pw.cpp b/source/module_hamilt_pw/hamilt_pwdft/hamilt_pw.cpp index 1b0992309b..69df325d6c 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/hamilt_pw.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/hamilt_pw.cpp @@ -246,7 +246,7 @@ void HamiltPW::sPsi(const T* psi_in, // psi return; } - syncmem_op()(this->ctx, this->ctx, spsi, psi_in, static_cast(nbands * nrow)); + syncmem_op()(spsi, psi_in, static_cast(nbands * nrow)); if (PARAM.globalv.use_uspp) { T* becp = nullptr; diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/ekinetic_op_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/ekinetic_op_test.cpp index 7129b78261..ae30a9d7e5 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/ekinetic_op_test.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/ekinetic_op_test.cpp @@ -85,12 +85,12 @@ TEST_F(TestModuleHamiltEkinetic, ekinetic_pw_op_gpu) resize_memory_complex_double_op()(psi_dev, psi.size()); std::vector > hpsi(expected_hpsi.size(), std::complex(0.0, 0.0)); resize_memory_complex_double_op()(hpsi_dev, hpsi.size()); - syncmem_cd_h2d_op()(gpu_ctx, cpu_ctx, hpsi_dev, hpsi.data(), hpsi.size()); - syncmem_d_h2d_op()(gpu_ctx, cpu_ctx, gk2_dev, gk2.data(), gk2.size()); - syncmem_cd_h2d_op()(gpu_ctx, cpu_ctx, psi_dev, psi.data(), psi.size()); + syncmem_cd_h2d_op()(hpsi_dev, hpsi.data(), hpsi.size()); + syncmem_d_h2d_op()(gk2_dev, gk2.data(), gk2.size()); + syncmem_cd_h2d_op()(psi_dev, psi.data(), psi.size()); // ekinetic_cpu_op()(cpu_ctx, band, dim, dim, tpiba2, gk2.data(), hpsi.data(), psi.data()); ekinetic_gpu_op()(gpu_ctx, band, dim, dim, false, tpiba2, gk2_dev, hpsi_dev, psi_dev); - syncmem_cd_d2h_op()(cpu_ctx, gpu_ctx, hpsi.data(), hpsi_dev, hpsi.size()); + syncmem_cd_d2h_op()(hpsi.data(), hpsi_dev, hpsi.size()); for (int ii = 0; ii < hpsi.size(); ii++) { EXPECT_LT(fabs(hpsi[ii] - expected_hpsi[ii]), 1e-6); diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/force_op_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/force_op_test.cpp index 3526bc705d..0161f2b6f4 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/force_op_test.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/force_op_test.cpp @@ -2922,9 +2922,9 @@ TEST_F(TestSrcPWForceMultiDevice, cal_vkb1_nl_op_gpu) resmem_complex_op()(d_res, res.size()); resmem_complex_op()(d_vkb, vkb.size()); resmem_var_op()(d_gcar, gcar.size()); - syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_res, res.data(), res.size()); - syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_vkb, vkb.data(), vkb.size()); - syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_gcar, gcar.data(), gcar.size()); + syncmem_complex_h2d_op()(d_res, res.data(), res.size()); + syncmem_complex_h2d_op()(d_vkb, vkb.data(), vkb.size()); + syncmem_var_h2d_op()(d_gcar, gcar.data(), gcar.size()); hamilt::cal_vkb1_nl_op()(gpu_ctx, nkb, @@ -2936,7 +2936,7 @@ TEST_F(TestSrcPWForceMultiDevice, cal_vkb1_nl_op_gpu) d_vkb, d_gcar, d_res); - syncmem_complex_d2h_op()(cpu_ctx, gpu_ctx, res.data(), d_res, res.size()); + syncmem_complex_d2h_op()(res.data(), d_res, res.size()); for (int ii = 0; ii < res.size(); ii++) { @@ -2958,23 +2958,23 @@ TEST_F(TestSrcPWForceMultiDevice, cal_force_nl_op_gpu) resmem_var_op()(d_deeq, deeq.size()); resmem_var_op()(d_ekb, ekb.size()); resmem_var_op()(d_qq_nt, qq_nt.size()); - syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_wg, wg.data(), wg.size()); - syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_res, res.data(), res.size()); - syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_deeq, deeq.data(), deeq.size()); - syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_ekb, ekb.data(), ekb.size()); - syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_qq_nt, qq_nt.data(), qq_nt.size()); + syncmem_var_h2d_op()(d_wg, wg.data(), wg.size()); + syncmem_var_h2d_op()(d_res, res.data(), res.size()); + syncmem_var_h2d_op()(d_deeq, deeq.data(), deeq.size()); + syncmem_var_h2d_op()(d_ekb, ekb.data(), ekb.size()); + syncmem_var_h2d_op()(d_qq_nt, qq_nt.data(), qq_nt.size()); int *d_atom_nh = nullptr, *d_atom_na = nullptr; resmem_int_op()(d_atom_nh, atom_nh.size()); resmem_int_op()(d_atom_na, atom_na.size()); - syncmem_int_h2d_op()(gpu_ctx, cpu_ctx, d_atom_nh, atom_nh.data(), atom_nh.size()); - syncmem_int_h2d_op()(gpu_ctx, cpu_ctx, d_atom_na, atom_na.data(), atom_na.size()); + syncmem_int_h2d_op()(d_atom_nh, atom_nh.data(), atom_nh.size()); + syncmem_int_h2d_op()(d_atom_na, atom_na.data(), atom_na.size()); std::complex*d_becp = nullptr, *d_dbecp = nullptr; resmem_complex_op()(d_becp, becp.size()); resmem_complex_op()(d_dbecp, dbecp.size()); - syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_becp, becp.data(), becp.size()); - syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_dbecp, dbecp.data(), dbecp.size()); + syncmem_complex_h2d_op()(d_becp, becp.data(), becp.size()); + syncmem_complex_h2d_op()(d_dbecp, dbecp.data(), dbecp.size()); hamilt::cal_force_nl_op()(gpu_ctx, multi_proj, @@ -2998,7 +2998,7 @@ TEST_F(TestSrcPWForceMultiDevice, cal_force_nl_op_gpu) d_becp, d_dbecp, d_res); - syncmem_var_d2h_op()(cpu_ctx, gpu_ctx, res.data(), d_res, res.size()); + syncmem_var_d2h_op()(res.data(), d_res, res.size()); for (int ii = 0; ii < res.size(); ii++) { diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/meta_op_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/meta_op_test.cpp index aafae35d27..14968e1d10 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/meta_op_test.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/meta_op_test.cpp @@ -64,14 +64,14 @@ TEST_F(TestModuleHamiltMeta, meta_pw_op_gpu) resmem_var_op()(d_kvec_c, kvec_c.size()); resmem_complex_op()(d_in, in.size()); resmem_complex_op()(d_res, res.size()); - syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_gcar, gcar.data(), gcar.size()); - syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_kvec_c, kvec_c.data(), kvec_c.size()); - syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_in, in.data(), in.size()); - syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_res, res.data(), res.size()); + syncmem_var_h2d_op()(d_gcar, gcar.data(), gcar.size()); + syncmem_var_h2d_op()(d_kvec_c, kvec_c.data(), kvec_c.size()); + syncmem_complex_h2d_op()(d_in, in.data(), in.size()); + syncmem_complex_h2d_op()(d_res, res.data(), res.size()); meta_gpu_op()(gpu_ctx, ik, pol, npw, npwx, tpiba, d_gcar, d_kvec_c, d_in, d_res); - syncmem_complex_d2h_op()(cpu_ctx, gpu_ctx, res.data(), d_res, res.size()); + syncmem_complex_d2h_op()(res.data(), d_res, res.size()); for (int ii = 0; ii < res.size(); ii++) { EXPECT_LT(fabs(res[ii] - expected_out[ii]), 6e-5); } diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/nonlocal_op_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/nonlocal_op_test.cpp index fd7ce1f98b..df28e1766a 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/nonlocal_op_test.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/nonlocal_op_test.cpp @@ -130,9 +130,9 @@ TEST_F(TestModuleHamiltNonlocal, nonlocal_pw_op_gpu) resize_memory_double_op()(deeq_dev, deeq.size()); resize_memory_complex_double_op()(ps_dev, ps.size()); resize_memory_complex_double_op()(becp_dev, becp.size()); - syncmem_d_h2d_op()(gpu_ctx, cpu_ctx, deeq_dev, deeq.data(), deeq.size()); - syncmem_cd_h2d_op()(gpu_ctx, cpu_ctx, ps_dev, ps.data(), ps.size()); - syncmem_cd_h2d_op()(gpu_ctx, cpu_ctx, becp_dev, becp.data(), becp.size()); + syncmem_d_h2d_op()(deeq_dev, deeq.data(), deeq.size()); + syncmem_cd_h2d_op()(ps_dev, ps.data(), ps.size()); + syncmem_cd_h2d_op()(becp_dev, becp.data(), becp.size()); nonlocal_gpu_op()( gpu_ctx, l1, l2, l3, @@ -141,7 +141,7 @@ TEST_F(TestModuleHamiltNonlocal, nonlocal_pw_op_gpu) deeq_dev, ps_dev, becp_dev); - syncmem_cd_d2h_op()(cpu_ctx, gpu_ctx, ps.data(), ps_dev, ps.size()); + syncmem_cd_d2h_op()(ps.data(), ps_dev, ps.size()); for (int ii = 0; ii < ps.size(); ii++) { EXPECT_LT(fabs(ps[ii] - expected_ps[ii]), 5 * 1e-6); } @@ -160,9 +160,9 @@ TEST_F(TestModuleHamiltNonlocal, nonlocal_pw_spin_op_gpu) resize_memory_complex_double_op()(deeq_dev, deeq_spin.size()); resize_memory_complex_double_op()(ps_dev, ps.size()); resize_memory_complex_double_op()(becp_dev, becp_spin.size()); - syncmem_cd_h2d_op()(gpu_ctx, cpu_ctx, deeq_dev, deeq_spin.data(), deeq_spin.size()); - syncmem_cd_h2d_op()(gpu_ctx, cpu_ctx, ps_dev, ps.data(), ps.size()); - syncmem_cd_h2d_op()(gpu_ctx, cpu_ctx, becp_dev, becp_spin.data(), becp_spin.size()); + syncmem_cd_h2d_op()(deeq_dev, deeq_spin.data(), deeq_spin.size()); + syncmem_cd_h2d_op()(ps_dev, ps.data(), ps.size()); + syncmem_cd_h2d_op()(becp_dev, becp_spin.data(), becp_spin.size()); nonlocal_gpu_op()( gpu_ctx, l1, l2_spin, l3, @@ -171,7 +171,7 @@ TEST_F(TestModuleHamiltNonlocal, nonlocal_pw_spin_op_gpu) deeq_dev, ps_dev, becp_dev); - syncmem_cd_d2h_op()(cpu_ctx, gpu_ctx, ps.data(), ps_dev, ps.size()); + syncmem_cd_d2h_op()(ps.data(), ps_dev, ps.size()); for (int ii = 0; ii < ps.size(); ii++) { EXPECT_LT(fabs(ps[ii] - expected_ps_spin[ii]), 5 * 1e-6); } diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/stress_op_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/stress_op_test.cpp index 58a42f9238..98390737f7 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/stress_op_test.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/stress_op_test.cpp @@ -143,18 +143,18 @@ TEST(TestSrcPWStressMultiDevice, cal_dbecp_noevc_nl_op_gpu) resmem_zd_op()(d_vkb1, vkb1.size()); resmem_zd_op()(d_vkb2, vkb2.size()); resmem_zd_op()(d_dbecp_noevc, dbecp_noevc.size()); - syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, d_vkb0i, vkb0i.data(), vkb0i.size()); - syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, d_vkb0j, vkb0j.data(), vkb0j.size()); - syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, d_vkb, vkb.data(), vkb.size()); - syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, d_vkb1, vkb1.data(), vkb1.size()); - syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, d_vkb2, vkb2.data(), vkb2.size()); - syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, d_dbecp_noevc, dbecp_noevc.data(), dbecp_noevc.size()); + syncmem_z2z_h2d_op()(d_vkb0i, vkb0i.data(), vkb0i.size()); + syncmem_z2z_h2d_op()(d_vkb0j, vkb0j.data(), vkb0j.size()); + syncmem_z2z_h2d_op()(d_vkb, vkb.data(), vkb.size()); + syncmem_z2z_h2d_op()(d_vkb1, vkb1.data(), vkb1.size()); + syncmem_z2z_h2d_op()(d_vkb2, vkb2.data(), vkb2.size()); + syncmem_z2z_h2d_op()(d_dbecp_noevc, dbecp_noevc.data(), dbecp_noevc.size()); resmem_dd_op()(d_gcar, gcar.size()); resmem_dd_op()(d_kvec_c, kvec_c.size()); - syncmem_d2d_h2d_op()(gpu_ctx, cpu_ctx, d_gcar, gcar.data(), gcar.size()); - syncmem_d2d_h2d_op()(gpu_ctx, cpu_ctx, d_kvec_c, kvec_c.data(), kvec_c.size()); + syncmem_d2d_h2d_op()(d_gcar, gcar.data(), gcar.size()); + syncmem_d2d_h2d_op()(d_kvec_c, kvec_c.data(), kvec_c.size()); hamilt::cal_dbecp_noevc_nl_op()(gpu_ctx, ipol, @@ -173,7 +173,7 @@ TEST(TestSrcPWStressMultiDevice, cal_dbecp_noevc_nl_op_gpu) d_vkb2, d_dbecp_noevc); - syncmem_z2z_d2h_op()(cpu_ctx, gpu_ctx, dbecp_noevc.data(), d_dbecp_noevc, dbecp_noevc.size()); + syncmem_z2z_d2h_op()(dbecp_noevc.data(), d_dbecp_noevc, dbecp_noevc.size()); for (int ii = 0; ii < dbecp_noevc.size(); ii++) { EXPECT_LT(fabs(dbecp_noevc[ii] - expected_dbecpnoevc[ii]), 6e-5); @@ -238,19 +238,19 @@ TEST(TestSrcPWStressMultiDevice, cal_stress_nl_op_gpu) int * d_atom_nh = nullptr, * d_atom_na = nullptr; resmem_zd_op()(d_becp, becp.size()); resmem_zd_op()(d_dbecp, dbecp.size()); - syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, d_becp, becp.data(), becp.size()); - syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, d_dbecp, dbecp.data(), dbecp.size()); + syncmem_z2z_h2d_op()(d_becp, becp.data(), becp.size()); + syncmem_z2z_h2d_op()(d_dbecp, dbecp.data(), dbecp.size()); resmem_dd_op()(d_wg, wg.size()); resmem_dd_op()(d_deeq, deeq.size()); resmem_dd_op()(d_stress, stress.size()); resmem_dd_op()(d_ekb, ekb.size()); resmem_dd_op()(d_qq_nt, qq_nt.size()); - syncmem_d2d_h2d_op()(gpu_ctx, cpu_ctx, d_wg, wg.data(), wg.size()); - syncmem_d2d_h2d_op()(gpu_ctx, cpu_ctx, d_deeq, deeq.data(), deeq.size()); - syncmem_d2d_h2d_op()(gpu_ctx, cpu_ctx, d_stress, stress.data(), stress.size()); - syncmem_d2d_h2d_op()(gpu_ctx, cpu_ctx, d_ekb, ekb.data(), ekb.size()); - syncmem_d2d_h2d_op()(gpu_ctx, cpu_ctx, d_qq_nt, qq_nt.data(), qq_nt.size()); + syncmem_d2d_h2d_op()(d_wg, wg.data(), wg.size()); + syncmem_d2d_h2d_op()(d_deeq, deeq.data(), deeq.size()); + syncmem_d2d_h2d_op()(d_stress, stress.data(), stress.size()); + syncmem_d2d_h2d_op()(d_ekb, ekb.data(), ekb.size()); + syncmem_d2d_h2d_op()(d_qq_nt, qq_nt.data(), qq_nt.size()); using delmem_int_op = base_device::memory::delete_memory_op; using resmem_int_op = base_device::memory::resize_memory_op; @@ -259,8 +259,8 @@ TEST(TestSrcPWStressMultiDevice, cal_stress_nl_op_gpu) resmem_int_op()(d_atom_nh, atom_nh.size()); resmem_int_op()(d_atom_na, atom_na.size()); - syncmem_int_h2d_op()(gpu_ctx, cpu_ctx, d_atom_nh, atom_nh.data(), atom_nh.size()); - syncmem_int_h2d_op()(gpu_ctx, cpu_ctx, d_atom_na, atom_na.data(), atom_na.size()); + syncmem_int_h2d_op()(d_atom_nh, atom_nh.data(), atom_nh.size()); + syncmem_int_h2d_op()(d_atom_na, atom_na.data(), atom_na.size()); hamilt::cal_stress_nl_op()(gpu_ctx, multi_proj, @@ -284,7 +284,7 @@ TEST(TestSrcPWStressMultiDevice, cal_stress_nl_op_gpu) d_dbecp, d_stress); - syncmem_d2d_d2h_op()(cpu_ctx, gpu_ctx, stress.data(), d_stress, stress.size()); + syncmem_d2d_d2h_op()(stress.data(), d_stress, stress.size()); for (int ii = 0; ii < stress.size(); ii++) { EXPECT_LT(fabs(stress[ii] - expected_stress[ii]), 6e-5); diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/veff_op_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/veff_op_test.cpp index 67a453e34b..82fb4411ba 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/veff_op_test.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/veff_op_test.cpp @@ -91,12 +91,12 @@ TEST_F(TestModuleHamiltVeff, veff_pw_op_gpu) std::complex* d_res = NULL; resize_memory_double_op()(d_in, in.size()); resize_memory_complex_op()(d_res, res.size()); - syncmem_double_h2d_op()(gpu_ctx, cpu_ctx, d_in, in.data(), in.size()); - syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_res, res.data(), res.size()); + syncmem_double_h2d_op()(d_in, in.data(), in.size()); + syncmem_complex_h2d_op()(d_res, res.data(), res.size()); veff_gpu_op()(gpu_ctx, this->size, d_res, d_in); - syncmem_complex_d2h_op()(cpu_ctx, gpu_ctx, res.data(), d_res, res.size()); + syncmem_complex_d2h_op()(res.data(), d_res, res.size()); for (int ii = 0; ii < res.size(); ii++) { EXPECT_LT(fabs(res[ii] - expected_out[ii]), 6e-5); } @@ -115,9 +115,9 @@ TEST_F(TestModuleHamiltVeff, veff_pw_spin_op_gpu) resize_memory_double_op()(d_in, in_spin.size()); resize_memory_complex_op()(d_res, res.size()); resize_memory_complex_op()(d_res1, res1.size()); - syncmem_double_h2d_op()(gpu_ctx, cpu_ctx, d_in, in_spin.data(), in_spin.size()); - syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_res, res.data(), res.size()); - syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_res1, res1.data(), res1.size()); + syncmem_double_h2d_op()(d_in, in_spin.data(), in_spin.size()); + syncmem_complex_h2d_op()(d_res, res.data(), res.size()); + syncmem_complex_h2d_op()(d_res1, res1.data(), res1.size()); const double * in_[4]; for (int ii = 0; ii < 4; ii++) { @@ -126,8 +126,8 @@ TEST_F(TestModuleHamiltVeff, veff_pw_spin_op_gpu) veff_gpu_op()(gpu_ctx, this->size, d_res, d_res1, in_); - syncmem_complex_d2h_op()(cpu_ctx, gpu_ctx, res.data(), d_res, res.size()); - syncmem_complex_d2h_op()(cpu_ctx, gpu_ctx, res1.data(), d_res1, res1.size()); + syncmem_complex_d2h_op()(res.data(), d_res, res.size()); + syncmem_complex_d2h_op()(res1.data(), d_res1, res1.size()); for (int ii = 0; ii < res.size(); ii++) { EXPECT_LT(fabs(res[ii] - expected_out_spin[ii]), 7.5e-5); EXPECT_LT(fabs(res1[ii] - expected_out1_spin[ii]), 6e-5); diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/vnl_op_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/vnl_op_test.cpp index 04d27fa92e..1ce0e90c10 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/vnl_op_test.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/vnl_op_test.cpp @@ -4052,9 +4052,9 @@ TEST_F(TestSrcPWVnlMultiDevice, cal_vnl_op_gpu) resmem_int_op()(d_atom_na, atom_na.size()); resmem_int_op()(d_atom_nb, atom_nb.size()); resmem_int_op()(d_atom_nh, atom_nh.size()); - syncmem_int_h2d_op()(gpu_ctx, cpu_ctx, d_atom_na, atom_na.data(), atom_na.size()); - syncmem_int_h2d_op()(gpu_ctx, cpu_ctx, d_atom_nb, atom_nb.data(), atom_nb.size()); - syncmem_int_h2d_op()(gpu_ctx, cpu_ctx, d_atom_nh, atom_nh.data(), atom_nh.size()); + syncmem_int_h2d_op()(d_atom_na, atom_na.data(), atom_na.size()); + syncmem_int_h2d_op()(d_atom_nb, atom_nb.data(), atom_nb.size()); + syncmem_int_h2d_op()(d_atom_nh, atom_nh.data(), atom_nh.size()); resmem_var_op()(d_gk, gk.size()); resmem_var_op()(d_ylm, ylm.size()); @@ -4064,19 +4064,19 @@ TEST_F(TestSrcPWVnlMultiDevice, cal_vnl_op_gpu) resmem_var_op()(d_tab, tab.size()); resmem_var_op()(d_vkb1, vkb1.size()); - syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_gk, gk.data(), gk.size()); - syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_ylm, ylm.data(), ylm.size()); - syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_indv, indv.data(), indv.size()); - syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_nhtol, nhtol.data(), nhtol.size()); - syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_nhtolm, nhtolm.data(), nhtolm.size()); - syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_tab, tab.data(), tab.size()); - syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_vkb1, vkb1.data(), vkb1.size()); + syncmem_var_h2d_op()(d_gk, gk.data(), gk.size()); + syncmem_var_h2d_op()(d_ylm, ylm.data(), ylm.size()); + syncmem_var_h2d_op()(d_indv, indv.data(), indv.size()); + syncmem_var_h2d_op()(d_nhtol, nhtol.data(), nhtol.size()); + syncmem_var_h2d_op()(d_nhtolm, nhtolm.data(), nhtolm.size()); + syncmem_var_h2d_op()(d_tab, tab.data(), tab.size()); + syncmem_var_h2d_op()(d_vkb1, vkb1.data(), vkb1.size()); resmem_complex_op()(d_sk, sk.size()); resmem_complex_op()(d_vkb, vkb.size()); - syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_sk, sk.data(), sk.size()); - syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_vkb, vkb.data(), vkb.size()); + syncmem_complex_h2d_op()(d_sk, sk.data(), sk.size()); + syncmem_complex_h2d_op()(d_vkb, vkb.data(), vkb.size()); hamilt::cal_vnl_op()(gpu_ctx, ntype, @@ -4101,7 +4101,7 @@ TEST_F(TestSrcPWVnlMultiDevice, cal_vnl_op_gpu) d_sk, d_vkb); - syncmem_complex_d2h_op()(cpu_ctx, gpu_ctx, vkb.data(), d_vkb, vkb.size()); + syncmem_complex_d2h_op()(vkb.data(), d_vkb, vkb.size()); for (int ii = 0; ii < vkb.size(); ii++) { diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/wf_op_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/wf_op_test.cpp index 419dfbd536..e94c92c90d 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/wf_op_test.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/wf_op_test.cpp @@ -415,23 +415,23 @@ TEST_F(TestSrcPWWfMultiDevice, cal_sk_op_gpu) resmem_int_op()(d_atom_na, atom_na.size()); resmem_int_op()(d_igl2isz, igl2isz.size()); resmem_int_op()(d_is2fftixy, is2fftixy.size()); - syncmem_int_h2d_op()(gpu_ctx, cpu_ctx, d_atom_na, atom_na.data(), atom_na.size()); - syncmem_int_h2d_op()(gpu_ctx, cpu_ctx, d_igl2isz, igl2isz.data(), igl2isz.size()); - syncmem_int_h2d_op()(gpu_ctx, cpu_ctx, d_is2fftixy, is2fftixy.data(), is2fftixy.size()); + syncmem_int_h2d_op()(d_atom_na, atom_na.data(), atom_na.size()); + syncmem_int_h2d_op()(d_igl2isz, igl2isz.data(), igl2isz.size()); + syncmem_int_h2d_op()(d_is2fftixy, is2fftixy.data(), is2fftixy.size()); resmem_var_op()(d_kvec_c, kvec_c.size()); resmem_var_op()(d_atom_tau, atom_tau.size()); - syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_kvec_c, kvec_c.data(), kvec_c.size()); - syncmem_var_h2d_op()(gpu_ctx, cpu_ctx, d_atom_tau, atom_tau.data(), atom_tau.size()); + syncmem_var_h2d_op()(d_kvec_c, kvec_c.data(), kvec_c.size()); + syncmem_var_h2d_op()(d_atom_tau, atom_tau.data(), atom_tau.size()); resmem_complex_op()(d_sk, sk.size()); resmem_complex_op()(d_eigts1, eigts1.size()); resmem_complex_op()(d_eigts2, eigts2.size()); resmem_complex_op()(d_eigts3, eigts3.size()); - syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_sk, sk.data(), sk.size()); - syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_eigts1, eigts1.data(), eigts1.size()); - syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_eigts2, eigts2.data(), eigts2.size()); - syncmem_complex_h2d_op()(gpu_ctx, cpu_ctx, d_eigts3, eigts3.data(), eigts3.size()); + syncmem_complex_h2d_op()(d_sk, sk.data(), sk.size()); + syncmem_complex_h2d_op()(d_eigts1, eigts1.data(), eigts1.size()); + syncmem_complex_h2d_op()(d_eigts2, eigts2.data(), eigts2.size()); + syncmem_complex_h2d_op()(d_eigts3, eigts3.data(), eigts3.size()); hamilt::cal_sk_op()(gpu_ctx, ik, @@ -459,7 +459,7 @@ TEST_F(TestSrcPWWfMultiDevice, cal_sk_op_gpu) d_eigts3, d_sk); - syncmem_complex_d2h_op()(cpu_ctx, gpu_ctx, sk.data(), d_sk, sk.size()); + syncmem_complex_d2h_op()(sk.data(), d_sk, sk.size()); for (int ii = 0; ii < sk.size(); ii++) { EXPECT_LT(fabs(sk[ii] - expected_sk[ii]), 6e-5); diff --git a/source/module_hamilt_pw/hamilt_pwdft/nonlocal_maths.hpp b/source/module_hamilt_pw/hamilt_pwdft/nonlocal_maths.hpp index aa28b5abe2..79649fab07 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/nonlocal_maths.hpp +++ b/source/module_hamilt_pw/hamilt_pwdft/nonlocal_maths.hpp @@ -164,7 +164,7 @@ void Nonlocal_maths::cal_ylm(int lmax, int npw, const FPTYPE* q, // calculate ModuleBase::YlmReal::Ylm_Real(cpu_ctx, ntot_ylm, npw, q, ylm_cpu.data()); // send from cpu to gpu - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, ylm, ylm_cpu.data(), ylm_cpu.size()); + syncmem_var_h2d_op()(ylm, ylm_cpu.data(), ylm_cpu.size()); } else { @@ -193,7 +193,7 @@ void Nonlocal_maths::cal_ylm_deri(int lmax, int npw, const FPTYP Nonlocal_maths::dylmr2(ntot_ylm, npw, q, &dylmdq_cpu[ipol * ntot_ylm * npw], ipol); } // send from cpu to gpu - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, out, dylmdq_cpu.data(), dylmdq_cpu.size()); + syncmem_var_h2d_op()(out, dylmdq_cpu.data(), dylmdq_cpu.size()); } else { diff --git a/source/module_hamilt_pw/hamilt_pwdft/onsite_proj_tools.cpp b/source/module_hamilt_pw/hamilt_pwdft/onsite_proj_tools.cpp index fca63be74b..145cee7142 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/onsite_proj_tools.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/onsite_proj_tools.cpp @@ -204,12 +204,12 @@ void Onsite_Proj_tools::allocate_memory(const ModuleBase::matrix { resmem_var_op()(d_wg, wg.nr * wg.nc); resmem_var_op()(d_ekb, ekb.nr * ekb.nc); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, d_wg, wg.c, wg.nr * wg.nc); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, d_ekb, ekb.c, ekb.nr * ekb.nc); + syncmem_var_h2d_op()(d_wg, wg.c, wg.nr * wg.nc); + syncmem_var_h2d_op()(d_ekb, ekb.c, ekb.nr * ekb.nc); resmem_int_op()(atom_nh, this->ntype); resmem_int_op()(atom_na, this->ntype); - syncmem_int_h2d_op()(this->ctx, this->cpu_ctx, atom_nh, h_atom_nh.data(), this->ntype); - syncmem_int_h2d_op()(this->ctx, this->cpu_ctx, atom_na, h_atom_na.data(), this->ntype); + syncmem_int_h2d_op()(atom_nh, h_atom_nh.data(), this->ntype); + syncmem_int_h2d_op()(atom_na, h_atom_na.data(), this->ntype); resmem_var_op()(d_g_plus_k, max_npw * 5); resmem_var_op()(d_pref, max_nh); @@ -347,8 +347,8 @@ void Onsite_Proj_tools::cal_becp(int ik, if (this->device == base_device::GpuDevice) { - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, d_g_plus_k, g_plus_k.data(), g_plus_k.size()); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, d_vq_tab, this->tabtpr->ptr, this->tabtpr->getSize()); + syncmem_var_h2d_op()(d_g_plus_k, g_plus_k.data(), g_plus_k.size()); + syncmem_var_h2d_op()(d_vq_tab, this->tabtpr->ptr, this->tabtpr->getSize()); gk = d_g_plus_k; vq_tb = d_vq_tab; } @@ -390,8 +390,8 @@ void Onsite_Proj_tools::cal_becp(int ik, if (this->device == base_device::GpuDevice) { - syncmem_int_h2d_op()(this->ctx, this->cpu_ctx, d_dvkb_indexes, dvkb_indexes.data(), nh * 4); - syncmem_complex_h2d_op()(this->ctx, this->cpu_ctx, d_pref_in, pref.data(), nh); + syncmem_int_h2d_op()(d_dvkb_indexes, dvkb_indexes.data(), nh * 4); + syncmem_complex_h2d_op()(d_pref_in, pref.data(), nh); } for (int ia = 0; ia < h_atom_na[it]; ia++) @@ -444,9 +444,9 @@ void Onsite_Proj_tools::cal_becp(int ik, { std::complex* h_becp = nullptr; resmem_complex_h_op()(h_becp, size_becp_act); - syncmem_complex_d2h_op()(this->cpu_ctx, this->ctx, h_becp, becp_tmp, size_becp_act); + syncmem_complex_d2h_op()(h_becp, becp_tmp, size_becp_act); Parallel_Reduce::reduce_pool(h_becp, size_becp_act); - syncmem_complex_h2d_op()(this->ctx, this->cpu_ctx, becp_tmp, h_becp, size_becp_act); + syncmem_complex_h2d_op()(becp_tmp, h_becp, size_becp_act); delmem_complex_h_op()(this->cpu_ctx, h_becp); } else @@ -540,8 +540,8 @@ void Onsite_Proj_tools::cal_dbecp_s(int ik, int npm, int ipol, i this->dvkb_indexes.data()); if (this->device == base_device::GpuDevice) { - syncmem_int_h2d_op()(this->ctx, this->cpu_ctx, d_dvkb_indexes, dvkb_indexes.data(), nh * 4); - syncmem_complex_h2d_op()(this->ctx, this->cpu_ctx, d_pref_in, pref.data(), nh); + syncmem_int_h2d_op()(d_dvkb_indexes, dvkb_indexes.data(), nh * 4); + syncmem_complex_h2d_op()(d_pref_in, pref.data(), nh); } for (int ia = 0; ia < h_atom_na[it]; ia++) { @@ -801,8 +801,8 @@ void Onsite_Proj_tools::transfer_gcar(int npw, int npw_max, cons const int max_count = std::max(gcar_zero_counts[0], std::max(gcar_zero_counts[1], gcar_zero_counts[2])); resmem_complex_op()(this->vkb_save, this->nkb * max_count); // transfer the gcar and gcar_zero_indexes to the device - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, gcar, gcar_tmp.data(), 3 * npw_max); - syncmem_int_h2d_op()(this->ctx, this->cpu_ctx, gcar_zero_indexes, gcar_zero_indexes_tmp.data(), 3 * npw_max); + syncmem_var_h2d_op()(gcar, gcar_tmp.data(), 3 * npw_max); + syncmem_int_h2d_op()(gcar_zero_indexes, gcar_zero_indexes_tmp.data(), 3 * npw_max); } template @@ -820,10 +820,10 @@ void Onsite_Proj_tools::cal_force_dftu(int ik, if (this->device == base_device::GpuDevice) { resmem_int_op()(orbital_corr_tmp, this->ucell_->ntype); - syncmem_int_h2d_op()(this->ctx, cpu_ctx, orbital_corr_tmp, orbital_corr, this->ucell_->ntype); + syncmem_int_h2d_op()(orbital_corr_tmp, orbital_corr, this->ucell_->ntype); resmem_complex_op()(vu_tmp, size_vu); - syncmem_complex_h2d_op()(this->ctx, cpu_ctx, vu_tmp, vu, size_vu); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, d_wg, h_wg, this->nbands * (ik+1)); + syncmem_complex_h2d_op()(vu_tmp, vu, size_vu); + syncmem_var_h2d_op()(d_wg, h_wg, this->nbands * (ik+1)); } else #endif @@ -878,8 +878,8 @@ void Onsite_Proj_tools::cal_force_dspin(int ik, if (this->device == base_device::GpuDevice) { resmem_var_op()(lambda_tmp, this->ucell_->nat * 3); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, lambda_tmp, lambda_array.data(), this->ucell_->nat * 3); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, d_wg, h_wg, this->nbands * (ik+1)); + syncmem_var_h2d_op()(lambda_tmp, lambda_array.data(), this->ucell_->nat * 3); + syncmem_var_h2d_op()(d_wg, h_wg, this->nbands * (ik+1)); } else #endif @@ -928,10 +928,10 @@ void Onsite_Proj_tools::cal_stress_dftu(int ik, if (this->device == base_device::GpuDevice) { resmem_int_op()(orbital_corr_tmp, this->ucell_->ntype); - syncmem_int_h2d_op()(this->ctx, cpu_ctx, orbital_corr_tmp, orbital_corr, this->ucell_->ntype); + syncmem_int_h2d_op()(orbital_corr_tmp, orbital_corr, this->ucell_->ntype); resmem_complex_op()(vu_tmp, size_vu); - syncmem_complex_h2d_op()(this->ctx, cpu_ctx, vu_tmp, vu, size_vu); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, d_wg, h_wg, this->nbands * (ik+1)); + syncmem_complex_h2d_op()(vu_tmp, vu, size_vu); + syncmem_var_h2d_op()(d_wg, h_wg, this->nbands * (ik+1)); } else #endif @@ -982,8 +982,8 @@ void Onsite_Proj_tools::cal_stress_dspin(int ik, if (this->device == base_device::GpuDevice) { resmem_var_op()(lambda_tmp, this->ucell_->nat * 3); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, lambda_tmp, lambda_array.data(), this->ucell_->nat * 3); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, d_wg, h_wg, this->nbands * (ik+1)); + syncmem_var_h2d_op()(lambda_tmp, lambda_array.data(), this->ucell_->nat * 3); + syncmem_var_h2d_op()(d_wg, h_wg, this->nbands * (ik+1)); } else #endif diff --git a/source/module_hamilt_pw/hamilt_pwdft/onsite_projector.cpp b/source/module_hamilt_pw/hamilt_pwdft/onsite_projector.cpp index 9e329f0869..832379f445 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/onsite_projector.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/onsite_projector.cpp @@ -403,7 +403,7 @@ void projectors::OnsiteProjector::overlap_proj_psi( this->fs_tools->cal_becp(ik_, npm/npol, this->becp, ppsi); // in cal_becp, npm should be the one not multiplied by npol if(this->device == base_device::GpuDevice) { - syncmem_complex_d2h_op()(this->cpu_ctx, this->ctx, h_becp, this->becp, this->size_becp); + syncmem_complex_d2h_op()(h_becp, this->becp, this->size_becp); } ModuleBase::timer::tick("OnsiteProj", "overlap"); } diff --git a/source/module_hamilt_pw/hamilt_pwdft/operator_pw/onsite_proj_pw.cpp b/source/module_hamilt_pw/hamilt_pwdft/operator_pw/onsite_proj_pw.cpp index 7787b315dc..e03ba5494e 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/operator_pw/onsite_proj_pw.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/operator_pw/onsite_proj_pw.cpp @@ -147,7 +147,7 @@ void OnsiteProj>::cal_ps_delta_spin(const int npol, const ip_iat0[ip0++] = iat; } } - syncmem_int_h2d_op()(this->ctx, this->cpu_ctx, this->ip_iat, ip_iat0.data(), onsite_p->get_tot_nproj()); + syncmem_int_h2d_op()(this->ip_iat, ip_iat0.data(), onsite_p->get_tot_nproj()); } // prepare array of nh_iat and lambda_array to pass to the onsite_ps_op operator @@ -159,7 +159,7 @@ void OnsiteProj>::cal_ps_delta_spin(const int npol, const tmp_lambda_coeff[iat * 4 + 2] = std::complex(lambda[iat][0], -1 * lambda[iat][1]); tmp_lambda_coeff[iat * 4 + 3] = std::complex(-1 * lambda[iat][2], 0.0); } - syncmem_complex_h2d_op()(this->ctx, this->cpu_ctx, this->lambda_coeff, tmp_lambda_coeff.data(), this->ucell->nat * 4); + syncmem_complex_h2d_op()(this->lambda_coeff, tmp_lambda_coeff.data(), this->ucell->nat * 4); // TODO: code block above should be moved to the init function hamilt::onsite_ps_op()( @@ -285,15 +285,15 @@ void OnsiteProj>::cal_ps_dftu(const int npol, const int m) } } } - syncmem_int_h2d_op()(this->ctx, this->cpu_ctx, this->orb_l_iat, orb_l_iat0.data(), this->ucell->nat); - syncmem_int_h2d_op()(this->ctx, this->cpu_ctx, this->ip_iat, ip_iat0.data(), onsite_p->get_tot_nproj()); - syncmem_int_h2d_op()(this->ctx, this->cpu_ctx, this->ip_m, ip_m0.data(), onsite_p->get_tot_nproj()); - syncmem_int_h2d_op()(this->ctx, this->cpu_ctx, this->vu_begin_iat, vu_begin_iat0.data(), this->ucell->nat); + syncmem_int_h2d_op()(this->orb_l_iat, orb_l_iat0.data(), this->ucell->nat); + syncmem_int_h2d_op()(this->ip_iat, ip_iat0.data(), onsite_p->get_tot_nproj()); + syncmem_int_h2d_op()(this->ip_m, ip_m0.data(), onsite_p->get_tot_nproj()); + syncmem_int_h2d_op()(this->vu_begin_iat, vu_begin_iat0.data(), this->ucell->nat); resmem_complex_op()(this->vu_device, dftu->get_size_eff_pot_pw()); } - syncmem_complex_h2d_op()(this->ctx, this->cpu_ctx, this->vu_device, dftu->get_eff_pot_pw(0), dftu->get_size_eff_pot_pw()); + syncmem_complex_h2d_op()(this->vu_device, dftu->get_eff_pot_pw(0), dftu->get_size_eff_pot_pw()); hamilt::onsite_ps_op()( this->ctx, // device context diff --git a/source/module_hamilt_pw/hamilt_pwdft/stress_func_cc.cpp b/source/module_hamilt_pw/hamilt_pwdft/stress_func_cc.cpp index 8bfac873d2..04d0744743 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/stress_func_cc.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/stress_func_cc.cpp @@ -297,16 +297,16 @@ void Stress_Func::deriv_drhoc resmem_var_op()(gx_arr_d, rho_basis->ngg); resmem_var_op()(drhocg_d, rho_basis->ngg); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, gx_arr_d, gx_arr.data(), rho_basis->ngg); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, r_d, r, mesh); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, rab_d, rab, mesh); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, rhoc_d, rhoc, mesh); + syncmem_var_h2d_op()(gx_arr_d, gx_arr.data(), rho_basis->ngg); + syncmem_var_h2d_op()(r_d, r, mesh); + syncmem_var_h2d_op()(rab_d, rab, mesh); + syncmem_var_h2d_op()(rhoc_d, rhoc, mesh); } if(this->device == base_device::GpuDevice) { hamilt::cal_stress_drhoc_aux_op()( r_d,rhoc_d,gx_arr_d+igl0,rab_d,drhocg_d+igl0,mesh,igl0,rho_basis->ngg-igl0,omega,type); - syncmem_var_d2h_op()(this->cpu_ctx, this->ctx, drhocg+igl0, drhocg_d+igl0, rho_basis->ngg-igl0); + syncmem_var_d2h_op()(drhocg+igl0, drhocg_d+igl0, rho_basis->ngg-igl0); } else { hamilt::cal_stress_drhoc_aux_op()( diff --git a/source/module_hamilt_pw/hamilt_pwdft/stress_func_loc.cpp b/source/module_hamilt_pw/hamilt_pwdft/stress_func_loc.cpp index 3b842496f1..42e619c9bc 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/stress_func_loc.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/stress_func_loc.cpp @@ -252,14 +252,12 @@ const UnitCell& ucell_in resmem_var_op()(gx_arr_d, rho_basis->ngg+1); resmem_var_op()(drhocg_d, rho_basis->ngg); - syncmem_var_h2d_op()(this->ctx, - this->cpu_ctx, - gx_arr_d, + syncmem_var_h2d_op()(gx_arr_d, gx_arr.data(), rho_basis->ngg+1); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, r_d, r, msh); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, rab_d, rab, msh); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, rhoc_d, aux.data(), msh); + syncmem_var_h2d_op()(r_d, r, msh); + syncmem_var_h2d_op()(rab_d, rab, msh); + syncmem_var_h2d_op()(rhoc_d, aux.data(), msh); } @@ -267,7 +265,7 @@ const UnitCell& ucell_in if(this->device == base_device::GpuDevice) { hamilt::cal_stress_drhoc_aux_op()( r_d,rhoc_d,gx_arr_d+igl0,rab_d,drhocg_d+igl0,msh,igl0,rho_basis->ngg-igl0,ucell_in.omega,3); - syncmem_var_d2h_op()(this->cpu_ctx, this->ctx, dvloc+igl0, drhocg_d+igl0, rho_basis->ngg-igl0); + syncmem_var_d2h_op()(dvloc+igl0, drhocg_d+igl0, rho_basis->ngg-igl0); } else { hamilt::cal_stress_drhoc_aux_op()( diff --git a/source/module_hamilt_pw/hamilt_pwdft/stress_func_nl.cpp b/source/module_hamilt_pw/hamilt_pwdft/stress_func_nl.cpp index 383a1666ac..657e3c760d 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/stress_func_nl.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/stress_func_nl.cpp @@ -69,7 +69,7 @@ void Stress_Func::stress_nl(ModuleBase::matrix& sigma, } } // transfer stress from device to host - syncmem_var_d2h_op()(this->cpu_ctx, this->ctx, sigmanlc.data(), stress_device, 9); + syncmem_var_d2h_op()(sigmanlc.data(), stress_device, 9); delmem_var_op()(this->ctx, stress_device); // sum up forcenl from all processors for (int l = 0; l < 3; l++) diff --git a/source/module_hamilt_pw/hamilt_pwdft/stress_func_onsite.cpp b/source/module_hamilt_pw/hamilt_pwdft/stress_func_onsite.cpp index 919be07ea3..21ace6f7cc 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/stress_func_onsite.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/stress_func_onsite.cpp @@ -68,7 +68,7 @@ void Stress_Func::stress_onsite(ModuleBase::matrix& sigma, } } // transfer stress from device to host - syncmem_var_d2h_op()(this->cpu_ctx, this->ctx, sigma_onsite.data(), stress_device, 9); + syncmem_var_d2h_op()(sigma_onsite.data(), stress_device, 9); delmem_var_op()(this->ctx, stress_device); // sum up forcenl from all processors for (int l = 0; l < 3; l++) diff --git a/source/module_hamilt_pw/hamilt_pwdft/structure_factor.cpp b/source/module_hamilt_pw/hamilt_pwdft/structure_factor.cpp index a278aea4dd..0ede5db9cd 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/structure_factor.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/structure_factor.cpp @@ -161,9 +161,9 @@ void Structure_Factor::setup_structure_factor(const UnitCell* Ucell, const Paral resmem_zd_op()(this->z_eigts1, Ucell->nat * (2 * rho_basis->nx + 1)); resmem_zd_op()(this->z_eigts2, Ucell->nat * (2 * rho_basis->ny + 1)); resmem_zd_op()(this->z_eigts3, Ucell->nat * (2 * rho_basis->nz + 1)); - syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, this->z_eigts1, this->eigts1.c, Ucell->nat * (2 * rho_basis->nx + 1)); - syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, this->z_eigts2, this->eigts2.c, Ucell->nat * (2 * rho_basis->ny + 1)); - syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, this->z_eigts3, this->eigts3.c, Ucell->nat * (2 * rho_basis->nz + 1)); + syncmem_z2z_h2d_op()(this->z_eigts1, this->eigts1.c, Ucell->nat * (2 * rho_basis->nx + 1)); + syncmem_z2z_h2d_op()(this->z_eigts2, this->eigts2.c, Ucell->nat * (2 * rho_basis->ny + 1)); + syncmem_z2z_h2d_op()(this->z_eigts3, this->eigts3.c, Ucell->nat * (2 * rho_basis->nz + 1)); } else { if (PARAM.inp.precision == "single") { diff --git a/source/module_hamilt_pw/hamilt_pwdft/structure_factor_k.cpp b/source/module_hamilt_pw/hamilt_pwdft/structure_factor_k.cpp index 2fc457153a..6d255a787b 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/structure_factor_k.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/structure_factor_k.cpp @@ -92,10 +92,10 @@ void Structure_Factor::get_sk(Device* ctx, if (device == base_device::GpuDevice) { resmem_int_op()(atom_na, ucell->ntype); - syncmem_int_op()(ctx, cpu_ctx, atom_na, h_atom_na, ucell->ntype); + syncmem_int_op()(atom_na, h_atom_na, ucell->ntype); resmem_var_op()(atom_tau, ucell->nat * 3); - syncmem_var_op()(ctx, cpu_ctx, atom_tau, h_atom_tau, ucell->nat * 3); + syncmem_var_op()(atom_tau, h_atom_tau, ucell->nat * 3); igl2isz = wfc_basis->d_igl2isz_k; is2fftixy = wfc_basis->d_is2fftixy; diff --git a/source/module_hamilt_pw/hamilt_stodft/sto_che.h b/source/module_hamilt_pw/hamilt_stodft/sto_che.h index 6fd099f0ea..7557bb065a 100644 --- a/source/module_hamilt_pw/hamilt_stodft/sto_che.h +++ b/source/module_hamilt_pw/hamilt_stodft/sto_che.h @@ -56,9 +56,7 @@ REAL vTMv(const REAL* v, const REAL* M, const int n) REAL* dot_device = nullptr; base_device::memory::resize_memory_op()(dot_device, 1); container::kernels::blas_dot()(n, y, 1, v, 1, dot_device); - base_device::memory::synchronize_memory_op()(cpu_ctx, - ctx, - &result, + base_device::memory::synchronize_memory_op()(&result, dot_device, 1); base_device::memory::delete_memory_op()(ctx, y); diff --git a/source/module_hamilt_pw/hamilt_stodft/sto_forces.cpp b/source/module_hamilt_pw/hamilt_stodft/sto_forces.cpp index a0b8543eff..0b01acb148 100644 --- a/source/module_hamilt_pw/hamilt_stodft/sto_forces.cpp +++ b/source/module_hamilt_pw/hamilt_stodft/sto_forces.cpp @@ -250,7 +250,7 @@ void Sto_Forces::cal_sto_force_nl( nl_tools.cal_force(ik, max_nbands, nstobands, false, force, nksbands); } // end ik - syncmem_var_d2h_op()(this->cpu_ctx, this->ctx, forcenl.c, force, forcenl.nr * forcenl.nc); + syncmem_var_d2h_op()(forcenl.c, force, forcenl.nr * forcenl.nc); delmem_var_op()(this->ctx, force); // sum up forcenl from all processors Parallel_Reduce::reduce_all(forcenl.c, forcenl.nr * forcenl.nc); diff --git a/source/module_hamilt_pw/hamilt_stodft/sto_iter.cpp b/source/module_hamilt_pw/hamilt_stodft/sto_iter.cpp index adca009460..9a24bd7c7d 100644 --- a/source/module_hamilt_pw/hamilt_stodft/sto_iter.cpp +++ b/source/module_hamilt_pw/hamilt_stodft/sto_iter.cpp @@ -29,7 +29,7 @@ void Stochastic_Iter::dot(const int& n, const Real* x, const int& inc Real* result_device = nullptr; resmem_var_op()(result_device, 1); container::kernels::blas_dot()(n, p_che->coef_real, 1, spolyv, 1, result_device); - syncmem_var_d2h_op()(cpu_ctx, this->ctx, &result, result_device, 1); + syncmem_var_d2h_op()(&result, result_device, 1); delmem_var_op()(this->ctx, result_device); } @@ -65,7 +65,7 @@ void Stochastic_Iter::orthog(const int& ik, psi::Psi& psi, stowf.chi0->fix_k(ik); stowf.chiortho->fix_k(ik); T *wfgin = stowf.chi0->get_pointer(), *wfgout = stowf.chiortho->get_pointer(); - cpymem_complex_op()(this->ctx, this->ctx, wfgout, wfgin, npwx * nchipk); + cpymem_complex_op()(wfgout, wfgin, npwx * nchipk); // for (int ig = 0; ig < npwx * nchipk; ++ig) // { // wfgout[ig] = wfgin[ig]; @@ -209,8 +209,8 @@ void Stochastic_Iter::check_precision(const double ref, const double { Real last_coef = 0; Real last_spolyv = 0; - syncmem_var_d2h_op()(this->cpu_ctx, this->ctx, &last_coef, &p_che->coef_real[p_che->norder - 1], 1); - syncmem_var_d2h_op()(this->cpu_ctx, this->ctx, &last_spolyv, &spolyv[p_che->norder - 1], 1); + syncmem_var_d2h_op()(&last_coef, &p_che->coef_real[p_che->norder - 1], 1); + syncmem_var_d2h_op()(&last_spolyv, &spolyv[p_che->norder - 1], 1); error = last_coef * last_spolyv; } else @@ -220,8 +220,8 @@ void Stochastic_Iter::check_precision(const double ref, const double // double last_spolyv = spolyv[norder * norder - 1]; Real last_coef = 0; Real last_spolyv = 0; - syncmem_var_d2h_op()(this->cpu_ctx, this->ctx, &last_coef, &p_che->coef_real[norder - 1], 1); - syncmem_var_d2h_op()(this->cpu_ctx, this->ctx, &last_spolyv, &spolyv[norder * norder - 1], 1); + syncmem_var_d2h_op()(&last_coef, &p_che->coef_real[norder - 1], 1); + syncmem_var_d2h_op()(&last_spolyv, &spolyv[norder * norder - 1], 1); Real dot1 = 0, dot2 = 0; this->dot(norder, p_che->coef_real, 1, spolyv + norder * (norder - 1), 1, dot1); this->dot(norder, p_che->coef_real, 1, spolyv + norder - 1, norder, dot2); @@ -391,7 +391,7 @@ void Stochastic_Iter::calPn(const int& ik, Stochastic_WF& } if(ik == this->pkv->get_nks() - 1) { - syncmem_var_h2d_op()(this->ctx, cpu_ctx, spolyv, spolyv_cpu, norder); + syncmem_var_h2d_op()(spolyv, spolyv_cpu, norder); } } else diff --git a/source/module_hamilt_pw/hamilt_stodft/sto_stress_pw.cpp b/source/module_hamilt_pw/hamilt_stodft/sto_stress_pw.cpp index 770b7319dd..f875604147 100644 --- a/source/module_hamilt_pw/hamilt_stodft/sto_stress_pw.cpp +++ b/source/module_hamilt_pw/hamilt_stodft/sto_stress_pw.cpp @@ -201,7 +201,7 @@ void Sto_Stress_PW::sto_stress_nl(ModuleBase::matrix& sigma, } // transfer stress from device to host - syncmem_var_d2h_op()(this->cpu_ctx, this->ctx, sigmanlc.data(), stress_device, 9); + syncmem_var_d2h_op()(sigmanlc.data(), stress_device, 9); delmem_var_op()(this->ctx, stress_device); // sum up forcenl from all processors for (int l = 0; l < 3; l++) diff --git a/source/module_hamilt_pw/hamilt_stodft/sto_wf.cpp b/source/module_hamilt_pw/hamilt_stodft/sto_wf.cpp index dfec4f3a05..d3d720106c 100644 --- a/source/module_hamilt_pw/hamilt_stodft/sto_wf.cpp +++ b/source/module_hamilt_pw/hamilt_stodft/sto_wf.cpp @@ -374,9 +374,7 @@ void Stochastic_WF::sync_chi0() Device* ctx = {}; if (base_device::get_device_type(ctx) == base_device::GpuDevice) { - syncmem_h2d_op()(this->chi0->get_device(), - this->chi0_cpu->get_device(), - this->chi0->get_pointer(), + syncmem_h2d_op()(this->chi0->get_pointer(), this->chi0_cpu->get_pointer(), this->chi0_cpu->size()); } diff --git a/source/module_hsolver/diago_dav_subspace.cpp b/source/module_hsolver/diago_dav_subspace.cpp index 8d156035ae..d89d2292c3 100644 --- a/source/module_hsolver/diago_dav_subspace.cpp +++ b/source/module_hsolver/diago_dav_subspace.cpp @@ -123,9 +123,7 @@ int Diago_DavSubspace::diag_once(const HPsiFunc& hpsi_func, { unconv[m] = m; - syncmem_complex_op()(this->ctx, - this->ctx, - this->psi_in_iter + m * this->dim, + syncmem_complex_op()(this->psi_in_iter + m * this->dim, psi_in + m * psi_in_dmax, this->dim); } @@ -228,9 +226,7 @@ int Diago_DavSubspace::diag_once(const HPsiFunc& hpsi_func, // update this->psi_in_iter according to psi_in for (size_t i = 0; i < this->n_band; i++) { - syncmem_complex_op()(this->ctx, - this->ctx, - this->psi_in_iter + i * this->dim, + syncmem_complex_op()(this->psi_in_iter + i * this->dim, psi_in + i * psi_in_dmax, this->dim); } @@ -273,7 +269,7 @@ void Diago_DavSubspace::cal_grad(const HPsiFunc& hpsi_func, { if (unconv[i] != i) { - syncmem_complex_op()(this->ctx, this->ctx, vcc + i * this->nbase_x, vcc + unconv[i] * this->nbase_x, nbase); + syncmem_complex_op()(vcc + i * this->nbase_x, vcc + unconv[i] * this->nbase_x, nbase); (*eigenvalue_iter)[i] = (*eigenvalue_iter)[unconv[i]]; } } @@ -310,7 +306,7 @@ void Diago_DavSubspace::cal_grad(const HPsiFunc& hpsi_func, e_temp_cpu.assign(nbase, (-1.0 * (*eigenvalue_iter)[m])); if (this->device == base_device::GpuDevice) { - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, e_temp_hd, e_temp_cpu.data(), nbase); + syncmem_var_h2d_op()(e_temp_hd, e_temp_cpu.data(), nbase); } vector_mul_vector_op()(this->ctx, nbase, @@ -356,7 +352,7 @@ void Diago_DavSubspace::cal_grad(const HPsiFunc& hpsi_func, #if defined(__CUDA) || defined(__ROCM) if (this->device == base_device::GpuDevice) { - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, this->d_precondition, pre.data(), this->dim); + syncmem_var_h2d_op()(this->d_precondition, pre.data(), this->dim); vector_div_vector_op()(this->ctx, this->dim, psi_iter + (nbase + m) * this->dim, @@ -461,7 +457,7 @@ void Diago_DavSubspace::cal_elem(const int& dim, #else auto* swap = new T[notconv * this->nbase_x]; - syncmem_complex_op()(this->ctx, this->ctx, swap, hcc + nbase * this->nbase_x, notconv * this->nbase_x); + syncmem_complex_op()(swap, hcc + nbase * this->nbase_x, notconv * this->nbase_x); if (std::is_same::value) { @@ -491,7 +487,7 @@ void Diago_DavSubspace::cal_elem(const int& dim, this->diag_comm.comm); } - syncmem_complex_op()(this->ctx, this->ctx, swap, scc + nbase * this->nbase_x, notconv * this->nbase_x); + syncmem_complex_op()(swap, scc + nbase * this->nbase_x, notconv * this->nbase_x); if (base_device::get_current_precision(swap) == "single") { @@ -546,7 +542,7 @@ void Diago_DavSubspace::diag_zhegvx(const int& nbase, Real* eigenvalue_gpu = nullptr; resmem_real_op()(eigenvalue_gpu, this->nbase_x); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, eigenvalue_gpu, (*eigenvalue_iter).data(), this->nbase_x); + syncmem_var_h2d_op()(eigenvalue_gpu, (*eigenvalue_iter).data(), this->nbase_x); T* hcc_gpu = nullptr; T* scc_gpu = nullptr; @@ -556,19 +552,19 @@ void Diago_DavSubspace::diag_zhegvx(const int& nbase, base_device::memory::resize_memory_op()(vcc_gpu, nbase * nbase); for(int i=0;i()(this->ctx, this->ctx, hcc_gpu + i * nbase, hcc + i * nbase_x, nbase); - base_device::memory::synchronize_memory_op()(this->ctx, this->ctx, scc_gpu + i * nbase, scc + i * nbase_x, nbase); + base_device::memory::synchronize_memory_op()(hcc_gpu + i * nbase, hcc + i * nbase_x, nbase); + base_device::memory::synchronize_memory_op()(scc_gpu + i * nbase, scc + i * nbase_x, nbase); } dngvd_op()(this->ctx, nbase, nbase, hcc_gpu, scc_gpu, eigenvalue_gpu, vcc_gpu); for(int i=0;i()(this->ctx, this->ctx, vcc + i * nbase_x, vcc_gpu + i * nbase, nbase); + base_device::memory::synchronize_memory_op()(vcc + i * nbase_x, vcc_gpu + i * nbase, nbase); } delmem_complex_op()(this->ctx, hcc_gpu); delmem_complex_op()(this->ctx, scc_gpu); delmem_complex_op()(this->ctx, vcc_gpu); - syncmem_var_d2h_op()(this->cpu_ctx, this->ctx, (*eigenvalue_iter).data(), eigenvalue_gpu, this->nbase_x); + syncmem_var_d2h_op()((*eigenvalue_iter).data(), eigenvalue_gpu, this->nbase_x); delmem_real_op()(this->ctx, eigenvalue_gpu); } @@ -715,7 +711,7 @@ void Diago_DavSubspace::refresh(const int& dim, this->dim); // update hphi - syncmem_complex_op()(this->ctx, this->ctx, hphi, psi_iter + nband * this->dim, this->dim * nband); + syncmem_complex_op()(hphi, psi_iter + nband * this->dim, this->dim * nband); nbase = nband; @@ -743,9 +739,9 @@ void Diago_DavSubspace::refresh(const int& dim, this->nbase_x * this->nbase_x, "DAV::vcc"); - syncmem_d2h_op()(this->cpu_ctx, this->ctx, hcc_cpu, hcc, this->nbase_x * this->nbase_x); - syncmem_d2h_op()(this->cpu_ctx, this->ctx, scc_cpu, scc, this->nbase_x * this->nbase_x); - syncmem_d2h_op()(this->cpu_ctx, this->ctx, vcc_cpu, vcc, this->nbase_x * this->nbase_x); + syncmem_d2h_op()(hcc_cpu, hcc, this->nbase_x * this->nbase_x); + syncmem_d2h_op()(scc_cpu, scc, this->nbase_x * this->nbase_x); + syncmem_d2h_op()(vcc_cpu, vcc, this->nbase_x * this->nbase_x); for (int i = 0; i < nbase; i++) { @@ -754,9 +750,9 @@ void Diago_DavSubspace::refresh(const int& dim, vcc_cpu[i * this->nbase_x + i] = this->one[0]; } - syncmem_h2d_op()(this->ctx, this->cpu_ctx, hcc, hcc_cpu, this->nbase_x * this->nbase_x); - syncmem_h2d_op()(this->ctx, this->cpu_ctx, scc, scc_cpu, this->nbase_x * this->nbase_x); - syncmem_h2d_op()(this->ctx, this->cpu_ctx, vcc, vcc_cpu, this->nbase_x * this->nbase_x); + syncmem_h2d_op()(hcc, hcc_cpu, this->nbase_x * this->nbase_x); + syncmem_h2d_op()(scc, scc_cpu, this->nbase_x * this->nbase_x); + syncmem_h2d_op()(vcc, vcc_cpu, this->nbase_x * this->nbase_x); base_device::memory::delete_memory_op()(this->cpu_ctx, hcc_cpu); base_device::memory::delete_memory_op()(this->cpu_ctx, scc_cpu); diff --git a/source/module_hsolver/diago_david.cpp b/source/module_hsolver/diago_david.cpp index 24d4a40429..200a6705d9 100644 --- a/source/module_hsolver/diago_david.cpp +++ b/source/module_hsolver/diago_david.cpp @@ -97,7 +97,7 @@ DiagoDavid::DiagoDavid(const Real* precondition_in, if (this->device == base_device::GpuDevice) { resmem_var_op()(this->d_precondition, dim); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, this->d_precondition, this->precondition, dim); + syncmem_var_h2d_op()(this->d_precondition, this->precondition, dim); } #endif } @@ -179,7 +179,7 @@ int DiagoDavid::diag_once(const HPsiFunc& hpsi_func, // begin SchmidtOrth for (int m = 0; m < nband; m++) { - syncmem_complex_op()(this->ctx, this->ctx, basis + dim*m, psi_in + m*ld_psi, dim); + syncmem_complex_op()(basis + dim*m, psi_in + m*ld_psi, dim); this->SchmidtOrth(dim, nband, @@ -370,9 +370,7 @@ void DiagoDavid::cal_grad(const HPsiFunc& hpsi_func, // vc_ev_vector[m * nbase + i] = vcc[i * nbase_x + unconv[m]]; for (int m = 0; m < notconv; m++) { - syncmem_complex_op()(this->ctx, - this->ctx, - vc_ev_vector + m * nbase, + syncmem_complex_op()(vc_ev_vector + m * nbase, vcc + unconv[m] * nbase_x, nbase); } @@ -418,7 +416,7 @@ void DiagoDavid::cal_grad(const HPsiFunc& hpsi_func, #if defined(__CUDA) || defined(__ROCM) Real* e_temp_gpu = nullptr; resmem_var_op()(e_temp_gpu, nbase); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, e_temp_gpu, e_temp_cpu.data(), nbase); + syncmem_var_h2d_op()(e_temp_gpu, e_temp_cpu.data(), nbase); vector_mul_vector_op()(this->ctx, nbase, vc_ev_vector + m * nbase, @@ -633,7 +631,7 @@ void DiagoDavid::cal_elem(const int& dim, // matrixTranspose_op()(this->ctx, nbase_x, nbase_x, scc, scc); auto* swap = new T[notconv * nbase_x]; - syncmem_complex_op()(this->ctx, this->ctx, swap, hcc + nbase * nbase_x, notconv * nbase_x); + syncmem_complex_op()(swap, hcc + nbase * nbase_x, notconv * nbase_x); if (std::is_same::value) { Parallel_Reduce::reduce_pool(hcc + nbase * nbase_x, notconv * nbase_x); @@ -699,11 +697,11 @@ void DiagoDavid::diag_zhegvx(const int& nbase, #if defined(__CUDA) || defined(__ROCM) Real* eigenvalue_gpu = nullptr; resmem_var_op()(eigenvalue_gpu, nbase_x); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, eigenvalue_gpu, this->eigenvalue, nbase_x); + syncmem_var_h2d_op()(eigenvalue_gpu, this->eigenvalue, nbase_x); dnevx_op()(this->ctx, nbase, nbase_x, hcc, nband, eigenvalue_gpu, vcc); - syncmem_var_d2h_op()(this->cpu_ctx, this->ctx, this->eigenvalue, eigenvalue_gpu, nbase_x); + syncmem_var_d2h_op()(this->eigenvalue, eigenvalue_gpu, nbase_x); delmem_var_op()(this->ctx, eigenvalue_gpu); #endif } @@ -788,8 +786,8 @@ void DiagoDavid::refresh(const int& dim, ); // hpsi = basis, spsi = basis[nband] - syncmem_complex_op()(this->ctx, this->ctx, hpsi, basis, dim * nband); - syncmem_complex_op()(this->ctx, this->ctx, spsi, basis + dim*nband, dim * nband); + syncmem_complex_op()(hpsi, basis, dim * nband); + syncmem_complex_op()(spsi, basis + dim*nband, dim * nband); /*for (int m = 0; m < nband; m++) { for (int ig = 0; ig < dim; ig++) { @@ -803,7 +801,7 @@ void DiagoDavid::refresh(const int& dim, for (int m = 0; m < nband; m++) { - syncmem_complex_op()(this->ctx, this->ctx, basis + dim*m,psi_in + m*ld_psi, dim); + syncmem_complex_op()(basis + dim*m,psi_in + m*ld_psi, dim); /*for (int ig = 0; ig < npw; ig++) basis(m, ig) = psi(m, ig);*/ } @@ -833,9 +831,9 @@ void DiagoDavid::refresh(const int& dim, nbase_x * nbase_x, "DAV::vcc"); - syncmem_d2h_op()(this->cpu_ctx, this->ctx, hcc_cpu, hcc, nbase_x * nbase_x); + syncmem_d2h_op()(hcc_cpu, hcc, nbase_x * nbase_x); // syncmem_d2h_op()(this->cpu_ctx, this->ctx, scc_cpu, scc, nbase_x * nbase_x); - syncmem_d2h_op()(this->cpu_ctx, this->ctx, vcc_cpu, vcc, nbase_x * nbase_x); + syncmem_d2h_op()(vcc_cpu, vcc, nbase_x * nbase_x); for (int i = 0; i < nbase; i++) { @@ -844,9 +842,9 @@ void DiagoDavid::refresh(const int& dim, vcc_cpu[i * nbase_x + i] = this->one[0]; } - syncmem_h2d_op()(this->ctx, this->cpu_ctx, hcc, hcc_cpu, nbase_x * nbase_x); + syncmem_h2d_op()(hcc, hcc_cpu, nbase_x * nbase_x); // syncmem_h2d_op()(this->ctx, this->cpu_ctx, scc, scc_cpu, nbase_x * nbase_x); - syncmem_h2d_op()(this->ctx, this->cpu_ctx, vcc, vcc_cpu, nbase_x * nbase_x); + syncmem_h2d_op()(vcc, vcc_cpu, nbase_x * nbase_x); base_device::memory::delete_memory_op()(this->cpu_ctx, hcc_cpu); // base_device::memory::delete_memory_op()(this->cpu_ctx, scc_cpu); @@ -937,7 +935,7 @@ void DiagoDavid::SchmidtOrth(const int& dim, Parallel_Reduce::reduce_pool(lagrange_m, m + 1); T var = *this->zero; - syncmem_d2h_op()(this->cpu_ctx, this->ctx, &var, lagrange_m + m, 1); + syncmem_d2h_op()(&var, lagrange_m + m, 1); double psi_norm = get_real(var); assert(psi_norm > 0.0); diff --git a/source/module_hsolver/diago_iter_assist.cpp b/source/module_hsolver/diago_iter_assist.cpp index 4f4aabd26c..223df68006 100644 --- a/source/module_hsolver/diago_iter_assist.cpp +++ b/source/module_hsolver/diago_iter_assist.cpp @@ -214,7 +214,7 @@ void DiagoIterAssist::diagH_subspace_init(hamilt::Hamilt* for (int i = 0; i < nstart; i++) { // psi_temp is one band psi, psi is all bands psi, the range always is 1 for the only band in psi_temp - syncmem_complex_op()(ctx, ctx, ppsi, psi + i * psi_nc, psi_nc); + syncmem_complex_op()(ppsi, psi + i * psi_nc, psi_nc); psi::Range band_by_band_range(true, 0, 0, 0); hpsi_info hpsi_in(&psi_temp, band_by_band_range, hpsi); @@ -229,7 +229,7 @@ void DiagoIterAssist::diagH_subspace_init(hamilt::Hamilt* // do sPsi band by band for (int i = 0; i < nstart; i++) { - syncmem_complex_op()(ctx, ctx, ppsi, psi + i * psi_nc, psi_nc); + syncmem_complex_op()(ppsi, psi + i * psi_nc, psi_nc); pHamilt->sPsi(ppsi, spsi, dmin, dmin, 1); gemv_op()(ctx, @@ -252,7 +252,7 @@ void DiagoIterAssist::diagH_subspace_init(hamilt::Hamilt* psi::Psi psi_temp(1, nstart, psi_nc, dmin, true); T* ppsi = psi_temp.get_pointer(); - syncmem_complex_op()(ctx, ctx, ppsi, psi, psi_temp.size()); + syncmem_complex_op()(ppsi, psi, psi_temp.size()); // hpsi and spsi share the temp space T* temp = nullptr; resmem_complex_op()(temp, nstart * psi_nc, "DiagSub::temp"); @@ -386,13 +386,13 @@ void DiagoIterAssist::diagH_LAPACK(const int nstart, { #if ((defined __CUDA) || (defined __ROCM)) // set eigenvalues in GPU to e in CPU - syncmem_var_d2h_op()(cpu_ctx, gpu_ctx, e, eigenvalues, nbands); + syncmem_var_d2h_op()(e, eigenvalues, nbands); #endif } else if (base_device::get_device_type(ctx) == base_device::CpuDevice) { // set eigenvalues in CPU to e in CPU - syncmem_var_op()(ctx, ctx, e, eigenvalues, nbands); + syncmem_var_op()(e, eigenvalues, nbands); } delmem_var_op()(ctx, eigenvalues); diff --git a/source/module_hsolver/hsolver_pw.cpp b/source/module_hsolver/hsolver_pw.cpp index de627d3474..81fec6cb6e 100644 --- a/source/module_hsolver/hsolver_pw.cpp +++ b/source/module_hsolver/hsolver_pw.cpp @@ -450,8 +450,6 @@ void HSolverPW::hamiltSolvePsiK(hamilt::Hamilt* hm, else { base_device::memory::synchronize_memory_op()( - this->ctx, - this->ctx, spsi_out.data(), psi_in.data(), static_cast((ndim == 1 ? 1 : psi_in.shape().dim_size(0)) diff --git a/source/module_hsolver/kernels/cuda/math_kernel_op.cu b/source/module_hsolver/kernels/cuda/math_kernel_op.cu index 2318e14d57..3ad26ef3a2 100644 --- a/source/module_hsolver/kernels/cuda/math_kernel_op.cu +++ b/source/module_hsolver/kernels/cuda/math_kernel_op.cu @@ -906,8 +906,6 @@ void matrixTranspose_op::operator()(const base_ } base_device::memory::synchronize_memory_op()( - d, - d, output_matrix, device_temp, row * col); @@ -947,8 +945,6 @@ void matrixTranspose_op, base_device::DEVICE_GPU>::operator( } base_device::memory::synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU>()( - d, - d, output_matrix, device_temp, row * col); @@ -989,7 +985,7 @@ void matrixTranspose_op, base_device::DEVICE_GPU>::operator base_device::memory::synchronize_memory_op, base_device::DEVICE_GPU, - base_device::DEVICE_GPU>()(d, d, output_matrix, device_temp, row * col); + base_device::DEVICE_GPU>()(output_matrix, device_temp, row * col); base_device::memory::delete_memory_op, base_device::DEVICE_GPU>()(d, device_temp); } diff --git a/source/module_hsolver/kernels/rocm/math_kernel_op.hip.cu b/source/module_hsolver/kernels/rocm/math_kernel_op.hip.cu index 0b9b11970e..d4185ff81c 100644 --- a/source/module_hsolver/kernels/rocm/math_kernel_op.hip.cu +++ b/source/module_hsolver/kernels/rocm/math_kernel_op.hip.cu @@ -823,8 +823,6 @@ void matrixTranspose_op::operator()(const base_ } base_device::memory::synchronize_memory_op()( - d, - d, output_matrix, device_temp, row * col); @@ -863,8 +861,6 @@ void matrixTranspose_op, base_device::DEVICE_GPU>::operator( } base_device::memory::synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU>()( - d, - d, output_matrix, device_temp, row * col); @@ -898,7 +894,7 @@ void matrixTranspose_op, base_device::DEVICE_GPU>::operator base_device::memory::synchronize_memory_op, base_device::DEVICE_GPU, - base_device::DEVICE_GPU>()(d, d, output_matrix, device_temp, row * col); + base_device::DEVICE_GPU>()(output_matrix, device_temp, row * col); base_device::memory::delete_memory_op, base_device::DEVICE_GPU>()(d, device_temp); } diff --git a/source/module_hsolver/kernels/test/math_dngvd_test.cpp b/source/module_hsolver/kernels/test/math_dngvd_test.cpp index 71a41073f0..a67b18d4be 100644 --- a/source/module_hsolver/kernels/test/math_dngvd_test.cpp +++ b/source/module_hsolver/kernels/test/math_dngvd_test.cpp @@ -141,7 +141,7 @@ TEST_F(TestModuleHsolverMathDngvd, transpose_gpu) }; std::complex* device_transpose = nullptr; resize_memory_op_Z()(device_transpose, matrix_size); - synchronize_memory_op_C2G_Z()(gpu_ctx, cpu_ctx, device_transpose, transpose.data(), transpose.size()); + synchronize_memory_op_C2G_Z()(device_transpose, transpose.data(), transpose.size()); // run hsolver::createGpuBlasHandle(); @@ -162,7 +162,7 @@ TEST_F(TestModuleHsolverMathDngvd, transpose_gpu) {0.0, 0.0}, // {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0} }; - synchronize_memory_op_G2C_Z()(cpu_ctx, gpu_ctx, transpose_result.data(), device_transpose, transpose.size()); + synchronize_memory_op_G2C_Z()(transpose_result.data(), device_transpose, transpose.size()); // std::vector > test_result = { // {-0.351417,-1.73472}, {-0.351417,-1.73472}, {-0.351417,-1.73472}, diff --git a/source/module_hsolver/kernels/test/math_kernel_test.cpp b/source/module_hsolver/kernels/test/math_kernel_test.cpp index ca5116a310..e69bc29b5f 100644 --- a/source/module_hsolver/kernels/test/math_kernel_test.cpp +++ b/source/module_hsolver/kernels/test/math_kernel_test.cpp @@ -373,8 +373,8 @@ TEST_F(TestModuleHsolverMathKernel, zdot_real_op_gpu) std::complex*psi_L_dev = NULL, *psi_R_dev = NULL; resize_memory_op()(psi_L_dev, psi_L.size()); resize_memory_op()(psi_R_dev, psi_R.size()); - synchronize_memory_op()(gpu_ctx, cpu_ctx, psi_L_dev, psi_L.data(), psi_L.size()); - synchronize_memory_op()(gpu_ctx, cpu_ctx, psi_R_dev, psi_R.data(), psi_R.size()); + synchronize_memory_op()(psi_L_dev, psi_L.data(), psi_L.size()); + synchronize_memory_op()(psi_R_dev, psi_R.data(), psi_R.size()); hsolver::createGpuBlasHandle(); double result = zdot_real_gpu_op()(gpu_ctx, dim, psi_L_dev, psi_R_dev, false); hsolver::destoryBLAShandle(); @@ -393,11 +393,11 @@ TEST_F(TestModuleHsolverMathKernel, vector_div_constant_op_gpu) resize_memory_op()(input_dev, input.size()); resize_memory_op()(output_dev, input.size()); // syn the input data in CPU to GPU - synchronize_memory_op()(gpu_ctx, cpu_ctx, input_dev, input.data(), input.size()); + synchronize_memory_op()(input_dev, input.data(), input.size()); // run vector_div_constant_op_gpu()(gpu_ctx, dim, output_dev, input_dev, constant); // syn the output data in GPU to CPU - synchronize_memory_op_gpu()(cpu_ctx, gpu_ctx, output.data(), output_dev, output.size()); + synchronize_memory_op_gpu()(output.data(), output_dev, output.size()); for (int i = 0; i < input.size(); i++) { @@ -424,14 +424,14 @@ TEST_F(TestModuleHsolverMathKernel, vector_mul_vector_op_gpu) resize_memory_op()(output_dev, input.size()); // syn the input data in CPU to GPU - synchronize_memory_op()(gpu_ctx, cpu_ctx, input_dev, input.data(), input.size()); - synchronize_memory_op_double()(gpu_ctx, cpu_ctx, input_double_dev, input_double.data(), input.size()); + synchronize_memory_op()(input_dev, input.data(), input.size()); + synchronize_memory_op_double()(input_double_dev, input_double.data(), input.size()); // run vector_mul_vector_op_gpu()(gpu_ctx, dim, output_dev, input_dev, input_double_dev); // syn the output data in GPU to CPU - synchronize_memory_op_gpu()(cpu_ctx, gpu_ctx, output.data(), output_dev, output.size()); + synchronize_memory_op_gpu()(output.data(), output_dev, output.size()); for (int i = 0; i < input.size(); i++) { @@ -460,14 +460,14 @@ TEST_F(TestModuleHsolverMathKernel, vector_div_vector_op_gpu) resize_memory_op()(output_dev, input.size()); // syn the input data in CPU to GPU - synchronize_memory_op()(gpu_ctx, cpu_ctx, input_dev, input.data(), input.size()); - synchronize_memory_op_double()(gpu_ctx, cpu_ctx, input_double_dev, input_double.data(), input.size()); + synchronize_memory_op()(input_dev, input.data(), input.size()); + synchronize_memory_op_double()(input_double_dev, input_double.data(), input.size()); // run vector_div_vector_op_gpu()(gpu_ctx, dim, output_dev, input_dev, input_double_dev); // syn the output data in GPU to CPU - synchronize_memory_op_gpu()(cpu_ctx, gpu_ctx, output.data(), output_dev, output.size()); + synchronize_memory_op_gpu()(output.data(), output_dev, output.size()); for (int i = 0; i < input.size(); i++) { @@ -496,8 +496,8 @@ TEST_F(TestModuleHsolverMathKernel, constantvector_addORsub_constantVector_op_gp resize_memory_op()(output_dev, input.size()); // syn the input data in CPU to GPU - synchronize_memory_op()(gpu_ctx, cpu_ctx, input1_dev, input1.data(), input.size()); - synchronize_memory_op()(gpu_ctx, cpu_ctx, input2_dev, input2.data(), input.size()); + synchronize_memory_op()(input1_dev, input1.data(), input.size()); + synchronize_memory_op()(input2_dev, input2.data(), input.size()); // run constantvector_addORsub_constantVector_op_gpu()(gpu_ctx, @@ -509,7 +509,7 @@ TEST_F(TestModuleHsolverMathKernel, constantvector_addORsub_constantVector_op_gp constant2); // syn the output data in GPU to CPU - synchronize_memory_op_gpu()(cpu_ctx, gpu_ctx, output.data(), output_dev, output.size()); + synchronize_memory_op_gpu()(output.data(), output_dev, output.size()); for (int i = 0; i < input.size(); i++) { @@ -533,8 +533,8 @@ TEST_F(TestModuleHsolverMathKernel, axpy_op_gpu) resize_memory_op()(Y_axpy_dev, Y_axpy.size()); // syn the input data in CPU to GPU - synchronize_memory_op()(gpu_ctx, cpu_ctx, X_axpy_dev, X_axpy.data(), X_axpy.size()); - synchronize_memory_op()(gpu_ctx, cpu_ctx, Y_axpy_dev, Y_axpy.data(), Y_axpy.size()); + synchronize_memory_op()(X_axpy_dev, X_axpy.data(), X_axpy.size()); + synchronize_memory_op()(Y_axpy_dev, Y_axpy.data(), Y_axpy.size()); // run hsolver::createGpuBlasHandle(); @@ -542,7 +542,7 @@ TEST_F(TestModuleHsolverMathKernel, axpy_op_gpu) hsolver::destoryBLAShandle(); // syn the output data in GPU to CPU - synchronize_memory_op_gpu()(cpu_ctx, gpu_ctx, Y_axpy.data(), Y_axpy_dev, Y_axpy.size()); + synchronize_memory_op_gpu()(Y_axpy.data(), Y_axpy_dev, Y_axpy.size()); for (int i = 0; i < input.size(); i++) { @@ -563,7 +563,7 @@ TEST_F(TestModuleHsolverMathKernel, scal_op_gpu) resize_memory_op()(X_scal_dev, X_scal.size()); // syn the input data in CPU to GPU - synchronize_memory_op()(gpu_ctx, cpu_ctx, X_scal_dev, X_scal.data(), X_scal.size()); + synchronize_memory_op()(X_scal_dev, X_scal.data(), X_scal.size()); // run hsolver::createGpuBlasHandle(); @@ -571,7 +571,7 @@ TEST_F(TestModuleHsolverMathKernel, scal_op_gpu) hsolver::destoryBLAShandle(); // syn the output data in GPU to CPU - synchronize_memory_op_gpu()(cpu_ctx, gpu_ctx, X_scal.data(), X_scal_dev, X_scal.size()); + synchronize_memory_op_gpu()(X_scal.data(), X_scal_dev, X_scal.size()); for (int i = 0; i < input.size(); i++) { @@ -594,16 +594,16 @@ TEST_F(TestModuleHsolverMathKernel, gemv_op_gpu) resize_memory_op()(Y_gemv_dev, Y_gemv.size()); // syn the input data in CPU to GPU - synchronize_memory_op()(gpu_ctx, cpu_ctx, A_gemv_dev, A_gemv.data(), A_gemv.size()); - synchronize_memory_op()(gpu_ctx, cpu_ctx, X_gemv_dev, X_gemv.data(), X_gemv.size()); - synchronize_memory_op()(gpu_ctx, cpu_ctx, Y_gemv_dev, Y_gemv.data(), Y_gemv.size()); + synchronize_memory_op()(A_gemv_dev, A_gemv.data(), A_gemv.size()); + synchronize_memory_op()(X_gemv_dev, X_gemv.data(), X_gemv.size()); + synchronize_memory_op()(Y_gemv_dev, Y_gemv.data(), Y_gemv.size()); // run hsolver::createGpuBlasHandle(); gemv_op_gpu()(gpu_ctx, 'C', 2, 3, &ModuleBase::ONE, A_gemv_dev, 2, X_gemv_dev, 1, &ModuleBase::ONE, Y_gemv_dev, 1); hsolver::destoryBLAShandle(); // syn the output data in GPU to CPU - synchronize_memory_op_gpu()(cpu_ctx, gpu_ctx, Y_gemv.data(), Y_gemv_dev, Y_gemv.size()); + synchronize_memory_op_gpu()(Y_gemv.data(), Y_gemv_dev, Y_gemv.size()); // cal right answer: Y_test_gemv char trans = 'C'; @@ -656,18 +656,14 @@ TEST_F(TestModuleHsolverMathKernel, matrixSetToAnother_op_gpu) std::complex* device_A = nullptr; base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(device_A, A.size()); base_device::memory:: - synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>()(gpu_ctx, - cpu_ctx, - device_A, + synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>()(device_A, A.data(), A.size()); std::complex* device_B = nullptr; base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(device_B, B.size()); base_device::memory:: - synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>()(gpu_ctx, - cpu_ctx, - device_B, + synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>()(device_B, B.data(), B.size()); @@ -682,9 +678,7 @@ TEST_F(TestModuleHsolverMathKernel, matrixSetToAnother_op_gpu) std::vector> B_gpu2cpu(8); base_device::memory::synchronize_memory_op, base_device::DEVICE_CPU, - base_device::DEVICE_GPU>()(cpu_ctx, - gpu_ctx, - B_gpu2cpu.data(), + base_device::DEVICE_GPU>()(B_gpu2cpu.data(), device_B, B_gpu2cpu.size()); diff --git a/source/module_hsolver/kernels/test/perf_math_kernel.cpp b/source/module_hsolver/kernels/test/perf_math_kernel.cpp index 3ea380ba13..b2b0704a9d 100644 --- a/source/module_hsolver/kernels/test/perf_math_kernel.cpp +++ b/source/module_hsolver/kernels/test/perf_math_kernel.cpp @@ -107,12 +107,12 @@ class PerfModuleHsolverMathKernel : public benchmark::Fixture { resize_memory_op()(test_zvector_a_gpu, dim_vector); resize_memory_op()(test_zvector_b_gpu, dim_vector); - synchronize_memory_op()(gpu_ctx, cpu_ctx, test_zvector_a_gpu, test_zvector_a, dim_vector); - synchronize_memory_op()(gpu_ctx, cpu_ctx, test_zvector_b_gpu, test_zvector_b, dim_vector); + synchronize_memory_op()(test_zvector_a_gpu, test_zvector_a, dim_vector); + synchronize_memory_op()(test_zvector_b_gpu, test_zvector_b, dim_vector); resize_memory_op()(result_zvector_gpu, dim_vector); - resize_memory_op_double()(gpu_ctx, test_dvector_a_gpu, dim_vector); - synchronize_memory_op_double()(gpu_ctx, cpu_ctx, test_dvector_a_gpu, test_dvector_a, dim_vector); + resize_memory_op_double()(test_dvector_a_gpu, dim_vector); + synchronize_memory_op_double()(test_dvector_a_gpu, test_dvector_a, dim_vector); hsolver::createGpuBlasHandle(); diff --git a/source/module_psi/psi.cpp b/source/module_psi/psi.cpp index 0fdbe5d742..01d606ec8d 100644 --- a/source/module_psi/psi.cpp +++ b/source/module_psi/psi.cpp @@ -201,9 +201,7 @@ Psi::Psi(const Psi& psi_in) // this function will copy psi_in.psi to this->psi no matter the device types of each other. this->resize(psi_in.get_nk(), psi_in.get_nbands(), psi_in.get_nbasis()); - base_device::memory::synchronize_memory_op()(this->ctx, - psi_in.get_device(), - this->psi, + base_device::memory::synchronize_memory_op()(this->psi, psi_in.get_pointer() - psi_in.get_psi_bias(), psi_in.size()); this->psi_bias = psi_in.get_psi_bias(); @@ -245,9 +243,7 @@ Psi::Psi(const Psi& psi_in) - psi_in.get_psi_bias(), psi_in.size()); // synchronize the memory from CPU to GPU - base_device::memory::synchronize_memory_op()(this->ctx, - psi_in.get_device(), - this->psi, + base_device::memory::synchronize_memory_op()(this->psi, arr, psi_in.size()); free(arr); @@ -269,7 +265,7 @@ template void Psi::set_all_psi(const T* another_pointer, const std::size_t size_in) { assert(size_in == this->size()); - synchronize_memory_op()(this->ctx, this->ctx, this->psi, another_pointer, this->size()); + synchronize_memory_op()(this->psi, another_pointer, this->size()); } template diff --git a/source/module_psi/psi_init.cpp b/source/module_psi/psi_init.cpp index 2cdce4a5a8..102e2d4b1a 100644 --- a/source/module_psi/psi_init.cpp +++ b/source/module_psi/psi_init.cpp @@ -139,7 +139,7 @@ void PSIInit::initialize_psi(Psi>* psi, this->psi_initer->init_psig(psi_cpu->get_pointer(), ik); if (psi_device->get_pointer() != psi_cpu->get_pointer()) { - syncmem_h2d_op()(ctx, cpu_ctx, psi_device->get_pointer(), psi_cpu->get_pointer(), nbands_start * nbasis); + syncmem_h2d_op()(psi_device->get_pointer(), psi_cpu->get_pointer(), nbands_start * nbasis); } std::vector::type> etatom(nbands_start, 0.0); @@ -170,7 +170,7 @@ void PSIInit::initialize_psi(Psi>* psi, { if (psi_device->get_pointer() != kspw_psi->get_pointer()) { - syncmem_complex_op()(ctx, ctx, kspw_psi->get_pointer(), psi_device->get_pointer(), nbands * nbasis); + syncmem_complex_op()(kspw_psi->get_pointer(), psi_device->get_pointer(), nbands * nbasis); } } } // end k-point loop From 248a9e2373a7e3348cb396ed182fed8c13a3bc1b Mon Sep 17 00:00:00 2001 From: critsium-xy Date: Thu, 16 Jan 2025 18:06:37 +0800 Subject: [PATCH 6/7] Remove ctx parameters in cast_memory_op --- .../module_device/cuda/memory_op.cu | 12 ++--- .../module_base/module_device/memory_op.cpp | 16 ++----- source/module_base/module_device/memory_op.h | 4 +- .../module_device/rocm/memory_op.hip.cu | 12 ++--- source/module_basis/module_pw/pw_basis_k.cpp | 12 ++--- source/module_elecstate/elecstate_pw.cpp | 4 +- .../module_elecstate/elecstate_pw_cal_tau.cpp | 2 +- source/module_elecstate/elecstate_pw_sdft.cpp | 2 +- .../potentials/potential_new.cpp | 16 ++----- source/module_esolver/esolver_ks_pw.cpp | 4 +- .../hamilt_pwdft/VNL_in_pw.cpp | 44 ++++++++----------- .../hamilt_pwdft/structure_factor.cpp | 12 ++--- .../hamilt_stodft/sto_iter.cpp | 4 +- source/module_hsolver/hsolver_lcaopw.cpp | 2 - source/module_hsolver/hsolver_pw.cpp | 2 - source/module_psi/psi.cpp | 8 +--- 16 files changed, 54 insertions(+), 102 deletions(-) diff --git a/source/module_base/module_device/cuda/memory_op.cu b/source/module_base/module_device/cuda/memory_op.cu index 1fecb1b66e..e64d5857b1 100644 --- a/source/module_base/module_device/cuda/memory_op.cu +++ b/source/module_base/module_device/cuda/memory_op.cu @@ -115,9 +115,7 @@ void synchronize_memory_op struct cast_memory_op { - void operator()(const base_device::DEVICE_GPU* dev_out, - const base_device::DEVICE_GPU* dev_in, - FPTYPE_out* arr_out, + void operator()(FPTYPE_out* arr_out, const FPTYPE_in* arr_in, const size_t size) { @@ -134,9 +132,7 @@ struct cast_memory_op struct cast_memory_op { - void operator()(const base_device::DEVICE_GPU* dev_out, - const base_device::DEVICE_CPU* dev_in, - FPTYPE_out* arr_out, + void operator()(FPTYPE_out* arr_out, const FPTYPE_in* arr_in, const size_t size) { @@ -161,9 +157,7 @@ struct cast_memory_op struct cast_memory_op { - void operator()(const base_device::DEVICE_CPU* dev_out, - const base_device::DEVICE_GPU* dev_in, - FPTYPE_out* arr_out, + void operator()(FPTYPE_out* arr_out, const FPTYPE_in* arr_in, const size_t size) { if (size == 0) {return;} diff --git a/source/module_base/module_device/memory_op.cpp b/source/module_base/module_device/memory_op.cpp index 2365467b70..290b3d35af 100644 --- a/source/module_base/module_device/memory_op.cpp +++ b/source/module_base/module_device/memory_op.cpp @@ -73,9 +73,7 @@ struct synchronize_memory_op struct cast_memory_op { - void operator()(const base_device::DEVICE_CPU* dev_out, - const base_device::DEVICE_CPU* dev_in, - FPTYPE_out* arr_out, + void operator()(FPTYPE_out* arr_out, const FPTYPE_in* arr_in, const size_t size) { @@ -202,9 +200,7 @@ struct synchronize_memory_op struct cast_memory_op { - void operator()(const base_device::DEVICE_GPU* dev_out, - const base_device::DEVICE_GPU* dev_in, - FPTYPE_out* arr_out, + void operator()(FPTYPE_out* arr_out, const FPTYPE_in* arr_in, const size_t size) { @@ -214,9 +210,7 @@ struct cast_memory_op struct cast_memory_op { - void operator()(const base_device::DEVICE_GPU* dev_out, - const base_device::DEVICE_CPU* dev_in, - FPTYPE_out* arr_out, + void operator()(FPTYPE_out* arr_out, const FPTYPE_in* arr_in, const size_t size) { @@ -226,9 +220,7 @@ struct cast_memory_op struct cast_memory_op { - void operator()(const base_device::DEVICE_CPU* dev_out, - const base_device::DEVICE_GPU* dev_in, - FPTYPE_out* arr_out, + void operator()(FPTYPE_out* arr_out, const FPTYPE_in* arr_in, const size_t size) { diff --git a/source/module_base/module_device/memory_op.h b/source/module_base/module_device/memory_op.h index ca3457d28b..61e8f012bf 100644 --- a/source/module_base/module_device/memory_op.h +++ b/source/module_base/module_device/memory_op.h @@ -69,9 +69,7 @@ struct cast_memory_op /// /// Output Parameters /// \param arr_out : output array initialized by the input array - void operator()(const Device_out* dev_out, - const Device_in* dev_in, - FPTYPE_out* arr_out, + void operator()(FPTYPE_out* arr_out, const FPTYPE_in* arr_in, const size_t size); }; diff --git a/source/module_base/module_device/rocm/memory_op.hip.cu b/source/module_base/module_device/rocm/memory_op.hip.cu index b51257d28d..10d78947e0 100644 --- a/source/module_base/module_device/rocm/memory_op.hip.cu +++ b/source/module_base/module_device/rocm/memory_op.hip.cu @@ -87,9 +87,7 @@ void synchronize_memory_op struct cast_memory_op { - void operator()(const base_device::DEVICE_GPU* dev_out, - const base_device::DEVICE_GPU* dev_in, - FPTYPE_out* arr_out, + void operator()(FPTYPE_out* arr_out, const FPTYPE_in* arr_in, const size_t size) { @@ -102,9 +100,7 @@ struct cast_memory_op struct cast_memory_op { - void operator()(const base_device::DEVICE_GPU* dev_out, - const base_device::DEVICE_CPU* dev_in, - FPTYPE_out* arr_out, + void operator()(FPTYPE_out* arr_out, const FPTYPE_in* arr_in, const size_t size) { @@ -131,9 +127,7 @@ struct cast_memory_op struct cast_memory_op { - void operator()(const base_device::DEVICE_CPU* dev_out, - const base_device::DEVICE_GPU* dev_in, - FPTYPE_out* arr_out, + void operator()(FPTYPE_out* arr_out, const FPTYPE_in* arr_in, const size_t size) { diff --git a/source/module_basis/module_pw/pw_basis_k.cpp b/source/module_basis/module_pw/pw_basis_k.cpp index 980490dc81..5af491c40a 100644 --- a/source/module_basis/module_pw/pw_basis_k.cpp +++ b/source/module_basis/module_pw/pw_basis_k.cpp @@ -100,7 +100,7 @@ void PW_Basis_K:: initparameters( if (this->device == "gpu") { if (this->precision == "single") { resmem_sd_op()(this->s_kvec_c, this->nks * 3); - castmem_d2s_h2d_op()(gpu_ctx, cpu_ctx, this->s_kvec_c, reinterpret_cast(&this->kvec_c[0][0]), this->nks * 3); + castmem_d2s_h2d_op()(this->s_kvec_c, reinterpret_cast(&this->kvec_c[0][0]), this->nks * 3); } resmem_dd_op()(this->d_kvec_c, this->nks * 3); syncmem_d2d_h2d_op()(this->d_kvec_c, reinterpret_cast(&this->kvec_c[0][0]), this->nks * 3); @@ -109,7 +109,7 @@ void PW_Basis_K:: initparameters( #endif if (this->precision == "single") { resmem_sh_op()(this->s_kvec_c, this->nks * 3); - castmem_d2s_h2h_op()(cpu_ctx, cpu_ctx, this->s_kvec_c, reinterpret_cast(&this->kvec_c[0][0]), this->nks * 3); + castmem_d2s_h2h_op()(this->s_kvec_c, reinterpret_cast(&this->kvec_c[0][0]), this->nks * 3); } this->d_kvec_c = reinterpret_cast(&this->kvec_c[0][0]); // There's no need to allocate double pointers while in a CPU environment. @@ -249,8 +249,8 @@ void PW_Basis_K::collect_local_pw(const double& erf_ecut_in, const double& erf_h if (this->precision == "single") { resmem_sd_op()(this->s_gk2, this->npwk_max * this->nks); resmem_sd_op()(this->s_gcar, this->npwk_max * this->nks * 3); - castmem_d2s_h2d_op()(gpu_ctx, cpu_ctx, this->s_gk2, this->gk2, this->npwk_max * this->nks); - castmem_d2s_h2d_op()(gpu_ctx, cpu_ctx, this->s_gcar, reinterpret_cast(&this->gcar[0][0]), this->npwk_max * this->nks * 3); + castmem_d2s_h2d_op()(this->s_gk2, this->gk2, this->npwk_max * this->nks); + castmem_d2s_h2d_op()(this->s_gcar, reinterpret_cast(&this->gcar[0][0]), this->npwk_max * this->nks * 3); } else { resmem_dd_op()(this->d_gk2, this->npwk_max * this->nks); @@ -264,8 +264,8 @@ void PW_Basis_K::collect_local_pw(const double& erf_ecut_in, const double& erf_h if (this->precision == "single") { resmem_sh_op()(this->s_gk2, this->npwk_max * this->nks, "PW_B_K::s_gk2"); resmem_sh_op()(this->s_gcar, this->npwk_max * this->nks * 3, "PW_B_K::s_gcar"); - castmem_d2s_h2h_op()(cpu_ctx, cpu_ctx, this->s_gk2, this->gk2, this->npwk_max * this->nks); - castmem_d2s_h2h_op()(cpu_ctx, cpu_ctx, this->s_gcar, reinterpret_cast(&this->gcar[0][0]), this->npwk_max * this->nks * 3); + castmem_d2s_h2h_op()(this->s_gk2, this->gk2, this->npwk_max * this->nks); + castmem_d2s_h2h_op()(this->s_gcar, reinterpret_cast(&this->gcar[0][0]), this->npwk_max * this->nks * 3); } else { this->d_gcar = reinterpret_cast(&this->gcar[0][0]); diff --git a/source/module_elecstate/elecstate_pw.cpp b/source/module_elecstate/elecstate_pw.cpp index abe0d4d43d..9ac8d0f14e 100644 --- a/source/module_elecstate/elecstate_pw.cpp +++ b/source/module_elecstate/elecstate_pw.cpp @@ -142,10 +142,10 @@ void ElecStatePW::psiToRho(const psi::Psi& psi) { for (int ii = 0; ii < PARAM.inp.nspin; ii++) { - castmem_var_d2h_op()(cpu_ctx, this->ctx, this->charge->rho[ii], this->rho[ii], this->charge->nrxx); + castmem_var_d2h_op()(this->charge->rho[ii], this->rho[ii], this->charge->nrxx); if (get_xc_func_type() == 3) { - castmem_var_d2h_op()(cpu_ctx, this->ctx, this->charge->kin_r[ii], this->kin_r[ii], this->charge->nrxx); + castmem_var_d2h_op()(this->charge->kin_r[ii], this->kin_r[ii], this->charge->nrxx); } } } diff --git a/source/module_elecstate/elecstate_pw_cal_tau.cpp b/source/module_elecstate/elecstate_pw_cal_tau.cpp index 98c9a24860..5c225c3d62 100644 --- a/source/module_elecstate/elecstate_pw_cal_tau.cpp +++ b/source/module_elecstate/elecstate_pw_cal_tau.cpp @@ -52,7 +52,7 @@ void ElecStatePW::cal_tau(const psi::Psi& psi) } if (PARAM.inp.device == "gpu" || PARAM.inp.precision == "single") { for (int ii = 0; ii < PARAM.inp.nspin; ii++) { - castmem_var_d2h_op()(cpu_ctx, this->ctx, this->charge->kin_r[ii], this->kin_r[ii], this->charge->nrxx); + castmem_var_d2h_op()(this->charge->kin_r[ii], this->kin_r[ii], this->charge->nrxx); } } this->parallelK(); diff --git a/source/module_elecstate/elecstate_pw_sdft.cpp b/source/module_elecstate/elecstate_pw_sdft.cpp index ea9e4463c8..bef6277adb 100644 --- a/source/module_elecstate/elecstate_pw_sdft.cpp +++ b/source/module_elecstate/elecstate_pw_sdft.cpp @@ -28,7 +28,7 @@ void ElecStatePW_SDFT::psiToRho(const psi::Psi& psi) } if (PARAM.inp.device == "gpu" || PARAM.inp.precision == "single") { for (int ii = 0; ii < nspin; ii++) { - castmem_var_d2h_op()(cpu_ctx, this->ctx, this->charge->rho[ii], this->rho[ii], this->charge->nrxx); + castmem_var_d2h_op()(this->charge->rho[ii], this->rho[ii], this->charge->nrxx); } } this->parallelK(); diff --git a/source/module_elecstate/potentials/potential_new.cpp b/source/module_elecstate/potentials/potential_new.cpp index 56c8446138..f1cc883bd8 100644 --- a/source/module_elecstate/potentials/potential_new.cpp +++ b/source/module_elecstate/potentials/potential_new.cpp @@ -181,14 +181,10 @@ void Potential::update_from_charge(const Charge*const chg, const UnitCell*const if (PARAM.inp.basis_type == "pw" && PARAM.inp.device == "gpu") { if (PARAM.inp.precision == "single") { - castmem_d2s_h2d_op()(gpu_ctx, - cpu_ctx, - s_veff_smooth, + castmem_d2s_h2d_op()(s_veff_smooth, this->veff_smooth.c, this->veff_smooth.nr * this->veff_smooth.nc); - castmem_d2s_h2d_op()(gpu_ctx, - cpu_ctx, - s_vofk_smooth, + castmem_d2s_h2d_op()(s_vofk_smooth, this->vofk_smooth.c, this->vofk_smooth.nr * this->vofk_smooth.nc); } @@ -203,14 +199,10 @@ void Potential::update_from_charge(const Charge*const chg, const UnitCell*const } else { if (PARAM.inp.precision == "single") { - castmem_d2s_h2h_op()(cpu_ctx, - cpu_ctx, - s_veff_smooth, + castmem_d2s_h2h_op()(s_veff_smooth, this->veff_smooth.c, this->veff_smooth.nr * this->veff_smooth.nc); - castmem_d2s_h2h_op()(cpu_ctx, - cpu_ctx, - s_vofk_smooth, + castmem_d2s_h2h_op()(s_vofk_smooth, this->vofk_smooth.c, this->vofk_smooth.nr * this->vofk_smooth.nc); } diff --git a/source/module_esolver/esolver_ks_pw.cpp b/source/module_esolver/esolver_ks_pw.cpp index 84bf0fe8a4..a96d487a5c 100644 --- a/source/module_esolver/esolver_ks_pw.cpp +++ b/source/module_esolver/esolver_ks_pw.cpp @@ -646,9 +646,7 @@ void ESolver_KS_PW::after_scf(UnitCell& ucell, const int istep) // 4) Transfer data from GPU to CPU if (this->device == base_device::GpuDevice) { - castmem_2d_d2h_op()(this->psi[0].get_device(), - this->kspw_psi[0].get_device(), - this->psi[0].get_pointer() - this->psi[0].get_psi_bias(), + castmem_2d_d2h_op()(this->psi[0].get_pointer() - this->psi[0].get_psi_bias(), this->kspw_psi[0].get_pointer() - this->kspw_psi[0].get_psi_bias(), this->psi[0].size()); } diff --git a/source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp b/source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp index a77c8fc5f8..ba486d81f2 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp @@ -476,7 +476,7 @@ void pseudopot_cell_vnl::getvnl(Device* ctx, syncmem_int_op()(atom_na, h_atom_na, ucell.ntype); resmem_var_op()(gk, npw * 3); - castmem_var_h2d_op()(ctx, cpu_ctx, gk, reinterpret_cast(_gk), npw * 3); + castmem_var_h2d_op()(gk, reinterpret_cast(_gk), npw * 3); } else { @@ -486,7 +486,7 @@ void pseudopot_cell_vnl::getvnl(Device* ctx, if (PARAM.inp.precision == "single") { resmem_var_op()(gk, npw * 3); - castmem_var_h2h_op()(cpu_ctx, cpu_ctx, gk, reinterpret_cast(_gk), npw * 3); + castmem_var_h2h_op()(gk, reinterpret_cast(_gk), npw * 3); } else { @@ -872,12 +872,12 @@ void pseudopot_cell_vnl::init_vnl(UnitCell& cell, const ModulePW::PW_Basis* rho_ { if (PARAM.inp.precision == "single") { - castmem_d2s_h2d_op()(gpu_ctx, cpu_ctx, this->s_indv, this->indv.c, this->indv.nr * this->indv.nc); - castmem_d2s_h2d_op()(gpu_ctx, cpu_ctx, this->s_nhtol, this->nhtol.c, this->nhtol.nr * this->nhtol.nc); - castmem_d2s_h2d_op()(gpu_ctx, cpu_ctx, this->s_nhtolm, this->nhtolm.c, this->nhtolm.nr * this->nhtolm.nc); - castmem_d2s_h2d_op()(gpu_ctx, cpu_ctx, this->s_tab, this->tab.ptr, this->tab.getSize()); - castmem_d2s_h2d_op()(gpu_ctx, cpu_ctx, this->s_qq_nt, this->qq_nt.ptr, this->qq_nt.getSize()); - castmem_z2c_h2d_op()(gpu_ctx, cpu_ctx, this->c_qq_so, this->qq_so.ptr, this->qq_so.getSize()); + castmem_d2s_h2d_op()(this->s_indv, this->indv.c, this->indv.nr * this->indv.nc); + castmem_d2s_h2d_op()(this->s_nhtol, this->nhtol.c, this->nhtol.nr * this->nhtol.nc); + castmem_d2s_h2d_op()(this->s_nhtolm, this->nhtolm.c, this->nhtolm.nr * this->nhtolm.nc); + castmem_d2s_h2d_op()(this->s_tab, this->tab.ptr, this->tab.getSize()); + castmem_d2s_h2d_op()(this->s_qq_nt, this->qq_nt.ptr, this->qq_nt.getSize()); + castmem_z2c_h2d_op()(this->c_qq_so, this->qq_so.ptr, this->qq_so.getSize()); } else { @@ -896,12 +896,12 @@ void pseudopot_cell_vnl::init_vnl(UnitCell& cell, const ModulePW::PW_Basis* rho_ { if (PARAM.inp.precision == "single") { - castmem_d2s_h2h_op()(cpu_ctx, cpu_ctx, this->s_indv, this->indv.c, this->indv.nr * this->indv.nc); - castmem_d2s_h2h_op()(cpu_ctx, cpu_ctx, this->s_nhtol, this->nhtol.c, this->nhtol.nr * this->nhtol.nc); - castmem_d2s_h2h_op()(cpu_ctx, cpu_ctx, this->s_nhtolm, this->nhtolm.c, this->nhtolm.nr * this->nhtolm.nc); - castmem_d2s_h2h_op()(cpu_ctx, cpu_ctx, this->s_tab, this->tab.ptr, this->tab.getSize()); - castmem_d2s_h2h_op()(cpu_ctx, cpu_ctx, this->s_qq_nt, this->qq_nt.ptr, this->qq_nt.getSize()); - castmem_z2c_h2h_op()(cpu_ctx, cpu_ctx, this->c_qq_so, this->qq_so.ptr, this->qq_so.getSize()); + castmem_d2s_h2h_op()(this->s_indv, this->indv.c, this->indv.nr * this->indv.nc); + castmem_d2s_h2h_op()(this->s_nhtol, this->nhtol.c, this->nhtol.nr * this->nhtol.nc); + castmem_d2s_h2h_op()(this->s_nhtolm, this->nhtolm.c, this->nhtolm.nr * this->nhtolm.nc); + castmem_d2s_h2h_op()(this->s_tab, this->tab.ptr, this->tab.getSize()); + castmem_d2s_h2h_op()(this->s_qq_nt, this->qq_nt.ptr, this->qq_nt.getSize()); + castmem_z2c_h2h_op()(this->c_qq_so, this->qq_so.ptr, this->qq_so.getSize()); } // There's no need to synchronize double precision pointers while in a CPU environment. } @@ -1490,14 +1490,10 @@ void pseudopot_cell_vnl::cal_effective_D(const ModuleBase::matrix& veff, { if (PARAM.inp.precision == "single") { - castmem_d2s_h2d_op()(gpu_ctx, - cpu_ctx, - this->s_deeq, + castmem_d2s_h2d_op()(this->s_deeq, this->deeq.ptr, PARAM.inp.nspin * cell.nat * this->nhm * this->nhm); - castmem_z2c_h2d_op()(gpu_ctx, - cpu_ctx, - this->c_deeq_nc, + castmem_z2c_h2d_op()(this->c_deeq_nc, this->deeq_nc.ptr, PARAM.inp.nspin * cell.nat * this->nhm * this->nhm); } @@ -1515,14 +1511,10 @@ void pseudopot_cell_vnl::cal_effective_D(const ModuleBase::matrix& veff, { if (PARAM.inp.precision == "single") { - castmem_d2s_h2h_op()(cpu_ctx, - cpu_ctx, - this->s_deeq, + castmem_d2s_h2h_op()(this->s_deeq, this->deeq.ptr, PARAM.inp.nspin * cell.nat * this->nhm * this->nhm); - castmem_z2c_h2h_op()(cpu_ctx, - cpu_ctx, - this->c_deeq_nc, + castmem_z2c_h2h_op()(this->c_deeq_nc, this->deeq_nc.ptr, PARAM.inp.nspin * cell.nat * this->nhm * this->nhm); } diff --git a/source/module_hamilt_pw/hamilt_pwdft/structure_factor.cpp b/source/module_hamilt_pw/hamilt_pwdft/structure_factor.cpp index 0ede5db9cd..d8a9434197 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/structure_factor.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/structure_factor.cpp @@ -154,9 +154,9 @@ void Structure_Factor::setup_structure_factor(const UnitCell* Ucell, const Paral resmem_cd_op()(this->c_eigts1, Ucell->nat * (2 * rho_basis->nx + 1)); resmem_cd_op()(this->c_eigts2, Ucell->nat * (2 * rho_basis->ny + 1)); resmem_cd_op()(this->c_eigts3, Ucell->nat * (2 * rho_basis->nz + 1)); - castmem_z2c_h2d_op()(gpu_ctx, cpu_ctx, this->c_eigts1, this->eigts1.c, Ucell->nat * (2 * rho_basis->nx + 1)); - castmem_z2c_h2d_op()(gpu_ctx, cpu_ctx, this->c_eigts2, this->eigts2.c, Ucell->nat * (2 * rho_basis->ny + 1)); - castmem_z2c_h2d_op()(gpu_ctx, cpu_ctx, this->c_eigts3, this->eigts3.c, Ucell->nat * (2 * rho_basis->nz + 1)); + castmem_z2c_h2d_op()(this->c_eigts1, this->eigts1.c, Ucell->nat * (2 * rho_basis->nx + 1)); + castmem_z2c_h2d_op()(this->c_eigts2, this->eigts2.c, Ucell->nat * (2 * rho_basis->ny + 1)); + castmem_z2c_h2d_op()(this->c_eigts3, this->eigts3.c, Ucell->nat * (2 * rho_basis->nz + 1)); } resmem_zd_op()(this->z_eigts1, Ucell->nat * (2 * rho_basis->nx + 1)); resmem_zd_op()(this->z_eigts2, Ucell->nat * (2 * rho_basis->ny + 1)); @@ -170,9 +170,9 @@ void Structure_Factor::setup_structure_factor(const UnitCell* Ucell, const Paral resmem_ch_op()(this->c_eigts1, Ucell->nat * (2 * rho_basis->nx + 1)); resmem_ch_op()(this->c_eigts2, Ucell->nat * (2 * rho_basis->ny + 1)); resmem_ch_op()(this->c_eigts3, Ucell->nat * (2 * rho_basis->nz + 1)); - castmem_z2c_h2h_op()(cpu_ctx, cpu_ctx, this->c_eigts1, this->eigts1.c, Ucell->nat * (2 * rho_basis->nx + 1)); - castmem_z2c_h2h_op()(cpu_ctx, cpu_ctx, this->c_eigts2, this->eigts2.c, Ucell->nat * (2 * rho_basis->ny + 1)); - castmem_z2c_h2h_op()(cpu_ctx, cpu_ctx, this->c_eigts3, this->eigts3.c, Ucell->nat * (2 * rho_basis->nz + 1)); + castmem_z2c_h2h_op()(this->c_eigts1, this->eigts1.c, Ucell->nat * (2 * rho_basis->nx + 1)); + castmem_z2c_h2h_op()(this->c_eigts2, this->eigts2.c, Ucell->nat * (2 * rho_basis->ny + 1)); + castmem_z2c_h2h_op()(this->c_eigts3, this->eigts3.c, Ucell->nat * (2 * rho_basis->nz + 1)); } this->z_eigts1 = this->eigts1.c; this->z_eigts2 = this->eigts2.c; diff --git a/source/module_hamilt_pw/hamilt_stodft/sto_iter.cpp b/source/module_hamilt_pw/hamilt_stodft/sto_iter.cpp index 9a24bd7c7d..cddfb5f81f 100644 --- a/source/module_hamilt_pw/hamilt_stodft/sto_iter.cpp +++ b/source/module_hamilt_pw/hamilt_stodft/sto_iter.cpp @@ -624,7 +624,7 @@ void Stochastic_Iter::cal_storho(const UnitCell& ucell, if (PARAM.inp.device == "gpu" || PARAM.inp.precision == "single") { for(int is = 0; is < nspin; ++is) { - castmem_var_d2h_op()(this->cpu_ctx, this->ctx, sto_rho[is], pes->rho[is], nrxx); + castmem_var_d2h_op()(sto_rho[is], pes->rho[is], nrxx); } } else @@ -736,7 +736,7 @@ void Stochastic_Iter::calTnchi_ik(const int& ik, Stochastic_WFnorder; T* coef_real = nullptr; resmem_complex_op()(coef_real, N); - castmem_d2z_op()(this->ctx, this->ctx, coef_real, p_che->coef_real, p_che->norder); + castmem_d2z_op()(coef_real, p_che->coef_real, p_che->norder); gemv_op()(this->ctx, transa, M, N, &one, stowf.chiallorder[ik].get_pointer(), LDA, coef_real, inc, &zero, out, inc); // zgemv_(&transa, &M, &N, &one, stowf.chiallorder[ik].get_pointer(), &LDA, coef_real, &inc, &zero, out, &inc); delmem_complex_op()(this->ctx, coef_real); diff --git a/source/module_hsolver/hsolver_lcaopw.cpp b/source/module_hsolver/hsolver_lcaopw.cpp index 059318034a..b6e95b4c03 100644 --- a/source/module_hsolver/hsolver_lcaopw.cpp +++ b/source/module_hsolver/hsolver_lcaopw.cpp @@ -270,8 +270,6 @@ void HSolverLIP::solve(hamilt::Hamilt* pHamilt, // ESolver_KS_PW::p_hamilt /// calculate the contribution of Psi for charge density rho } base_device::memory::cast_memory_op()( - cpu_ctx, - cpu_ctx, pes->ekb.c, eigenvalues.data(), pes->ekb.nr * pes->ekb.nc); diff --git a/source/module_hsolver/hsolver_pw.cpp b/source/module_hsolver/hsolver_pw.cpp index 81fec6cb6e..05ccc8acd0 100644 --- a/source/module_hsolver/hsolver_pw.cpp +++ b/source/module_hsolver/hsolver_pw.cpp @@ -329,8 +329,6 @@ void HSolverPW::solve(hamilt::Hamilt* pHamilt, // copy eigenvalues to ekb in ElecState base_device::memory::cast_memory_op()( - cpu_ctx, - cpu_ctx, // pes->ekb.c, out_eigenvalues, eigenvalues.data(), diff --git a/source/module_psi/psi.cpp b/source/module_psi/psi.cpp index 01d606ec8d..cc2a2d5d41 100644 --- a/source/module_psi/psi.cpp +++ b/source/module_psi/psi.cpp @@ -236,9 +236,7 @@ Psi::Psi(const Psi& psi_in) { auto* arr = (T*)malloc(sizeof(T) * psi_in.size()); // cast the memory from T_in to T in CPU - base_device::memory::cast_memory_op()(psi_in.get_device(), - psi_in.get_device(), - arr, + base_device::memory::cast_memory_op()(arr, psi_in.get_pointer() - psi_in.get_psi_bias(), psi_in.size()); @@ -250,9 +248,7 @@ Psi::Psi(const Psi& psi_in) } else { - base_device::memory::cast_memory_op()(this->ctx, - psi_in.get_device(), - this->psi, + base_device::memory::cast_memory_op()(this->psi, psi_in.get_pointer() - psi_in.get_psi_bias(), psi_in.size()); } From 7565f9ad606ddc871194245cd00e56916bd205da Mon Sep 17 00:00:00 2001 From: critsium-xy Date: Thu, 16 Jan 2025 18:36:20 +0800 Subject: [PATCH 7/7] Remove ctx parameter in delete_memory_op --- .../module_base/kernels/test/math_op_test.cpp | 6 +- source/module_base/math_chebyshev.cpp | 32 ++++----- source/module_base/math_ylmreal.cpp | 6 +- .../module_device/cuda/memory_op.cu | 4 +- .../module_base/module_device/memory_op.cpp | 32 ++++----- source/module_base/module_device/memory_op.h | 13 ++-- .../module_device/rocm/memory_op.hip.cu | 4 +- .../module_device/test/memory_test.cpp | 8 +-- source/module_base/parallel_device.h | 4 +- .../module_base/test/blas_connector_test.cpp | 12 ++-- .../module_pw/kernels/test/pw_op_test.cpp | 16 ++--- .../module_pw/module_fft/fft_cuda.cpp | 4 +- .../module_pw/module_fft/fft_rocm.cpp | 4 +- source/module_basis/module_pw/pw_basis.cpp | 2 +- source/module_basis/module_pw/pw_basis_k.cpp | 22 +++--- .../module_basis/module_pw/pw_basis_sup.cpp | 2 +- .../module_basis/module_pw/pw_distributeg.cpp | 2 +- source/module_elecstate/elecstate_pw.cpp | 32 ++++----- .../potentials/potential_new.cpp | 12 ++-- .../module_deltaspin/cal_mw_from_lambda.cpp | 10 +-- .../hamilt_pwdft/VNL_in_pw.cpp | 70 +++++++++---------- .../hamilt_pwdft/forces_cc.cpp | 22 +++--- .../hamilt_pwdft/forces_nl.cpp | 2 +- .../hamilt_pwdft/forces_onsite.cpp | 2 +- .../hamilt_pwdft/forces_scc.cpp | 10 +-- .../hamilt_pwdft/fs_kin_tools.cpp | 4 +- .../hamilt_pwdft/fs_nonlocal_tools.cpp | 40 +++++------ .../hamilt_pwdft/hamilt_pw.cpp | 6 +- .../kernels/test/ekinetic_op_test.cpp | 6 +- .../kernels/test/force_op_test.cpp | 24 +++---- .../kernels/test/meta_op_test.cpp | 8 +-- .../kernels/test/nonlocal_op_test.cpp | 12 ++-- .../kernels/test/stress_op_test.cpp | 36 +++++----- .../kernels/test/veff_op_test.cpp | 10 +-- .../hamilt_pwdft/kernels/test/vnl_op_test.cpp | 24 +++---- .../hamilt_pwdft/kernels/test/wf_op_test.cpp | 18 ++--- .../hamilt_pwdft/onsite_proj_tools.cpp | 52 +++++++------- .../hamilt_pwdft/onsite_projector.cpp | 6 +- .../hamilt_pwdft/operator_pw/meta_pw.cpp | 2 +- .../hamilt_pwdft/operator_pw/nonlocal_pw.cpp | 4 +- .../operator_pw/onsite_proj_pw.cpp | 16 ++--- .../hamilt_pwdft/operator_pw/veff_pw.cpp | 4 +- .../hamilt_pwdft/stress_func_cc.cpp | 10 +-- .../hamilt_pwdft/stress_func_nl.cpp | 2 +- .../hamilt_pwdft/stress_func_onsite.cpp | 2 +- .../hamilt_pwdft/structure_factor.cpp | 18 ++--- .../hamilt_pwdft/structure_factor_k.cpp | 4 +- .../hamilt_stodft/sto_che.cpp | 2 +- .../module_hamilt_pw/hamilt_stodft/sto_che.h | 4 +- .../hamilt_stodft/sto_forces.cpp | 2 +- .../hamilt_stodft/sto_iter.cpp | 10 +-- .../hamilt_stodft/sto_stress_pw.cpp | 2 +- source/module_hsolver/diago_dav_subspace.cpp | 28 ++++---- source/module_hsolver/diago_david.cpp | 28 ++++---- source/module_hsolver/diago_iter_assist.cpp | 28 ++++---- .../kernels/cuda/math_kernel_op.cu | 6 +- .../module_hsolver/kernels/math_kernel_op.cpp | 2 +- .../kernels/rocm/math_kernel_op.hip.cu | 6 +- .../kernels/test/math_kernel_test.cpp | 42 +++++------ source/module_hsolver/test/hsolver_pw_sup.h | 16 ++--- source/module_psi/psi.cpp | 2 +- 61 files changed, 406 insertions(+), 413 deletions(-) diff --git a/source/module_base/kernels/test/math_op_test.cpp b/source/module_base/kernels/test/math_op_test.cpp index 6d3aa8d10a..cfdedb234e 100644 --- a/source/module_base/kernels/test/math_op_test.cpp +++ b/source/module_base/kernels/test/math_op_test.cpp @@ -332,9 +332,9 @@ TEST_F(TestModuleBaseMathMultiDevice, cal_ylm_real_op_gpu) EXPECT_LT(fabs(ylm[ii] - expected_ylm[ii]), 6e-5); } - delmem_var_op()(gpu_ctx, d_g); - delmem_var_op()(gpu_ctx, d_p); - delmem_var_op()(gpu_ctx, d_ylm); + delmem_var_op()(d_g); + delmem_var_op()(d_p); + delmem_var_op()(d_ylm); } #endif // __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM \ No newline at end of file diff --git a/source/module_base/math_chebyshev.cpp b/source/module_base/math_chebyshev.cpp index a152d20505..b2cc6aadea 100644 --- a/source/module_base/math_chebyshev.cpp +++ b/source/module_base/math_chebyshev.cpp @@ -84,8 +84,8 @@ Chebyshev::~Chebyshev() delete[] polytrace; if (base_device::get_device_type(this->ctx) == base_device::GpuDevice) { - delmem_var_op()(this->ctx, this->coef_real); - delmem_complex_op()(this->ctx, this->coef_complex); + delmem_var_op()(this->coef_real); + delmem_complex_op()(this->coef_complex); } else { @@ -132,7 +132,7 @@ REAL Chebyshev::ddot_real(const std::complex* psi_L, resmem_var_op()(dot_device, 1); container::kernels::blas_dot()(dim2, pL, 1, pR, 1, dot_device); syncmem_var_d2h_op()(&result, dot_device, 1); - delmem_var_op()(this->ctx, dot_device); + delmem_var_op()(dot_device); } else { @@ -151,7 +151,7 @@ REAL Chebyshev::ddot_real(const std::complex* psi_L, pL += 2 * LDA; pR += 2 * LDA; } - delmem_var_op()(this->ctx, dot_device); + delmem_var_op()(dot_device); } return result; } @@ -462,9 +462,9 @@ void Chebyshev::calfinalvec_real( arrayn = arraynp1; arraynp1 = tem; } - delmem_complex_op()(this->ctx, arraynp1); - delmem_complex_op()(this->ctx, arrayn); - delmem_complex_op()(this->ctx, arrayn_1); + delmem_complex_op()(arraynp1); + delmem_complex_op()(arrayn); + delmem_complex_op()(arrayn_1); return; } @@ -527,9 +527,9 @@ void Chebyshev::calfinalvec_complex( arrayn = arraynp1; arraynp1 = tem; } - delmem_complex_op()(this->ctx, arraynp1); - delmem_complex_op()(this->ctx, arrayn); - delmem_complex_op()(this->ctx, arrayn_1); + delmem_complex_op()(arraynp1); + delmem_complex_op()(arrayn); + delmem_complex_op()(arrayn_1); return; } @@ -618,9 +618,9 @@ void Chebyshev::tracepolyA( arraynp1 = tem; } - delmem_complex_op()(this->ctx, arraynp1); - delmem_complex_op()(this->ctx, arrayn); - delmem_complex_op()(this->ctx, arrayn_1); + delmem_complex_op()(arraynp1); + delmem_complex_op()(arrayn); + delmem_complex_op()(arrayn_1); return; } @@ -754,9 +754,9 @@ bool Chebyshev::checkconverge( arraynp1 = tem; } - delmem_complex_op()(this->ctx, arraynp1); - delmem_complex_op()(this->ctx, arrayn); - delmem_complex_op()(this->ctx, arrayn_1); + delmem_complex_op()(arraynp1); + delmem_complex_op()(arrayn); + delmem_complex_op()(arrayn_1); return converge; } diff --git a/source/module_base/math_ylmreal.cpp b/source/module_base/math_ylmreal.cpp index 80153a2055..fac76cf959 100644 --- a/source/module_base/math_ylmreal.cpp +++ b/source/module_base/math_ylmreal.cpp @@ -342,9 +342,9 @@ void YlmReal::Ylm_Real(Device * ctx, const int lmax2, const int ng, const FPTYPE p, ylm); - delmem_var_op()(ctx, p); - delmem_var_op()(ctx, phi); - delmem_var_op()(ctx, cost); + delmem_var_op()(p); + delmem_var_op()(phi); + delmem_var_op()(cost); } // end subroutine ylmr2 //========================================================== diff --git a/source/module_base/module_device/cuda/memory_op.cu b/source/module_base/module_device/cuda/memory_op.cu index e64d5857b1..c4f9efdb42 100644 --- a/source/module_base/module_device/cuda/memory_op.cu +++ b/source/module_base/module_device/cuda/memory_op.cu @@ -58,7 +58,7 @@ void resize_memory_op::operator()(FPTYPE*& arr, { if (arr != nullptr) { - delete_memory_op()(gpu_ctx, arr); + delete_memory_op()(arr); } cudaErrcheck(cudaMalloc((void**)&arr, sizeof(FPTYPE) * size)); std::string record_string; @@ -179,7 +179,7 @@ struct cast_memory_op -void delete_memory_op::operator()(const base_device::DEVICE_GPU* dev, FPTYPE* arr) +void delete_memory_op::operator()(FPTYPE* arr) { cudaErrcheck(cudaFree(arr)); } diff --git a/source/module_base/module_device/memory_op.cpp b/source/module_base/module_device/memory_op.cpp index 290b3d35af..525ecee89f 100644 --- a/source/module_base/module_device/memory_op.cpp +++ b/source/module_base/module_device/memory_op.cpp @@ -90,7 +90,7 @@ struct cast_memory_op struct delete_memory_op { - void operator()(const base_device::DEVICE_CPU* dev, FPTYPE* arr) + void operator()(FPTYPE* arr) { free(arr); } @@ -230,7 +230,7 @@ struct cast_memory_op struct delete_memory_op { - void operator()(const base_device::DEVICE_GPU* dev, FPTYPE* arr) + void operator()(FPTYPE* arr) { } }; @@ -336,7 +336,7 @@ template struct delete_memory_op, base_device::DEVICE_GPU>; template struct resize_memory_op_mt { - void operator()(const base_device::DEVICE_CPU* dev, FPTYPE*& arr, const size_t size, const char* record_in) + void operator()(FPTYPE*& arr, const size_t size, const char* record_in) { if (arr != nullptr) { @@ -363,7 +363,7 @@ struct resize_memory_op_mt template struct delete_memory_op_mt { - void operator()(const base_device::DEVICE_CPU* dev, FPTYPE* arr) + void operator()(FPTYPE* arr) { free_ht(arr); } @@ -397,26 +397,26 @@ void resize_memory(FPTYPE* arr, const size_t size, base_device::AbacusDevice_t d template void set_memory(FPTYPE* arr, const int var, const size_t size, base_device::AbacusDevice_t device_type){ if (device_type == base_device::AbacusDevice_t::CpuDevice){ - set_memory_op()(cpu_ctx, arr, var, size); + set_memory_op()(arr, var, size); } else if (device_type == base_device::AbacusDevice_t::GpuDevice){ - set_memory_op()(gpu_ctx, arr, var, size); + set_memory_op()(arr, var, size); } } template void synchronize_memory(FPTYPE* arr_out, const FPTYPE* arr_in, const size_t size, base_device::AbacusDevice_t device_type_out, base_device::AbacusDevice_t device_type_in){ if (device_type_out == base_device::AbacusDevice_t::CpuDevice || device_type_in == base_device::AbacusDevice_t::CpuDevice){ - synchronize_memory_op()(cpu_ctx, cpu_ctx, arr_out, arr_in, size); + synchronize_memory_op()(arr_out, arr_in, size); } else if (device_type_out == base_device::AbacusDevice_t::CpuDevice || device_type_in == base_device::AbacusDevice_t::GpuDevice){ - synchronize_memory_op()(cpu_ctx, gpu_ctx, arr_out, arr_in, size); + synchronize_memory_op()(arr_out, arr_in, size); } else if (device_type_out == base_device::AbacusDevice_t::GpuDevice || device_type_in == base_device::AbacusDevice_t::CpuDevice){ - synchronize_memory_op()(gpu_ctx, cpu_ctx, arr_out, arr_in, size); + synchronize_memory_op()(arr_out, arr_in, size); } else if (device_type_out == base_device::AbacusDevice_t::GpuDevice || device_type_in == base_device::AbacusDevice_t::GpuDevice){ - synchronize_memory_op()(gpu_ctx, gpu_ctx, arr_out, arr_in, size); + synchronize_memory_op()(arr_out, arr_in, size); } } @@ -424,16 +424,16 @@ template void cast_memory(FPTYPE_out* arr_out, const FPTYPE_in* arr_in, const size_t size, base_device::AbacusDevice_t device_type_out, base_device::AbacusDevice_t device_type_in) { if (device_type_out == base_device::AbacusDevice_t::CpuDevice || device_type_in == base_device::AbacusDevice_t::CpuDevice){ - cast_memory_op()(cpu_ctx, cpu_ctx, arr_out, arr_in, size); + cast_memory_op()(arr_out, arr_in, size); } else if (device_type_out == base_device::AbacusDevice_t::CpuDevice || device_type_in == base_device::AbacusDevice_t::GpuDevice){ - cast_memory_op()(cpu_ctx, gpu_ctx, arr_out, arr_in, size); + cast_memory_op()(arr_out, arr_in, size); } else if (device_type_out == base_device::AbacusDevice_t::GpuDevice || device_type_in == base_device::AbacusDevice_t::CpuDevice){ - cast_memory_op()(gpu_ctx, cpu_ctx, arr_out, arr_in, size); + cast_memory_op()(arr_out, arr_in, size); } else if (device_type_out == base_device::AbacusDevice_t::GpuDevice || device_type_in == base_device::AbacusDevice_t::GpuDevice){ - cast_memory_op()(gpu_ctx, gpu_ctx, arr_out, arr_in, size); + cast_memory_op()(arr_out, arr_in, size); } } @@ -441,10 +441,10 @@ template void delete_memory(FPTYPE* arr, base_device::AbacusDevice_t device_type) { if (device_type == base_device::AbacusDevice_t::CpuDevice){ - delete_memory_op()(cpu_ctx, arr); + delete_memory_op()(arr); } else if (device_type == base_device::AbacusDevice_t::GpuDevice){ - delete_memory_op()(gpu_ctx, arr); + delete_memory_op()(arr); } } diff --git a/source/module_base/module_device/memory_op.h b/source/module_base/module_device/memory_op.h index 61e8f012bf..e09294d970 100644 --- a/source/module_base/module_device/memory_op.h +++ b/source/module_base/module_device/memory_op.h @@ -62,8 +62,6 @@ struct cast_memory_op /// @brief memcpy for multi-device /// /// Input Parameters - /// \param dev_out : the type of computing device of arr_out - /// \param dev_in : the type of computing device of arr_in /// \param arr_in : input array /// \param size : array size /// @@ -80,9 +78,8 @@ struct delete_memory_op /// @brief free memory for multi-device /// /// Input Parameters - /// \param dev : the type of computing device /// \param arr : the input array - void operator()(const Device* dev, FPTYPE* arr); + void operator()(FPTYPE* arr); }; template @@ -141,7 +138,7 @@ struct synchronize_memory_op struct delete_memory_op { - void operator()(const base_device::DEVICE_GPU* dev, FPTYPE* arr); + void operator()(FPTYPE* arr); }; #endif // __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM @@ -153,13 +150,12 @@ struct resize_memory_op_mt /// @brief Allocate memory for a given pointer. Note this op will free the pointer first. /// /// Input Parameters - /// \param dev : the type of computing device /// \param size : array size /// \param record_string : label for memory record /// /// Output Parameters /// \param arr : allocated array - void operator()(const Device* dev, FPTYPE*& arr, const size_t size, const char* record_in = nullptr); + void operator()(FPTYPE*& arr, const size_t size, const char* record_in = nullptr); }; template @@ -168,9 +164,8 @@ struct delete_memory_op_mt /// @brief free memory for multi-device /// /// Input Parameters - /// \param dev : the type of computing device /// \param arr : the input array - void operator()(const Device* dev, FPTYPE* arr); + void operator()(FPTYPE* arr); }; #endif // __DSP diff --git a/source/module_base/module_device/rocm/memory_op.hip.cu b/source/module_base/module_device/rocm/memory_op.hip.cu index 10d78947e0..7e4cf7f497 100644 --- a/source/module_base/module_device/rocm/memory_op.hip.cu +++ b/source/module_base/module_device/rocm/memory_op.hip.cu @@ -45,7 +45,7 @@ void resize_memory_op::operator()(FPTYPE*& arr, { if (arr != nullptr) { - delete_memory_op()(dev, arr); + delete_memory_op()(arr); } hipErrcheck(hipMalloc((void**)&arr, sizeof(FPTYPE) * size)); } @@ -152,7 +152,7 @@ struct cast_memory_op -void delete_memory_op::operator()(const base_device::DEVICE_GPU* dev, FPTYPE* arr) +void delete_memory_op::operator()(FPTYPE* arr) { hipErrcheck(hipFree(arr)); } diff --git a/source/module_base/module_device/test/memory_test.cpp b/source/module_base/module_device/test/memory_test.cpp index fced574e5c..39c5c25d52 100644 --- a/source/module_base/module_device/test/memory_test.cpp +++ b/source/module_base/module_device/test/memory_test.cpp @@ -161,13 +161,13 @@ TEST_F(TestModulePsiMemory, synchronize_memory_op_complex_double_cpu_to_cpu) TEST_F(TestModulePsiMemory, delete_memory_op_double_cpu) { double* h_xx = (double*)malloc(sizeof(double) * xx.size()); - delete_memory_double_cpu_op()(cpu_ctx, h_xx); + delete_memory_double_cpu_op()(h_xx); } TEST_F(TestModulePsiMemory, delete_memory_op_complex_double_cpu) { std::complex* hz_xx = (std::complex*)malloc(sizeof(std::complex) * z_xx.size()); - delete_memory_complex_double_cpu_op()(cpu_ctx, hz_xx); + delete_memory_complex_double_cpu_op()(hz_xx); } #if __UT_USE_CUDA || __UT_USE_ROCM @@ -338,13 +338,13 @@ TEST_F(TestModulePsiMemory, synchronize_memory_op_complex_double_gpu_to_gpu) TEST_F(TestModulePsiMemory, delete_memory_op_double_gpu) { thrust::device_ptr d_xx = thrust::device_malloc(xx.size()); - delete_memory_double_gpu_op()(gpu_ctx, thrust::raw_pointer_cast(d_xx)); + delete_memory_double_gpu_op()(thrust::raw_pointer_cast(d_xx)); } TEST_F(TestModulePsiMemory, delete_memory_op_complex_double_gpu) { thrust::device_ptr> dz_xx = thrust::device_malloc>(z_xx.size()); - delete_memory_complex_double_gpu_op()(gpu_ctx, thrust::raw_pointer_cast(dz_xx)); + delete_memory_complex_double_gpu_op()(thrust::raw_pointer_cast(dz_xx)); } #endif // __UT_USE_CUDA || __UT_USE_ROCM diff --git a/source/module_base/parallel_device.h b/source/module_base/parallel_device.h index 9b43d668b4..7c41b8f28f 100644 --- a/source/module_base/parallel_device.h +++ b/source/module_base/parallel_device.h @@ -58,7 +58,7 @@ void bcast_dev(const Device* ctx, T* object, const int& n, const MPI_Comm& comm, base_device::memory::synchronize_memory_op()(object, object_cpu, n); if(alloc) { - base_device::memory::delete_memory_op()(cpu_ctx, object_cpu); + base_device::memory::delete_memory_op()(object_cpu); } } return; @@ -95,7 +95,7 @@ void reduce_dev(const Device* ctx, T* object, const int& n, const MPI_Comm& comm base_device::memory::synchronize_memory_op()(object, object_cpu, n); if(alloc) { - base_device::memory::delete_memory_op()(cpu_ctx, object_cpu); + base_device::memory::delete_memory_op()(object_cpu); } } return; diff --git a/source/module_base/test/blas_connector_test.cpp b/source/module_base/test/blas_connector_test.cpp index b4a7107fa5..dfe1e484b1 100644 --- a/source/module_base/test/blas_connector_test.cpp +++ b/source/module_base/test/blas_connector_test.cpp @@ -111,7 +111,7 @@ TEST(blas_connector, ScalGpu) { syncmem_z2z_h2d_op()(result_gpu, result, sizeof(std::complex) * 8); BlasConnector::scal(size,scale,result_gpu,incx,base_device::AbacusDevice_t::GpuDevice); syncmem_z2z_d2h_op()(result, result_gpu, sizeof(std::complex) * 8); - delmem_zd_op()(gpu_ctx, result_gpu); + delmem_zd_op()(result_gpu); // incx is the spacing between elements if result for (int i = 0; i < size; i++) { EXPECT_DOUBLE_EQ(answer[i].real(), result[i].real()); @@ -214,8 +214,8 @@ TEST(blas_connector, AxpyGpu) { syncmem_z2z_h2d_op()(x_gpu, x_const.data(), sizeof(std::complex) * size); BlasConnector::axpy(size, scale, x_gpu, incx, result_gpu, incy, base_device::AbacusDevice_t::GpuDevice); syncmem_z2z_d2h_op()(result.data(), result_gpu, sizeof(std::complex) * size); - delmem_zd_op()(gpu_ctx, result_gpu); - delmem_zd_op()(gpu_ctx, x_gpu); + delmem_zd_op()(result_gpu); + delmem_zd_op()(x_gpu); for (int i = 0; i < size; i++) { EXPECT_DOUBLE_EQ(answer[i].real(), result[i].real()); EXPECT_DOUBLE_EQ(answer[i].imag(), result[i].imag()); @@ -672,9 +672,9 @@ TEST(blas_connector, GemmGpu) { a_gpu, lda, b_gpu, ldb, beta_const, result_gpu, ldc, base_device::AbacusDevice_t::GpuDevice); syncmem_z2z_d2h_op()(result.data(), result_gpu, sizeof(std::complex) * size_n * ldc); - delmem_zd_op()(gpu_ctx, result_gpu); - delmem_zd_op()(gpu_ctx, a_gpu); - delmem_zd_op()(gpu_ctx, b_gpu); + delmem_zd_op()(result_gpu); + delmem_zd_op()(a_gpu); + delmem_zd_op()(b_gpu); for (int i = 0; i < size_m; i++) for (int j = 0; j < size_n; j++) { EXPECT_DOUBLE_EQ(answer[i + j * ldc].real(), diff --git a/source/module_basis/module_pw/kernels/test/pw_op_test.cpp b/source/module_basis/module_pw/kernels/test/pw_op_test.cpp index 4b62b5f8fb..6adac4613f 100644 --- a/source/module_basis/module_pw/kernels/test/pw_op_test.cpp +++ b/source/module_basis/module_pw/kernels/test/pw_op_test.cpp @@ -116,9 +116,9 @@ TEST_F(TestModulePWPWMultiDevice, set_3d_fft_box_op_gpu) for (int ii = 0; ii < this->nxyz; ii++) { EXPECT_LT(fabs(res[ii] - out_1[ii]), 1e-12); } - delete_memory_int_gpu_op()(gpu_ctx, d_box_index); - delete_memory_complex_gpu_op()(gpu_ctx, d_res); - delete_memory_complex_gpu_op()(gpu_ctx, d_in_1); + delete_memory_int_gpu_op()(d_box_index); + delete_memory_complex_gpu_op()(d_res); + delete_memory_complex_gpu_op()(d_in_1); } TEST_F(TestModulePWPWMultiDevice, set_recip_to_real_output_op_gpu) @@ -137,8 +137,8 @@ TEST_F(TestModulePWPWMultiDevice, set_recip_to_real_output_op_gpu) for (int ii = 0; ii < this->nxyz; ii++) { EXPECT_LT(fabs(res[ii] - out_2[ii]), 1e-12); } - delete_memory_complex_gpu_op()(gpu_ctx, d_res); - delete_memory_complex_gpu_op()(gpu_ctx, d_in_2); + delete_memory_complex_gpu_op()(d_res); + delete_memory_complex_gpu_op()(d_in_2); } TEST_F(TestModulePWPWMultiDevice, set_real_to_recip_output_op_gpu) @@ -160,9 +160,9 @@ TEST_F(TestModulePWPWMultiDevice, set_real_to_recip_output_op_gpu) for (int ii = 0; ii < out_3.size(); ii++) { EXPECT_LT(fabs(res[ii] - out_3[ii]), 5e-6); } - delete_memory_int_gpu_op()(gpu_ctx, d_box_index); - delete_memory_complex_gpu_op()(gpu_ctx, d_res); - delete_memory_complex_gpu_op()(gpu_ctx, d_in_3); + delete_memory_int_gpu_op()(d_box_index); + delete_memory_complex_gpu_op()(d_res); + delete_memory_complex_gpu_op()(d_in_3); } #endif // __UT_USE_CUDA || __UT_USE_ROCM diff --git a/source/module_basis/module_pw/module_fft/fft_cuda.cpp b/source/module_basis/module_pw/module_fft/fft_cuda.cpp index aa132cf071..9bec9253e7 100644 --- a/source/module_basis/module_pw/module_fft/fft_cuda.cpp +++ b/source/module_basis/module_pw/module_fft/fft_cuda.cpp @@ -50,7 +50,7 @@ void FFT_CUDA::clear() this->cleanFFT(); if (c_auxr_3d != nullptr) { - delmem_cd_op()(gpu_ctx, c_auxr_3d); + delmem_cd_op()(c_auxr_3d); c_auxr_3d = nullptr; } } @@ -60,7 +60,7 @@ void FFT_CUDA::clear() this->cleanFFT(); if (z_auxr_3d != nullptr) { - delmem_zd_op()(gpu_ctx, z_auxr_3d); + delmem_zd_op()(z_auxr_3d); z_auxr_3d = nullptr; } } diff --git a/source/module_basis/module_pw/module_fft/fft_rocm.cpp b/source/module_basis/module_pw/module_fft/fft_rocm.cpp index 9b8702a25e..1dd9c433ec 100644 --- a/source/module_basis/module_pw/module_fft/fft_rocm.cpp +++ b/source/module_basis/module_pw/module_fft/fft_rocm.cpp @@ -49,7 +49,7 @@ void FFT_ROCM::clear() this->cleanFFT(); if (c_auxr_3d != nullptr) { - delmem_cd_op()(gpu_ctx, c_auxr_3d); + delmem_cd_op()(c_auxr_3d); c_auxr_3d = nullptr; } } @@ -59,7 +59,7 @@ void FFT_ROCM::clear() this->cleanFFT(); if (z_auxr_3d != nullptr) { - delmem_zd_op()(gpu_ctx, z_auxr_3d); + delmem_zd_op()(z_auxr_3d); z_auxr_3d = nullptr; } } diff --git a/source/module_basis/module_pw/pw_basis.cpp b/source/module_basis/module_pw/pw_basis.cpp index 7c8a1293da..5fbff68f0c 100644 --- a/source/module_basis/module_pw/pw_basis.cpp +++ b/source/module_basis/module_pw/pw_basis.cpp @@ -39,7 +39,7 @@ PW_Basis:: ~PW_Basis() delete[] gg_uniq; #if defined(__CUDA) || defined(__ROCM) if (this->device == "gpu") { - delmem_int_op()(gpu_ctx, this->d_is2fftixy); + delmem_int_op()(this->d_is2fftixy); } #endif } diff --git a/source/module_basis/module_pw/pw_basis_k.cpp b/source/module_basis/module_pw/pw_basis_k.cpp index 5af491c40a..2e0f85372d 100644 --- a/source/module_basis/module_pw/pw_basis_k.cpp +++ b/source/module_basis/module_pw/pw_basis_k.cpp @@ -25,24 +25,24 @@ PW_Basis_K::~PW_Basis_K() #if defined(__CUDA) || defined(__ROCM) if (this->device == "gpu") { if (this->precision == "single") { - delmem_sd_op()(gpu_ctx, this->s_kvec_c); - delmem_sd_op()(gpu_ctx, this->s_gcar); - delmem_sd_op()(gpu_ctx, this->s_gk2); + delmem_sd_op()(this->s_kvec_c); + delmem_sd_op()(this->s_gcar); + delmem_sd_op()(this->s_gk2); } else { - delmem_dd_op()(gpu_ctx, this->d_gcar); - delmem_dd_op()(gpu_ctx, this->d_gk2); + delmem_dd_op()(this->d_gcar); + delmem_dd_op()(this->d_gk2); } - delmem_dd_op()(gpu_ctx, this->d_kvec_c); - delmem_int_op()(gpu_ctx, this->ig2ixyz_k); - delmem_int_op()(gpu_ctx, this->d_igl2isz_k); + delmem_dd_op()(this->d_kvec_c); + delmem_int_op()(this->ig2ixyz_k); + delmem_int_op()(this->d_igl2isz_k); } else { #endif if (this->precision == "single") { - delmem_sh_op()(cpu_ctx, this->s_kvec_c); - delmem_sh_op()(cpu_ctx, this->s_gcar); - delmem_sh_op()(cpu_ctx, this->s_gk2); + delmem_sh_op()(this->s_kvec_c); + delmem_sh_op()(this->s_gcar); + delmem_sh_op()(this->s_gk2); } // There's no need to delete double pointers while in a CPU environment. #if defined(__CUDA) || defined(__ROCM) diff --git a/source/module_basis/module_pw/pw_basis_sup.cpp b/source/module_basis/module_pw/pw_basis_sup.cpp index 97fe4cd525..e5422bd5d3 100644 --- a/source/module_basis/module_pw/pw_basis_sup.cpp +++ b/source/module_basis/module_pw/pw_basis_sup.cpp @@ -318,7 +318,7 @@ void PW_Basis_Sup::get_ig2isz_is2fftixy( #if defined(__CUDA) || defined(__ROCM) if (this->device == "gpu") { - delmem_int_op()(gpu_ctx, this->d_is2fftixy); + delmem_int_op()(this->d_is2fftixy); d_is2fftixy = nullptr; } #endif diff --git a/source/module_basis/module_pw/pw_distributeg.cpp b/source/module_basis/module_pw/pw_distributeg.cpp index 5e32c58963..0e92d6f665 100644 --- a/source/module_basis/module_pw/pw_distributeg.cpp +++ b/source/module_basis/module_pw/pw_distributeg.cpp @@ -164,7 +164,7 @@ void PW_Basis::get_ig2isz_is2fftixy( delete[] this->is2fftixy; this->is2fftixy = nullptr; // map is (index of sticks) to ixy (iy + ix * fftny). #if defined(__CUDA) || defined(__ROCM) if (this->device == "gpu") { - delmem_int_op()(gpu_ctx, this->d_is2fftixy); + delmem_int_op()(this->d_is2fftixy); d_is2fftixy = nullptr; } #endif diff --git a/source/module_elecstate/elecstate_pw.cpp b/source/module_elecstate/elecstate_pw.cpp index 9ac8d0f14e..f241c59db8 100644 --- a/source/module_elecstate/elecstate_pw.cpp +++ b/source/module_elecstate/elecstate_pw.cpp @@ -33,26 +33,26 @@ ElecStatePW::~ElecStatePW() { if (PARAM.inp.device == "gpu" || PARAM.inp.precision == "single") { - delmem_var_op()(this->ctx, this->rho_data); + delmem_var_op()(this->rho_data); delete[] this->rho; if (PARAM.globalv.double_grid || PARAM.globalv.use_uspp) { - delmem_complex_op()(this->ctx, this->rhog_data); + delmem_complex_op()(this->rhog_data); delete[] this->rhog; } if (get_xc_func_type() == 3 || PARAM.inp.out_elf[0] > 0) { - delmem_var_op()(this->ctx, this->kin_r_data); + delmem_var_op()(this->kin_r_data); delete[] this->kin_r; } } if (PARAM.globalv.use_uspp) { - delmem_var_op()(this->ctx, this->becsum); + delmem_var_op()(this->becsum); } - delmem_complex_op()(this->ctx, this->wfcr); - delmem_complex_op()(this->ctx, this->wfcr_another_spin); + delmem_complex_op()(this->wfcr); + delmem_complex_op()(this->wfcr_another_spin); } template @@ -413,13 +413,13 @@ void ElecStatePW::cal_becsum(const psi::Psi& psi) } } } - delmem_complex_op()(this->ctx, auxk1); - delmem_complex_op()(this->ctx, auxk2); - delmem_complex_op()(this->ctx, aux_gk); + delmem_complex_op()(auxk1); + delmem_complex_op()(auxk2); + delmem_complex_op()(aux_gk); } } } - delmem_complex_op()(this->ctx, becp); + delmem_complex_op()(becp); } template @@ -547,15 +547,15 @@ void ElecStatePW::addusdens_g(const Real* becsum, T** rhog) } } } - delmem_complex_op()(this->ctx, skk); - delmem_complex_op()(this->ctx, aux2); - delmem_complex_op()(this->ctx, tbecsum); + delmem_complex_op()(skk); + delmem_complex_op()(aux2); + delmem_complex_op()(tbecsum); } } - delmem_var_op()(this->ctx, qmod); - delmem_complex_op()(this->ctx, qgm); - delmem_var_op()(this->ctx, ylmk0); + delmem_var_op()(qmod); + delmem_complex_op()(qgm); + delmem_var_op()(ylmk0); } template class ElecStatePW, base_device::DEVICE_CPU>; diff --git a/source/module_elecstate/potentials/potential_new.cpp b/source/module_elecstate/potentials/potential_new.cpp index f1cc883bd8..f3d68df05a 100644 --- a/source/module_elecstate/potentials/potential_new.cpp +++ b/source/module_elecstate/potentials/potential_new.cpp @@ -50,18 +50,18 @@ Potential::~Potential() } if (PARAM.inp.basis_type == "pw" && PARAM.inp.device == "gpu") { if (PARAM.inp.precision == "single") { - delmem_sd_op()(gpu_ctx, s_veff_smooth); - delmem_sd_op()(gpu_ctx, s_vofk_smooth); + delmem_sd_op()(s_veff_smooth); + delmem_sd_op()(s_vofk_smooth); } else { - delmem_dd_op()(gpu_ctx, d_veff_smooth); - delmem_dd_op()(gpu_ctx, d_vofk_smooth); + delmem_dd_op()(d_veff_smooth); + delmem_dd_op()(d_vofk_smooth); } } else { if (PARAM.inp.precision == "single") { - delmem_sh_op()(cpu_ctx, s_veff_smooth); - delmem_sh_op()(cpu_ctx, s_vofk_smooth); + delmem_sh_op()(s_veff_smooth); + delmem_sh_op()(s_vofk_smooth); } } } diff --git a/source/module_hamilt_lcao/module_deltaspin/cal_mw_from_lambda.cpp b/source/module_hamilt_lcao/module_deltaspin/cal_mw_from_lambda.cpp index 8ee4ce9c08..36baed7bab 100644 --- a/source/module_hamilt_lcao/module_deltaspin/cal_mw_from_lambda.cpp +++ b/source/module_hamilt_lcao/module_deltaspin/cal_mw_from_lambda.cpp @@ -100,7 +100,7 @@ void spinconstrain::SpinConstrain>::calculate_delta_hcc(std h_tmp, nbands ); - base_device::memory::delete_memory_op, base_device::DEVICE_GPU>()(ctx, ps_pointer); + base_device::memory::delete_memory_op, base_device::DEVICE_GPU>()(ps_pointer); delete[] becp_cpu; #endif @@ -305,7 +305,7 @@ void spinconstrain::SpinConstrain>::cal_mw_from_lambda(int } // free memory for becp_pointer in GPU device - base_device::memory::delete_memory_op, base_device::DEVICE_GPU>()(ctx, becp_pointer); + base_device::memory::delete_memory_op, base_device::DEVICE_GPU>()(becp_pointer); } #endif // calculate weights from ekb to update wg @@ -484,9 +484,9 @@ void spinconstrain::SpinConstrain>::update_psi_charge(const &this->pelec->ekb(ik, 0)); } - base_device::memory::delete_memory_op, base_device::DEVICE_GPU>()(ctx, sub_h_save); - base_device::memory::delete_memory_op, base_device::DEVICE_GPU>()(ctx, sub_s_save); - base_device::memory::delete_memory_op, base_device::DEVICE_GPU>()(ctx, becp_save); + base_device::memory::delete_memory_op, base_device::DEVICE_GPU>()(sub_h_save); + base_device::memory::delete_memory_op, base_device::DEVICE_GPU>()(sub_s_save); + base_device::memory::delete_memory_op, base_device::DEVICE_GPU>()(becp_save); this->sub_h_save = nullptr; this->sub_s_save = nullptr; this->becp_save = nullptr; diff --git a/source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp b/source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp index ba486d81f2..bcd0cba74d 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp @@ -36,42 +36,42 @@ void pseudopot_cell_vnl::release_memory() { if (PARAM.inp.precision == "single") { - delmem_sd_op()(gpu_ctx, this->s_deeq); - delmem_sd_op()(gpu_ctx, this->s_nhtol); - delmem_sd_op()(gpu_ctx, this->s_nhtolm); - delmem_sd_op()(gpu_ctx, this->s_indv); - delmem_sd_op()(gpu_ctx, this->s_tab); - delmem_sd_op()(gpu_ctx, this->s_qq_nt); - delmem_cd_op()(gpu_ctx, this->c_deeq_nc); - delmem_cd_op()(gpu_ctx, this->c_vkb); - delmem_cd_op()(gpu_ctx, this->c_qq_so); + delmem_sd_op()(this->s_deeq); + delmem_sd_op()(this->s_nhtol); + delmem_sd_op()(this->s_nhtolm); + delmem_sd_op()(this->s_indv); + delmem_sd_op()(this->s_tab); + delmem_sd_op()(this->s_qq_nt); + delmem_cd_op()(this->c_deeq_nc); + delmem_cd_op()(this->c_vkb); + delmem_cd_op()(this->c_qq_so); } else { - delmem_zd_op()(gpu_ctx, this->z_deeq_nc); - delmem_zd_op()(gpu_ctx, this->z_qq_so); + delmem_zd_op()(this->z_deeq_nc); + delmem_zd_op()(this->z_qq_so); } - delmem_dd_op()(gpu_ctx, this->d_deeq); - delmem_zd_op()(gpu_ctx, this->z_vkb); - delmem_dd_op()(gpu_ctx, this->d_tab); - delmem_dd_op()(gpu_ctx, this->d_indv); - delmem_dd_op()(gpu_ctx, this->d_nhtol); - delmem_dd_op()(gpu_ctx, this->d_nhtolm); - delmem_dd_op()(gpu_ctx, this->d_qq_nt); + delmem_dd_op()(this->d_deeq); + delmem_zd_op()(this->z_vkb); + delmem_dd_op()(this->d_tab); + delmem_dd_op()(this->d_indv); + delmem_dd_op()(this->d_nhtol); + delmem_dd_op()(this->d_nhtolm); + delmem_dd_op()(this->d_qq_nt); } else { if (PARAM.inp.precision == "single") { - delmem_sh_op()(cpu_ctx, this->s_deeq); - delmem_sh_op()(cpu_ctx, this->s_nhtol); - delmem_sh_op()(cpu_ctx, this->s_nhtolm); - delmem_sh_op()(cpu_ctx, this->s_indv); - delmem_sh_op()(cpu_ctx, this->s_tab); - delmem_sh_op()(cpu_ctx, this->s_qq_nt); - delmem_ch_op()(cpu_ctx, this->c_deeq_nc); - delmem_ch_op()(cpu_ctx, this->c_vkb); - delmem_ch_op()(cpu_ctx, this->c_qq_so); + delmem_sh_op()(this->s_deeq); + delmem_sh_op()(this->s_nhtol); + delmem_sh_op()(this->s_nhtolm); + delmem_sh_op()(this->s_indv); + delmem_sh_op()(this->s_tab); + delmem_sh_op()(this->s_qq_nt); + delmem_ch_op()(this->c_deeq_nc); + delmem_ch_op()(this->c_vkb); + delmem_ch_op()(this->c_qq_so); } // There's no need to delete double precision pointers while in a CPU environment. } @@ -402,7 +402,7 @@ void pseudopot_cell_vnl::getvnl(const int& ik, const UnitCell& ucell, ModuleBase delete[] gk; delete[] vq; - delmem_complex_op()(ctx, sk); + delmem_complex_op()(sk); ModuleBase::timer::tick("pp_cell_vnl", "getvnl"); return; @@ -527,18 +527,18 @@ void pseudopot_cell_vnl::getvnl(Device* ctx, delete[] h_atom_nh; delete[] h_atom_na; delete[] h_atom_nb; - delmem_var_op()(ctx, ylm); - delmem_var_op()(ctx, vkb1); - delmem_complex_op()(ctx, sk); + delmem_var_op()(ylm); + delmem_var_op()(vkb1); + delmem_complex_op()(sk); if (PARAM.inp.device == "gpu" || PARAM.inp.precision == "single") { - delmem_var_op()(ctx, gk); + delmem_var_op()(gk); } if (PARAM.inp.device == "gpu") { - delmem_int_op()(ctx, atom_nh); - delmem_int_op()(ctx, atom_nb); - delmem_int_op()(ctx, atom_na); + delmem_int_op()(atom_nh); + delmem_int_op()(atom_nb); + delmem_int_op()(atom_na); } ModuleBase::timer::tick("pp_cell_vnl", "getvnl"); } // end subroutine getvnl diff --git a/source/module_hamilt_pw/hamilt_pwdft/forces_cc.cpp b/source/module_hamilt_pw/hamilt_pwdft/forces_cc.cpp index 8f659fd729..41184b11d0 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/forces_cc.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/forces_cc.cpp @@ -213,12 +213,12 @@ void Forces::cal_force_cc(ModuleBase::matrix& forcecc, } if (this->device == base_device::GpuDevice) { - delmem_var_op()(this->ctx, gv_x_d); - delmem_var_op()(this->ctx, gv_y_d); - delmem_var_op()(this->ctx, gv_z_d); - delmem_var_op()(this->ctx, force_d); - delmem_var_op()(this->ctx, rhocgigg_vec_d); - delmem_complex_op()(this->ctx, psiv_d); + delmem_var_op()(gv_x_d); + delmem_var_op()(gv_y_d); + delmem_var_op()(gv_z_d); + delmem_var_op()(force_d); + delmem_var_op()(rhocgigg_vec_d); + delmem_complex_op()(psiv_d); } delete[] rhocg; @@ -334,11 +334,11 @@ void Forces::deriv_drhoc r,rhoc,gx_arr.data()+igl0,rab,drhocg+igl0,mesh,igl0,rho_basis->ngg-igl0,ucell_in.omega,type); } - delmem_var_op()(this->ctx, r_d); - delmem_var_op()(this->ctx, rhoc_d); - delmem_var_op()(this->ctx, rab_d); - delmem_var_op()(this->ctx, gx_arr_d); - delmem_var_op()(this->ctx, drhocg_d); + delmem_var_op()(r_d); + delmem_var_op()(rhoc_d); + delmem_var_op()(rab_d); + delmem_var_op()(gx_arr_d); + delmem_var_op()(drhocg_d); return; } diff --git a/source/module_hamilt_pw/hamilt_pwdft/forces_nl.cpp b/source/module_hamilt_pw/hamilt_pwdft/forces_nl.cpp index 3ab586f7f9..bd615f0eef 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/forces_nl.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/forces_nl.cpp @@ -63,7 +63,7 @@ void Forces::cal_force_nl(ModuleBase::matrix& forcenl, } // end ik syncmem_var_d2h_op()(forcenl.c, force, forcenl.nr * forcenl.nc); - delmem_var_op()(this->ctx, force); + delmem_var_op()(force); // sum up forcenl from all processors Parallel_Reduce::reduce_all(forcenl.c, forcenl.nr * forcenl.nc); diff --git a/source/module_hamilt_pw/hamilt_pwdft/forces_onsite.cpp b/source/module_hamilt_pw/hamilt_pwdft/forces_onsite.cpp index b216533e15..36f90f0001 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/forces_onsite.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/forces_onsite.cpp @@ -66,7 +66,7 @@ void Forces::cal_force_onsite(ModuleBase::matrix& force_onsite, } // end ik syncmem_var_d2h_op()(force_onsite.c, force, force_onsite.nr * force_onsite.nc); - delmem_var_op()(this->ctx, force); + delmem_var_op()(force); // sum up force_onsite from all processors Parallel_Reduce::reduce_all(force_onsite.c, force_onsite.nr * force_onsite.nc); diff --git a/source/module_hamilt_pw/hamilt_pwdft/forces_scc.cpp b/source/module_hamilt_pw/hamilt_pwdft/forces_scc.cpp index e36df1bb23..ab63f43aff 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/forces_scc.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/forces_scc.cpp @@ -217,11 +217,11 @@ void Forces::deriv_drhoc_scc(const bool& numeric, } - delmem_var_op()(this->ctx, r_d); - delmem_var_op()(this->ctx, rhoc_d); - delmem_var_op()(this->ctx, rab_d); - delmem_var_op()(this->ctx, gx_arr_d); - delmem_var_op()(this->ctx, drhocg_d); + delmem_var_op()(r_d); + delmem_var_op()(rhoc_d); + delmem_var_op()(rab_d); + delmem_var_op()(gx_arr_d); + delmem_var_op()(drhocg_d); return; } diff --git a/source/module_hamilt_pw/hamilt_pwdft/fs_kin_tools.cpp b/source/module_hamilt_pw/hamilt_pwdft/fs_kin_tools.cpp index c26598d99d..00049866f9 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/fs_kin_tools.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/fs_kin_tools.cpp @@ -42,8 +42,8 @@ FS_Kin_tools::~FS_Kin_tools() { if (this->device == base_device::GpuDevice) { - delmem_var_op()(this->ctx, d_gk); - delmem_var_op()(this->ctx, d_kfac); + delmem_var_op()(d_gk); + delmem_var_op()(d_kfac); } } diff --git a/source/module_hamilt_pw/hamilt_pwdft/fs_nonlocal_tools.cpp b/source/module_hamilt_pw/hamilt_pwdft/fs_nonlocal_tools.cpp index 96888d306a..523cb2b504 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/fs_nonlocal_tools.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/fs_nonlocal_tools.cpp @@ -124,40 +124,40 @@ void FS_Nonlocal_tools::delete_memory() { // delete memory - delmem_var_op()(this->ctx, hd_vq); - delmem_var_op()(this->ctx, hd_vq_deri); - delmem_var_op()(this->ctx, hd_ylm); - delmem_var_op()(this->ctx, hd_ylm_deri); - delmem_var_op()(this->ctx, d_wk); + delmem_var_op()(hd_vq); + delmem_var_op()(hd_vq_deri); + delmem_var_op()(hd_ylm); + delmem_var_op()(hd_ylm_deri); + delmem_var_op()(d_wk); // delete memory on GPU if (this->device == base_device::GpuDevice) { - delmem_var_op()(this->ctx, d_wg); - delmem_var_op()(this->ctx, d_ekb); - delmem_int_op()(this->ctx, atom_nh); - delmem_int_op()(this->ctx, atom_na); - delmem_var_op()(this->ctx, d_g_plus_k); - delmem_var_op()(this->ctx, d_pref); - delmem_var_op()(this->ctx, d_vq_tab); - delmem_complex_op()(this->ctx, this->d_pref_in); - delmem_int_op()(this->ctx, d_dvkb_indexes); + delmem_var_op()(d_wg); + delmem_var_op()(d_ekb); + delmem_int_op()(atom_nh); + delmem_int_op()(atom_na); + delmem_var_op()(d_g_plus_k); + delmem_var_op()(d_pref); + delmem_var_op()(d_vq_tab); + delmem_complex_op()(this->d_pref_in); + delmem_int_op()(d_dvkb_indexes); } if (becp != nullptr) { - delmem_complex_op()(this->ctx, becp); - delmem_complex_op()(this->ctx, hd_sk); + delmem_complex_op()(becp); + delmem_complex_op()(hd_sk); } if (dbecp != nullptr) { - delmem_complex_op()(this->ctx, dbecp); + delmem_complex_op()(dbecp); } if (this->pre_ik_f != -1) { - delmem_int_op()(this->ctx, gcar_zero_indexes); - delmem_complex_op()(this->ctx, vkb_save); - delmem_var_op()(this->ctx, gcar); + delmem_int_op()(gcar_zero_indexes); + delmem_complex_op()(vkb_save); + delmem_var_op()(gcar); } } diff --git a/source/module_hamilt_pw/hamilt_pwdft/hamilt_pw.cpp b/source/module_hamilt_pw/hamilt_pwdft/hamilt_pw.cpp index 69df325d6c..f877eb1985 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/hamilt_pw.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/hamilt_pw.cpp @@ -344,7 +344,7 @@ void HamiltPW::sPsi(const T* psi_in, // psi &ps[this->ppcell->indv_ijkb0[iat]], this->ppcell->nkb); } - delmem_complex_op()(ctx, qqc); + delmem_complex_op()(qqc); } } @@ -382,8 +382,8 @@ void HamiltPW::sPsi(const T* psi_in, // psi nrow); } } - delmem_complex_op()(this->ctx, ps); - delmem_complex_op()(this->ctx, becp); + delmem_complex_op()(ps); + delmem_complex_op()(becp); } } diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/ekinetic_op_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/ekinetic_op_test.cpp index ae30a9d7e5..7c06dfc154 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/ekinetic_op_test.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/ekinetic_op_test.cpp @@ -95,8 +95,8 @@ TEST_F(TestModuleHamiltEkinetic, ekinetic_pw_op_gpu) for (int ii = 0; ii < hpsi.size(); ii++) { EXPECT_LT(fabs(hpsi[ii] - expected_hpsi[ii]), 1e-6); } - delete_memory_double_op()(gpu_ctx, gk2_dev); - delete_memory_complex_double_op()(gpu_ctx, psi_dev); - delete_memory_complex_double_op()(gpu_ctx, hpsi_dev); + delete_memory_double_op()(gk2_dev); + delete_memory_complex_double_op()(psi_dev); + delete_memory_complex_double_op()(hpsi_dev); } #endif // __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM \ No newline at end of file diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/force_op_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/force_op_test.cpp index 0161f2b6f4..be237b64ba 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/force_op_test.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/force_op_test.cpp @@ -2943,9 +2943,9 @@ TEST_F(TestSrcPWForceMultiDevice, cal_vkb1_nl_op_gpu) EXPECT_LT(fabs(res[ii] - expected_vkb1[ii]), 6e-5); } - delmem_complex_op()(gpu_ctx, d_res); - delmem_complex_op()(gpu_ctx, d_vkb); - delmem_var_op()(gpu_ctx, d_gcar); + delmem_complex_op()(d_res); + delmem_complex_op()(d_vkb); + delmem_var_op()(d_gcar); } TEST_F(TestSrcPWForceMultiDevice, cal_force_nl_op_gpu) @@ -3005,16 +3005,16 @@ TEST_F(TestSrcPWForceMultiDevice, cal_force_nl_op_gpu) EXPECT_LT(fabs(res[ii] - expected_force[ii]), 6e-5); } - delmem_var_op()(gpu_ctx, d_wg); - delmem_var_op()(gpu_ctx, d_res); - delmem_var_op()(gpu_ctx, d_deeq); - delmem_var_op()(gpu_ctx, d_ekb); - delmem_var_op()(gpu_ctx, d_qq_nt); + delmem_var_op()(d_wg); + delmem_var_op()(d_res); + delmem_var_op()(d_deeq); + delmem_var_op()(d_ekb); + delmem_var_op()(d_qq_nt); - delmem_int_op()(gpu_ctx, d_atom_nh); - delmem_int_op()(gpu_ctx, d_atom_na); + delmem_int_op()(d_atom_nh); + delmem_int_op()(d_atom_na); - delmem_complex_op()(gpu_ctx, d_becp); - delmem_complex_op()(gpu_ctx, d_dbecp); + delmem_complex_op()(d_becp); + delmem_complex_op()(d_dbecp); } #endif // __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM \ No newline at end of file diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/meta_op_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/meta_op_test.cpp index 14968e1d10..85caa61f4b 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/meta_op_test.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/meta_op_test.cpp @@ -75,9 +75,9 @@ TEST_F(TestModuleHamiltMeta, meta_pw_op_gpu) for (int ii = 0; ii < res.size(); ii++) { EXPECT_LT(fabs(res[ii] - expected_out[ii]), 6e-5); } - delmem_var_op()(gpu_ctx, d_gcar); - delmem_var_op()(gpu_ctx, d_kvec_c); - delmem_complex_op()(gpu_ctx, d_in); - delmem_complex_op()(gpu_ctx, d_res); + delmem_var_op()(d_gcar); + delmem_var_op()(d_kvec_c); + delmem_complex_op()(d_in); + delmem_complex_op()(d_res); } #endif // __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM \ No newline at end of file diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/nonlocal_op_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/nonlocal_op_test.cpp index df28e1766a..47deaec255 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/nonlocal_op_test.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/nonlocal_op_test.cpp @@ -147,9 +147,9 @@ TEST_F(TestModuleHamiltNonlocal, nonlocal_pw_op_gpu) } EXPECT_EQ(sum, expected_sum); EXPECT_EQ(iat, expected_iat); - delete_memory_double_op()(gpu_ctx, deeq_dev); - delete_memory_complex_double_op()(gpu_ctx, ps_dev); - delete_memory_complex_double_op()(gpu_ctx, becp_dev); + delete_memory_double_op()(deeq_dev); + delete_memory_complex_double_op()(ps_dev); + delete_memory_complex_double_op()(becp_dev); } TEST_F(TestModuleHamiltNonlocal, nonlocal_pw_spin_op_gpu) @@ -177,8 +177,8 @@ TEST_F(TestModuleHamiltNonlocal, nonlocal_pw_spin_op_gpu) } EXPECT_EQ(sum, expected_sum); EXPECT_EQ(iat, expected_iat); - delete_memory_complex_double_op()(gpu_ctx, deeq_dev); - delete_memory_complex_double_op()(gpu_ctx, ps_dev); - delete_memory_complex_double_op()(gpu_ctx, becp_dev); + delete_memory_complex_double_op()(deeq_dev); + delete_memory_complex_double_op()(ps_dev); + delete_memory_complex_double_op()(becp_dev); } #endif // __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM \ No newline at end of file diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/stress_op_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/stress_op_test.cpp index 98390737f7..a3be95fce8 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/stress_op_test.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/stress_op_test.cpp @@ -179,15 +179,15 @@ TEST(TestSrcPWStressMultiDevice, cal_dbecp_noevc_nl_op_gpu) EXPECT_LT(fabs(dbecp_noevc[ii] - expected_dbecpnoevc[ii]), 6e-5); } - delmem_zd_op()(gpu_ctx, d_vkb0i); - delmem_zd_op()(gpu_ctx, d_vkb0j); - delmem_zd_op()(gpu_ctx, d_vkb); - delmem_zd_op()(gpu_ctx, d_vkb1); - delmem_zd_op()(gpu_ctx, d_vkb2); - delmem_zd_op()(gpu_ctx, d_dbecp_noevc); - - delmem_dd_op()(gpu_ctx, d_gcar); - delmem_dd_op()(gpu_ctx, d_kvec_c); + delmem_zd_op()(d_vkb0i); + delmem_zd_op()(d_vkb0j); + delmem_zd_op()(d_vkb); + delmem_zd_op()(d_vkb1); + delmem_zd_op()(d_vkb2); + delmem_zd_op()(d_dbecp_noevc); + + delmem_dd_op()(d_gcar); + delmem_dd_op()(d_kvec_c); } TEST(TestSrcPWStressMultiDevice, cal_stress_nl_op_gpu) @@ -290,16 +290,16 @@ TEST(TestSrcPWStressMultiDevice, cal_stress_nl_op_gpu) EXPECT_LT(fabs(stress[ii] - expected_stress[ii]), 6e-5); } - delmem_zd_op()(gpu_ctx, d_becp); - delmem_zd_op()(gpu_ctx, d_dbecp); + delmem_zd_op()(d_becp); + delmem_zd_op()(d_dbecp); - delmem_dd_op()(gpu_ctx, d_wg); - delmem_dd_op()(gpu_ctx, d_deeq); - delmem_dd_op()(gpu_ctx, d_stress); - delmem_dd_op()(gpu_ctx, d_ekb); - delmem_dd_op()(gpu_ctx, d_qq_nt); + delmem_dd_op()(d_wg); + delmem_dd_op()(d_deeq); + delmem_dd_op()(d_stress); + delmem_dd_op()(d_ekb); + delmem_dd_op()(d_qq_nt); - delmem_int_op()(gpu_ctx, d_atom_nh); - delmem_int_op()(gpu_ctx, d_atom_na); + delmem_int_op()(d_atom_nh); + delmem_int_op()(d_atom_na); } #endif // __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM \ No newline at end of file diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/veff_op_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/veff_op_test.cpp index 82fb4411ba..56c96157fd 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/veff_op_test.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/veff_op_test.cpp @@ -100,8 +100,8 @@ TEST_F(TestModuleHamiltVeff, veff_pw_op_gpu) for (int ii = 0; ii < res.size(); ii++) { EXPECT_LT(fabs(res[ii] - expected_out[ii]), 6e-5); } - delete_memory_double_op()(gpu_ctx, d_in); - delete_memory_complex_op()(gpu_ctx, d_res); + delete_memory_double_op()(d_in); + delete_memory_complex_op()(d_res); } TEST_F(TestModuleHamiltVeff, veff_pw_spin_op_gpu) @@ -132,8 +132,8 @@ TEST_F(TestModuleHamiltVeff, veff_pw_spin_op_gpu) EXPECT_LT(fabs(res[ii] - expected_out_spin[ii]), 7.5e-5); EXPECT_LT(fabs(res1[ii] - expected_out1_spin[ii]), 6e-5); } - delete_memory_double_op()(gpu_ctx, d_in); - delete_memory_complex_op()(gpu_ctx, d_res); - delete_memory_complex_op()(gpu_ctx, d_res1); + delete_memory_double_op()(d_in); + delete_memory_complex_op()(d_res); + delete_memory_complex_op()(d_res1); } #endif // __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM \ No newline at end of file diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/vnl_op_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/vnl_op_test.cpp index 1ce0e90c10..be5e6a8a68 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/vnl_op_test.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/vnl_op_test.cpp @@ -4108,19 +4108,19 @@ TEST_F(TestSrcPWVnlMultiDevice, cal_vnl_op_gpu) EXPECT_LT(fabs(vkb[ii] - expected_vkb[ii]), 6e-5); } - delmem_int_op()(gpu_ctx, d_atom_na); - delmem_int_op()(gpu_ctx, d_atom_nh); - delmem_int_op()(gpu_ctx, d_atom_nb); + delmem_int_op()(d_atom_na); + delmem_int_op()(d_atom_nh); + delmem_int_op()(d_atom_nb); - delmem_var_op()(gpu_ctx, d_gk); - delmem_var_op()(gpu_ctx, d_ylm); - delmem_var_op()(gpu_ctx, d_indv); - delmem_var_op()(gpu_ctx, d_nhtol); - delmem_var_op()(gpu_ctx, d_nhtolm); - delmem_var_op()(gpu_ctx, d_tab); - delmem_var_op()(gpu_ctx, d_vkb1); + delmem_var_op()(d_gk); + delmem_var_op()(d_ylm); + delmem_var_op()(d_indv); + delmem_var_op()(d_nhtol); + delmem_var_op()(d_nhtolm); + delmem_var_op()(d_tab); + delmem_var_op()(d_vkb1); - delmem_complex_op()(gpu_ctx, d_sk); - delmem_complex_op()(gpu_ctx, d_vkb); + delmem_complex_op()(d_sk); + delmem_complex_op()(d_vkb); } #endif // __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM \ No newline at end of file diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/wf_op_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/wf_op_test.cpp index e94c92c90d..2463234c31 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/wf_op_test.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/wf_op_test.cpp @@ -465,16 +465,16 @@ TEST_F(TestSrcPWWfMultiDevice, cal_sk_op_gpu) EXPECT_LT(fabs(sk[ii] - expected_sk[ii]), 6e-5); } - delmem_int_op()(gpu_ctx, d_atom_na); - delmem_int_op()(gpu_ctx, d_igl2isz); - delmem_int_op()(gpu_ctx, d_is2fftixy); + delmem_int_op()(d_atom_na); + delmem_int_op()(d_igl2isz); + delmem_int_op()(d_is2fftixy); - delmem_var_op()(gpu_ctx, d_kvec_c); - delmem_var_op()(gpu_ctx, d_atom_tau); + delmem_var_op()(d_kvec_c); + delmem_var_op()(d_atom_tau); - delmem_complex_op()(gpu_ctx, d_sk); - delmem_complex_op()(gpu_ctx, d_eigts1); - delmem_complex_op()(gpu_ctx, d_eigts2); - delmem_complex_op()(gpu_ctx, d_eigts3); + delmem_complex_op()(d_sk); + delmem_complex_op()(d_eigts1); + delmem_complex_op()(d_eigts2); + delmem_complex_op()(d_eigts3); } #endif // __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM \ No newline at end of file diff --git a/source/module_hamilt_pw/hamilt_pwdft/onsite_proj_tools.cpp b/source/module_hamilt_pw/hamilt_pwdft/onsite_proj_tools.cpp index 145cee7142..e15793cbdc 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/onsite_proj_tools.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/onsite_proj_tools.cpp @@ -230,39 +230,39 @@ void Onsite_Proj_tools::delete_memory() { // delete memory - delmem_var_op()(this->ctx, hd_vq); - delmem_var_op()(this->ctx, hd_vq_deri); - delmem_var_op()(this->ctx, hd_ylm); - delmem_var_op()(this->ctx, hd_ylm_deri); + delmem_var_op()(hd_vq); + delmem_var_op()(hd_vq_deri); + delmem_var_op()(hd_ylm); + delmem_var_op()(hd_ylm_deri); // delete memory on GPU if (this->device == base_device::GpuDevice) { - delmem_var_op()(this->ctx, d_wg); - delmem_var_op()(this->ctx, d_ekb); - delmem_int_op()(this->ctx, atom_nh); - delmem_int_op()(this->ctx, atom_na); - delmem_var_op()(this->ctx, d_g_plus_k); - delmem_var_op()(this->ctx, d_pref); - delmem_var_op()(this->ctx, d_vq_tab); - delmem_complex_op()(this->ctx, this->d_pref_in); - delmem_int_op()(this->ctx, d_dvkb_indexes); + delmem_var_op()(d_wg); + delmem_var_op()(d_ekb); + delmem_int_op()(atom_nh); + delmem_int_op()(atom_na); + delmem_var_op()(d_g_plus_k); + delmem_var_op()(d_pref); + delmem_var_op()(d_vq_tab); + delmem_complex_op()(this->d_pref_in); + delmem_int_op()(d_dvkb_indexes); } if (becp != nullptr) { - delmem_complex_op()(this->ctx, becp); - delmem_complex_op()(this->ctx, hd_sk); + delmem_complex_op()(becp); + delmem_complex_op()(hd_sk); } if (dbecp != nullptr) { - delmem_complex_op()(this->ctx, dbecp); + delmem_complex_op()(dbecp); } if (this->pre_ik_f != -1) { - delmem_int_op()(this->ctx, gcar_zero_indexes); - delmem_complex_op()(this->ctx, vkb_save); - delmem_var_op()(this->ctx, gcar); + delmem_int_op()(gcar_zero_indexes); + delmem_complex_op()(vkb_save); + delmem_var_op()(gcar); } } @@ -447,7 +447,7 @@ void Onsite_Proj_tools::cal_becp(int ik, syncmem_complex_d2h_op()(h_becp, becp_tmp, size_becp_act); Parallel_Reduce::reduce_pool(h_becp, size_becp_act); syncmem_complex_h2d_op()(becp_tmp, h_becp, size_becp_act); - delmem_complex_h_op()(this->cpu_ctx, h_becp); + delmem_complex_h_op()(h_becp); } else { @@ -853,8 +853,8 @@ void Onsite_Proj_tools::cal_force_dftu(int ik, #if defined(__CUDA) || defined(__ROCM) if (this->device == base_device::GpuDevice) { - delmem_complex_op()(this->ctx, vu_tmp); - delmem_int_op()(this->ctx, orbital_corr_tmp); + delmem_complex_op()(vu_tmp); + delmem_int_op()(orbital_corr_tmp); } #endif } @@ -908,7 +908,7 @@ void Onsite_Proj_tools::cal_force_dspin(int ik, #if defined(__CUDA) || defined(__ROCM) if (this->device == base_device::GpuDevice) { - delmem_var_op()(this->ctx, lambda_tmp); + delmem_var_op()(lambda_tmp); } #endif } @@ -957,8 +957,8 @@ void Onsite_Proj_tools::cal_stress_dftu(int ik, #if defined(__CUDA) || defined(__ROCM) if (this->device == base_device::GpuDevice) { - delmem_complex_op()(this->ctx, vu_tmp); - delmem_int_op()(this->ctx, orbital_corr_tmp); + delmem_complex_op()(vu_tmp); + delmem_int_op()(orbital_corr_tmp); } #endif } @@ -1009,7 +1009,7 @@ void Onsite_Proj_tools::cal_stress_dspin(int ik, #if defined(__CUDA) || defined(__ROCM) if (this->device == base_device::GpuDevice) { - delmem_var_op()(this->ctx, lambda_tmp); + delmem_var_op()(lambda_tmp); } #endif } diff --git a/source/module_hamilt_pw/hamilt_pwdft/onsite_projector.cpp b/source/module_hamilt_pw/hamilt_pwdft/onsite_projector.cpp index 832379f445..f235df15e5 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/onsite_projector.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/onsite_projector.cpp @@ -191,12 +191,12 @@ projectors::OnsiteProjector::~OnsiteProjector() { //delete[] becp; delete fs_tools; - delmem_complex_op()(this->ctx, this->tab_atomic_); + delmem_complex_op()(this->tab_atomic_); if(this->device == base_device::GpuDevice) { - delmem_complex_h_op()(this->cpu_ctx, this->h_becp); + delmem_complex_h_op()(this->h_becp); } - delmem_complex_op()(this->ctx, this->becp); + delmem_complex_op()(this->becp); } diff --git a/source/module_hamilt_pw/hamilt_pwdft/operator_pw/meta_pw.cpp b/source/module_hamilt_pw/hamilt_pwdft/operator_pw/meta_pw.cpp index 83db1d98fa..dc8a566d05 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/operator_pw/meta_pw.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/operator_pw/meta_pw.cpp @@ -34,7 +34,7 @@ Meta>::Meta(Real tpiba_in, template Meta>::~Meta() { - delmem_complex_op()(this->ctx, this->porter); + delmem_complex_op()(this->porter); } template diff --git a/source/module_hamilt_pw/hamilt_pwdft/operator_pw/nonlocal_pw.cpp b/source/module_hamilt_pw/hamilt_pwdft/operator_pw/nonlocal_pw.cpp index e2ec876872..7446151d36 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/operator_pw/nonlocal_pw.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/operator_pw/nonlocal_pw.cpp @@ -35,8 +35,8 @@ Nonlocal>::Nonlocal(const int* isk_in, template Nonlocal>::~Nonlocal() { - delmem_complex_op()(this->ctx, this->ps); - delmem_complex_op()(this->ctx, this->becp); + delmem_complex_op()(this->ps); + delmem_complex_op()(this->becp); } template diff --git a/source/module_hamilt_pw/hamilt_pwdft/operator_pw/onsite_proj_pw.cpp b/source/module_hamilt_pw/hamilt_pwdft/operator_pw/onsite_proj_pw.cpp index e03ba5494e..3cfd345356 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/operator_pw/onsite_proj_pw.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/operator_pw/onsite_proj_pw.cpp @@ -30,22 +30,22 @@ OnsiteProj>::OnsiteProj(const int* isk_in, template OnsiteProj>::~OnsiteProj() { - delmem_complex_op()(this->ctx, this->ps); + delmem_complex_op()(this->ps); if(this->init_delta_spin) { - delmem_int_op()(this->ctx, this->ip_iat); - delmem_complex_op()(this->ctx, this->lambda_coeff); + delmem_int_op()(this->ip_iat); + delmem_complex_op()(this->lambda_coeff); } if(this->has_dftu) { if(!init_delta_spin) { - delmem_int_op()(this->ctx, this->ip_iat); + delmem_int_op()(this->ip_iat); } - delmem_int_op()(this->ctx, this->orb_l_iat); - delmem_int_op()(this->ctx, this->ip_m); - delmem_int_op()(this->ctx, this->vu_begin_iat); - delmem_complex_op()(this->ctx, this->vu_device); + delmem_int_op()(this->orb_l_iat); + delmem_int_op()(this->ip_m); + delmem_int_op()(this->vu_begin_iat); + delmem_complex_op()(this->vu_device); } } diff --git a/source/module_hamilt_pw/hamilt_pwdft/operator_pw/veff_pw.cpp b/source/module_hamilt_pw/hamilt_pwdft/operator_pw/veff_pw.cpp index ab7dce9e7a..6bff6b2dc0 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/operator_pw/veff_pw.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/operator_pw/veff_pw.cpp @@ -31,8 +31,8 @@ Veff>::Veff(const int* isk_in, template Veff>::~Veff() { - delmem_complex_op()(this->ctx, this->porter); - delmem_complex_op()(this->ctx, this->porter1); + delmem_complex_op()(this->porter); + delmem_complex_op()(this->porter1); } template diff --git a/source/module_hamilt_pw/hamilt_pwdft/stress_func_cc.cpp b/source/module_hamilt_pw/hamilt_pwdft/stress_func_cc.cpp index 04d0744743..bbdefb737a 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/stress_func_cc.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/stress_func_cc.cpp @@ -313,11 +313,11 @@ void Stress_Func::deriv_drhoc r,rhoc,gx_arr.data()+igl0,rab,drhocg+igl0,mesh,igl0,rho_basis->ngg-igl0,omega,type); } - delmem_var_op()(this->ctx, r_d); - delmem_var_op()(this->ctx, rhoc_d); - delmem_var_op()(this->ctx, rab_d); - delmem_var_op()(this->ctx, gx_arr_d); - delmem_var_op()(this->ctx, drhocg_d); + delmem_var_op()(r_d); + delmem_var_op()(rhoc_d); + delmem_var_op()(rab_d); + delmem_var_op()(gx_arr_d); + delmem_var_op()(drhocg_d); return; } diff --git a/source/module_hamilt_pw/hamilt_pwdft/stress_func_nl.cpp b/source/module_hamilt_pw/hamilt_pwdft/stress_func_nl.cpp index 657e3c760d..1af82ba153 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/stress_func_nl.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/stress_func_nl.cpp @@ -70,7 +70,7 @@ void Stress_Func::stress_nl(ModuleBase::matrix& sigma, } // transfer stress from device to host syncmem_var_d2h_op()(sigmanlc.data(), stress_device, 9); - delmem_var_op()(this->ctx, stress_device); + delmem_var_op()(stress_device); // sum up forcenl from all processors for (int l = 0; l < 3; l++) { diff --git a/source/module_hamilt_pw/hamilt_pwdft/stress_func_onsite.cpp b/source/module_hamilt_pw/hamilt_pwdft/stress_func_onsite.cpp index 21ace6f7cc..acce052e83 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/stress_func_onsite.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/stress_func_onsite.cpp @@ -69,7 +69,7 @@ void Stress_Func::stress_onsite(ModuleBase::matrix& sigma, } // transfer stress from device to host syncmem_var_d2h_op()(sigma_onsite.data(), stress_device, 9); - delmem_var_op()(this->ctx, stress_device); + delmem_var_op()(stress_device); // sum up forcenl from all processors for (int l = 0; l < 3; l++) { diff --git a/source/module_hamilt_pw/hamilt_pwdft/structure_factor.cpp b/source/module_hamilt_pw/hamilt_pwdft/structure_factor.cpp index d8a9434197..4e328c1fda 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/structure_factor.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/structure_factor.cpp @@ -27,19 +27,19 @@ Structure_Factor::~Structure_Factor() { if (device == "gpu") { if (PARAM.inp.precision == "single") { - delmem_cd_op()(gpu_ctx, this->c_eigts1); - delmem_cd_op()(gpu_ctx, this->c_eigts2); - delmem_cd_op()(gpu_ctx, this->c_eigts3); + delmem_cd_op()(this->c_eigts1); + delmem_cd_op()(this->c_eigts2); + delmem_cd_op()(this->c_eigts3); } - delmem_zd_op()(gpu_ctx, this->z_eigts1); - delmem_zd_op()(gpu_ctx, this->z_eigts2); - delmem_zd_op()(gpu_ctx, this->z_eigts3); + delmem_zd_op()(this->z_eigts1); + delmem_zd_op()(this->z_eigts2); + delmem_zd_op()(this->z_eigts3); } else { if (PARAM.inp.precision == "single") { - delmem_ch_op()(cpu_ctx, this->c_eigts1); - delmem_ch_op()(cpu_ctx, this->c_eigts2); - delmem_ch_op()(cpu_ctx, this->c_eigts3); + delmem_ch_op()(this->c_eigts1); + delmem_ch_op()(this->c_eigts2); + delmem_ch_op()(this->c_eigts3); } // There's no need to delete double precision pointers while in a CPU environment. } diff --git a/source/module_hamilt_pw/hamilt_pwdft/structure_factor_k.cpp b/source/module_hamilt_pw/hamilt_pwdft/structure_factor_k.cpp index 6d255a787b..bca92ac1cf 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/structure_factor_k.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/structure_factor_k.cpp @@ -135,8 +135,8 @@ void Structure_Factor::get_sk(Device* ctx, sk); if (device == base_device::GpuDevice) { - delmem_int_op()(ctx, atom_na); - delmem_var_op()(ctx, atom_tau); + delmem_int_op()(atom_na); + delmem_var_op()(atom_tau); } delete[] h_atom_na; delete[] h_atom_tau; diff --git a/source/module_hamilt_pw/hamilt_stodft/sto_che.cpp b/source/module_hamilt_pw/hamilt_stodft/sto_che.cpp index 8c50427459..34e20977eb 100644 --- a/source/module_hamilt_pw/hamilt_stodft/sto_che.cpp +++ b/source/module_hamilt_pw/hamilt_stodft/sto_che.cpp @@ -9,7 +9,7 @@ StoChe::~StoChe() { delete p_che; delete[] spolyv_cpu; - delmem_var_op()(this->ctx, spolyv); + delmem_var_op()(spolyv); } template diff --git a/source/module_hamilt_pw/hamilt_stodft/sto_che.h b/source/module_hamilt_pw/hamilt_stodft/sto_che.h index 7557bb065a..f241553b66 100644 --- a/source/module_hamilt_pw/hamilt_stodft/sto_che.h +++ b/source/module_hamilt_pw/hamilt_stodft/sto_che.h @@ -59,8 +59,8 @@ REAL vTMv(const REAL* v, const REAL* M, const int n) base_device::memory::synchronize_memory_op()(&result, dot_device, 1); - base_device::memory::delete_memory_op()(ctx, y); - base_device::memory::delete_memory_op()(ctx, dot_device); + base_device::memory::delete_memory_op()(y); + base_device::memory::delete_memory_op()(dot_device); return result; } diff --git a/source/module_hamilt_pw/hamilt_stodft/sto_forces.cpp b/source/module_hamilt_pw/hamilt_stodft/sto_forces.cpp index 0b01acb148..6684332781 100644 --- a/source/module_hamilt_pw/hamilt_stodft/sto_forces.cpp +++ b/source/module_hamilt_pw/hamilt_stodft/sto_forces.cpp @@ -251,7 +251,7 @@ void Sto_Forces::cal_sto_force_nl( } // end ik syncmem_var_d2h_op()(forcenl.c, force, forcenl.nr * forcenl.nc); - delmem_var_op()(this->ctx, force); + delmem_var_op()(force); // sum up forcenl from all processors Parallel_Reduce::reduce_all(forcenl.c, forcenl.nr * forcenl.nc); diff --git a/source/module_hamilt_pw/hamilt_stodft/sto_iter.cpp b/source/module_hamilt_pw/hamilt_stodft/sto_iter.cpp index cddfb5f81f..8ec669febd 100644 --- a/source/module_hamilt_pw/hamilt_stodft/sto_iter.cpp +++ b/source/module_hamilt_pw/hamilt_stodft/sto_iter.cpp @@ -30,7 +30,7 @@ void Stochastic_Iter::dot(const int& n, const Real* x, const int& inc resmem_var_op()(result_device, 1); container::kernels::blas_dot()(n, p_che->coef_real, 1, spolyv, 1, result_device); syncmem_var_d2h_op()(&result, result_device, 1); - delmem_var_op()(this->ctx, result_device); + delmem_var_op()(result_device); } template @@ -109,7 +109,7 @@ void Stochastic_Iter::orthog(const int& ik, psi::Psi& psi, &ModuleBase::ONE, wfgout, npwx); - delmem_complex_op()(this->ctx, sum); + delmem_complex_op()(sum); } ModuleBase::timer::tick("Stochastic_Iter", "orthog"); } @@ -549,7 +549,7 @@ void Stochastic_Iter::sum_stoeband(Stochastic_WF& stowf, tmpin += npwx; tmpout += npwx; } - delmem_complex_op()(this->ctx, hshchi); + delmem_complex_op()(hshchi); } } #ifdef __MPI @@ -633,7 +633,7 @@ void Stochastic_Iter::cal_storho(const UnitCell& ucell, pes->rho = reinterpret_cast(pes->charge->rho); } - delmem_complex_op()(this->ctx, porter); + delmem_complex_op()(porter); #ifdef __MPI if(GlobalV::KPAR > 1) { @@ -739,7 +739,7 @@ void Stochastic_Iter::calTnchi_ik(const int& ik, Stochastic_WFcoef_real, p_che->norder); gemv_op()(this->ctx, transa, M, N, &one, stowf.chiallorder[ik].get_pointer(), LDA, coef_real, inc, &zero, out, inc); // zgemv_(&transa, &M, &N, &one, stowf.chiallorder[ik].get_pointer(), &LDA, coef_real, &inc, &zero, out, &inc); - delmem_complex_op()(this->ctx, coef_real); + delmem_complex_op()(coef_real); } else { diff --git a/source/module_hamilt_pw/hamilt_stodft/sto_stress_pw.cpp b/source/module_hamilt_pw/hamilt_stodft/sto_stress_pw.cpp index f875604147..62a4c16779 100644 --- a/source/module_hamilt_pw/hamilt_stodft/sto_stress_pw.cpp +++ b/source/module_hamilt_pw/hamilt_stodft/sto_stress_pw.cpp @@ -202,7 +202,7 @@ void Sto_Stress_PW::sto_stress_nl(ModuleBase::matrix& sigma, // transfer stress from device to host syncmem_var_d2h_op()(sigmanlc.data(), stress_device, 9); - delmem_var_op()(this->ctx, stress_device); + delmem_var_op()(stress_device); // sum up forcenl from all processors for (int l = 0; l < 3; l++) { diff --git a/source/module_hsolver/diago_dav_subspace.cpp b/source/module_hsolver/diago_dav_subspace.cpp index d89d2292c3..f7daf229a2 100644 --- a/source/module_hsolver/diago_dav_subspace.cpp +++ b/source/module_hsolver/diago_dav_subspace.cpp @@ -78,17 +78,17 @@ Diago_DavSubspace::Diago_DavSubspace(const std::vector& precond template Diago_DavSubspace::~Diago_DavSubspace() { - delmem_complex_op()(this->ctx, this->psi_in_iter); + delmem_complex_op()(this->psi_in_iter); - delmem_complex_op()(this->ctx, this->hphi); - delmem_complex_op()(this->ctx, this->hcc); - delmem_complex_op()(this->ctx, this->scc); - delmem_complex_op()(this->ctx, this->vcc); + delmem_complex_op()(this->hphi); + delmem_complex_op()(this->hcc); + delmem_complex_op()(this->scc); + delmem_complex_op()(this->vcc); #if defined(__CUDA) || defined(__ROCM) if (this->device == base_device::GpuDevice) { - delmem_real_op()(this->ctx, this->d_precondition); + delmem_real_op()(this->d_precondition); } #endif } @@ -316,7 +316,7 @@ void Diago_DavSubspace::cal_grad(const HPsiFunc& hpsi_func, } if(this->device == base_device::GpuDevice) { - delmem_real_op()(this->ctx, e_temp_hd); + delmem_real_op()(e_temp_hd); } #ifdef __DSP @@ -560,13 +560,13 @@ void Diago_DavSubspace::diag_zhegvx(const int& nbase, { base_device::memory::synchronize_memory_op()(vcc + i * nbase_x, vcc_gpu + i * nbase, nbase); } - delmem_complex_op()(this->ctx, hcc_gpu); - delmem_complex_op()(this->ctx, scc_gpu); - delmem_complex_op()(this->ctx, vcc_gpu); + delmem_complex_op()(hcc_gpu); + delmem_complex_op()(scc_gpu); + delmem_complex_op()(vcc_gpu); syncmem_var_d2h_op()((*eigenvalue_iter).data(), eigenvalue_gpu, this->nbase_x); - delmem_real_op()(this->ctx, eigenvalue_gpu); + delmem_real_op()(eigenvalue_gpu); } #endif } @@ -754,9 +754,9 @@ void Diago_DavSubspace::refresh(const int& dim, syncmem_h2d_op()(scc, scc_cpu, this->nbase_x * this->nbase_x); syncmem_h2d_op()(vcc, vcc_cpu, this->nbase_x * this->nbase_x); - base_device::memory::delete_memory_op()(this->cpu_ctx, hcc_cpu); - base_device::memory::delete_memory_op()(this->cpu_ctx, scc_cpu); - base_device::memory::delete_memory_op()(this->cpu_ctx, vcc_cpu); + base_device::memory::delete_memory_op()(hcc_cpu); + base_device::memory::delete_memory_op()(scc_cpu); + base_device::memory::delete_memory_op()(vcc_cpu); #endif } else diff --git a/source/module_hsolver/diago_david.cpp b/source/module_hsolver/diago_david.cpp index 200a6705d9..6afaf998b8 100644 --- a/source/module_hsolver/diago_david.cpp +++ b/source/module_hsolver/diago_david.cpp @@ -105,19 +105,19 @@ DiagoDavid::DiagoDavid(const Real* precondition_in, template DiagoDavid::~DiagoDavid() { - delmem_complex_op()(this->ctx, this->basis); - delmem_complex_op()(this->ctx, this->hpsi); - delmem_complex_op()(this->ctx, this->spsi); - delmem_complex_op()(this->ctx, this->hcc); + delmem_complex_op()(this->basis); + delmem_complex_op()(this->hpsi); + delmem_complex_op()(this->spsi); + delmem_complex_op()(this->hcc); // delmem_complex_op()(this->ctx, this->scc); - delmem_complex_op()(this->ctx, this->vcc); - delmem_complex_op()(this->ctx, this->lagrange_matrix); - base_device::memory::delete_memory_op()(this->cpu_ctx, this->eigenvalue); + delmem_complex_op()(this->vcc); + delmem_complex_op()(this->lagrange_matrix); + base_device::memory::delete_memory_op()(this->eigenvalue); // If the device is a GPU device, free the d_precondition array. #if defined(__CUDA) || defined(__ROCM) if (this->device == base_device::GpuDevice) { - delmem_var_op()(this->ctx, this->d_precondition); + delmem_var_op()(this->d_precondition); } #endif } @@ -422,7 +422,7 @@ void DiagoDavid::cal_grad(const HPsiFunc& hpsi_func, vc_ev_vector + m * nbase, vc_ev_vector + m * nbase, e_temp_gpu); - delmem_var_op()(this->ctx, e_temp_gpu); + delmem_var_op()(e_temp_gpu); #endif } else @@ -565,8 +565,8 @@ void DiagoDavid::cal_grad(const HPsiFunc& hpsi_func, // hpsi[:, nbase:nbase+notcnv] = H basis[:, nbase:nbase+notcnv] hpsi_func(basis + nbase * dim, hpsi + nbase * dim, dim, notconv); - delmem_complex_op()(this->ctx, lagrange); - delmem_complex_op()(this->ctx, vc_ev_vector); + delmem_complex_op()(lagrange); + delmem_complex_op()(vc_ev_vector); ModuleBase::timer::tick("DiagoDavid", "cal_grad"); return; @@ -702,7 +702,7 @@ void DiagoDavid::diag_zhegvx(const int& nbase, dnevx_op()(this->ctx, nbase, nbase_x, hcc, nband, eigenvalue_gpu, vcc); syncmem_var_d2h_op()(this->eigenvalue, eigenvalue_gpu, nbase_x); - delmem_var_op()(this->ctx, eigenvalue_gpu); + delmem_var_op()(eigenvalue_gpu); #endif } else @@ -846,9 +846,9 @@ void DiagoDavid::refresh(const int& dim, // syncmem_h2d_op()(this->ctx, this->cpu_ctx, scc, scc_cpu, nbase_x * nbase_x); syncmem_h2d_op()(vcc, vcc_cpu, nbase_x * nbase_x); - base_device::memory::delete_memory_op()(this->cpu_ctx, hcc_cpu); + base_device::memory::delete_memory_op()(hcc_cpu); // base_device::memory::delete_memory_op()(this->cpu_ctx, scc_cpu); - base_device::memory::delete_memory_op()(this->cpu_ctx, vcc_cpu); + base_device::memory::delete_memory_op()(vcc_cpu); #endif } else diff --git a/source/module_hsolver/diago_iter_assist.cpp b/source/module_hsolver/diago_iter_assist.cpp index 223df68006..5a3acf8e53 100644 --- a/source/module_hsolver/diago_iter_assist.cpp +++ b/source/module_hsolver/diago_iter_assist.cpp @@ -140,11 +140,11 @@ void DiagoIterAssist::diagH_subspace(const hamilt::Hamilt* if (!in_place) { matrixSetToAnother()(ctx, n_band, temp, ld_temp, evc.get_pointer(), dmax); - delmem_complex_op()(ctx, temp); + delmem_complex_op()(temp); } - delmem_complex_op()(ctx, hcc); - delmem_complex_op()(ctx, scc); - delmem_complex_op()(ctx, vcc); + delmem_complex_op()(hcc); + delmem_complex_op()(scc); + delmem_complex_op()(vcc); ModuleBase::timer::tick("DiagoIterAssist", "diagH_subspace"); } @@ -245,7 +245,7 @@ void DiagoIterAssist::diagH_subspace_init(hamilt::Hamilt* scc + i * nstart, 1); } - delmem_complex_op()(ctx, temp); + delmem_complex_op()(temp); } else if (base_device::get_device_type(ctx) == base_device::CpuDevice) { @@ -271,7 +271,7 @@ void DiagoIterAssist::diagH_subspace_init(hamilt::Hamilt* pHamilt->sPsi(ppsi, spsi, psi_temp.get_nbasis(), psi_temp.get_nbasis(), psi_temp.get_nbands()); gemm_op()(ctx, 'C', 'N', nstart, nstart, dmin, &one, ppsi, dmax, spsi, dmax, &zero, scc, nstart); - delmem_complex_op()(ctx, temp); + delmem_complex_op()(temp); add_to_hcc(hcc, nstart); @@ -358,9 +358,9 @@ void DiagoIterAssist::diagH_subspace_init(hamilt::Hamilt* // delmem_complex_op()(ctx, evctemp); } - delmem_complex_op()(ctx, hcc); - delmem_complex_op()(ctx, scc); - delmem_complex_op()(ctx, vcc); + delmem_complex_op()(hcc); + delmem_complex_op()(scc); + delmem_complex_op()(vcc); ModuleBase::timer::tick("DiagoIterAssist", "diagH_subspace_init"); } @@ -395,7 +395,7 @@ void DiagoIterAssist::diagH_LAPACK(const int nstart, syncmem_var_op()(e, eigenvalues, nbands); } - delmem_var_op()(ctx, eigenvalues); + delmem_var_op()(eigenvalues); // const bool all_eigenvalues = (nstart == nbands); // if (all_eigenvalues) { @@ -483,7 +483,7 @@ void DiagoIterAssist::cal_hs_subspace(const hamilt::Hamilt Parallel_Reduce::reduce_pool(scc, nstart * nstart); } - delmem_complex_op()(ctx, temp); + delmem_complex_op()(temp); } template @@ -525,7 +525,7 @@ void DiagoIterAssist::diag_responce( const T* hcc, mat_col); } - delmem_complex_op()(ctx, vcc); + delmem_complex_op()(vcc); ModuleBase::timer::tick("DiagoIterAssist", "diag_responce"); } @@ -572,10 +572,10 @@ void DiagoIterAssist::diag_subspace_psi(const T* hcc, temp, dmin); matrixSetToAnother()(ctx, n_band, temp, dmin, evc.get_pointer(), dmax); - delmem_complex_op()(ctx, temp); + delmem_complex_op()(temp); } - delmem_complex_op()(ctx, vcc); + delmem_complex_op()(vcc); ModuleBase::timer::tick("DiagoIterAssist", "diag_subspace_psi"); } diff --git a/source/module_hsolver/kernels/cuda/math_kernel_op.cu b/source/module_hsolver/kernels/cuda/math_kernel_op.cu index 3ad26ef3a2..cd3ac41812 100644 --- a/source/module_hsolver/kernels/cuda/math_kernel_op.cu +++ b/source/module_hsolver/kernels/cuda/math_kernel_op.cu @@ -910,7 +910,7 @@ void matrixTranspose_op::operator()(const base_ device_temp, row * col); - base_device::memory::delete_memory_op()(d, device_temp); + base_device::memory::delete_memory_op()(device_temp); } template <> @@ -949,7 +949,7 @@ void matrixTranspose_op, base_device::DEVICE_GPU>::operator( device_temp, row * col); - base_device::memory::delete_memory_op, base_device::DEVICE_GPU>()(d, device_temp); + base_device::memory::delete_memory_op, base_device::DEVICE_GPU>()(device_temp); cudaCheckOnDebug(); @@ -987,7 +987,7 @@ void matrixTranspose_op, base_device::DEVICE_GPU>::operator base_device::DEVICE_GPU, base_device::DEVICE_GPU>()(output_matrix, device_temp, row * col); - base_device::memory::delete_memory_op, base_device::DEVICE_GPU>()(d, device_temp); + base_device::memory::delete_memory_op, base_device::DEVICE_GPU>()(device_temp); } template <> diff --git a/source/module_hsolver/kernels/math_kernel_op.cpp b/source/module_hsolver/kernels/math_kernel_op.cpp index b0930e02d9..db2a12e9db 100644 --- a/source/module_hsolver/kernels/math_kernel_op.cpp +++ b/source/module_hsolver/kernels/math_kernel_op.cpp @@ -341,7 +341,7 @@ struct matrixTranspose_op { output_matrix[i] = temp[i]; } - base_device::memory::delete_memory_op()(d, temp); + base_device::memory::delete_memory_op()(temp); } }; diff --git a/source/module_hsolver/kernels/rocm/math_kernel_op.hip.cu b/source/module_hsolver/kernels/rocm/math_kernel_op.hip.cu index d4185ff81c..1993ae4c64 100644 --- a/source/module_hsolver/kernels/rocm/math_kernel_op.hip.cu +++ b/source/module_hsolver/kernels/rocm/math_kernel_op.hip.cu @@ -827,7 +827,7 @@ void matrixTranspose_op::operator()(const base_ device_temp, row * col); - base_device::memory::delete_memory_op()(d, device_temp); + base_device::memory::delete_memory_op()(device_temp); } template <> @@ -865,7 +865,7 @@ void matrixTranspose_op, base_device::DEVICE_GPU>::operator( device_temp, row * col); - base_device::memory::delete_memory_op, base_device::DEVICE_GPU>()(d, device_temp); + base_device::memory::delete_memory_op, base_device::DEVICE_GPU>()(device_temp); } template <> @@ -896,7 +896,7 @@ void matrixTranspose_op, base_device::DEVICE_GPU>::operator base_device::DEVICE_GPU, base_device::DEVICE_GPU>()(output_matrix, device_temp, row * col); - base_device::memory::delete_memory_op, base_device::DEVICE_GPU>()(d, device_temp); + base_device::memory::delete_memory_op, base_device::DEVICE_GPU>()(device_temp); } template <> diff --git a/source/module_hsolver/kernels/test/math_kernel_test.cpp b/source/module_hsolver/kernels/test/math_kernel_test.cpp index e69bc29b5f..0781d54787 100644 --- a/source/module_hsolver/kernels/test/math_kernel_test.cpp +++ b/source/module_hsolver/kernels/test/math_kernel_test.cpp @@ -379,8 +379,8 @@ TEST_F(TestModuleHsolverMathKernel, zdot_real_op_gpu) double result = zdot_real_gpu_op()(gpu_ctx, dim, psi_L_dev, psi_R_dev, false); hsolver::destoryBLAShandle(); EXPECT_LT(fabs(result - expected_result), 1e-12); - delete_memory_op()(gpu_ctx, psi_L_dev); - delete_memory_op()(gpu_ctx, psi_R_dev); + delete_memory_op()(psi_L_dev); + delete_memory_op()(psi_R_dev); } TEST_F(TestModuleHsolverMathKernel, vector_div_constant_op_gpu) @@ -404,8 +404,8 @@ TEST_F(TestModuleHsolverMathKernel, vector_div_constant_op_gpu) EXPECT_LT(fabs(output[i].imag() - output_vector_div_constant_op[i].imag()), 1e-8); EXPECT_LT(fabs(output[i].real() - output_vector_div_constant_op[i].real()), 1e-8); } - delete_memory_op()(gpu_ctx, input_dev); - delete_memory_op()(gpu_ctx, output_dev); + delete_memory_op()(input_dev); + delete_memory_op()(output_dev); } TEST_F(TestModuleHsolverMathKernel, vector_mul_vector_op_gpu) @@ -439,9 +439,9 @@ TEST_F(TestModuleHsolverMathKernel, vector_mul_vector_op_gpu) EXPECT_LT(fabs(output[i].real() - output_vector_mul_vector_op[i].real()), 1e-8); } - delete_memory_op()(gpu_ctx, input_dev); - delete_memory_op_double()(gpu_ctx, input_double_dev); - delete_memory_op()(gpu_ctx, output_dev); + delete_memory_op()(input_dev); + delete_memory_op_double()(input_double_dev); + delete_memory_op()(output_dev); } TEST_F(TestModuleHsolverMathKernel, vector_div_vector_op_gpu) @@ -475,9 +475,9 @@ TEST_F(TestModuleHsolverMathKernel, vector_div_vector_op_gpu) EXPECT_LT(fabs(output[i].real() - output_vector_div_vector_op[i].real()), 1e-8); } - delete_memory_op()(gpu_ctx, input_dev); - delete_memory_op_double()(gpu_ctx, input_double_dev); - delete_memory_op()(gpu_ctx, output_dev); + delete_memory_op()(input_dev); + delete_memory_op_double()(input_double_dev); + delete_memory_op()(output_dev); } TEST_F(TestModuleHsolverMathKernel, constantvector_addORsub_constantVector_op_gpu) @@ -517,9 +517,9 @@ TEST_F(TestModuleHsolverMathKernel, constantvector_addORsub_constantVector_op_gp EXPECT_LT(fabs(output[i].real() - output_constantvector_addORsub_constantVector_op[i].real()), 1e-8); } - delete_memory_op()(gpu_ctx, input1_dev); - delete_memory_op()(gpu_ctx, input2_dev); - delete_memory_op()(gpu_ctx, output_dev); + delete_memory_op()(input1_dev); + delete_memory_op()(input2_dev); + delete_memory_op()(output_dev); } TEST_F(TestModuleHsolverMathKernel, axpy_op_gpu) @@ -550,8 +550,8 @@ TEST_F(TestModuleHsolverMathKernel, axpy_op_gpu) EXPECT_LT(fabs(Y_axpy[i].real() - output_axpy_op[i].real()), 1e-8); } - delete_memory_op()(gpu_ctx, X_axpy_dev); - delete_memory_op()(gpu_ctx, Y_axpy_dev); + delete_memory_op()(X_axpy_dev); + delete_memory_op()(Y_axpy_dev); } TEST_F(TestModuleHsolverMathKernel, scal_op_gpu) @@ -578,7 +578,7 @@ TEST_F(TestModuleHsolverMathKernel, scal_op_gpu) EXPECT_LT(fabs(X_scal[i].imag() - output_scal_op[i].imag()), 1e-8); EXPECT_LT(fabs(X_scal[i].real() - output_scal_op[i].real()), 1e-8); } - delete_memory_op()(gpu_ctx, X_scal_dev); + delete_memory_op()(X_scal_dev); } TEST_F(TestModuleHsolverMathKernel, gemv_op_gpu) @@ -628,9 +628,9 @@ TEST_F(TestModuleHsolverMathKernel, gemv_op_gpu) EXPECT_LT(fabs(Y_gemv[i].real() - Y_test_gemv[i].real()), 1e-12); } - delete_memory_op()(gpu_ctx, A_gemv_dev); - delete_memory_op()(gpu_ctx, X_gemv_dev); - delete_memory_op()(gpu_ctx, Y_gemv_dev); + delete_memory_op()(A_gemv_dev); + delete_memory_op()(X_gemv_dev); + delete_memory_op()(Y_gemv_dev); } TEST_F(TestModuleHsolverMathKernel, matrixSetToAnother_op_gpu) @@ -715,8 +715,8 @@ TEST_F(TestModuleHsolverMathKernel, matrixSetToAnother_op_gpu) EXPECT_LT(fabs(B_gpu2cpu[i].real() - B_cpu[i].real()), 1e-12); } - delete_memory_op()(gpu_ctx, device_A); - delete_memory_op()(gpu_ctx, device_B); + delete_memory_op()(device_A); + delete_memory_op()(device_B); } #endif // __UT_USE_CUDA || __UT_USE_ROCM diff --git a/source/module_hsolver/test/hsolver_pw_sup.h b/source/module_hsolver/test/hsolver_pw_sup.h index fcb2862a29..c61ffaca7d 100644 --- a/source/module_hsolver/test/hsolver_pw_sup.h +++ b/source/module_hsolver/test/hsolver_pw_sup.h @@ -139,15 +139,13 @@ DiagoDavid::DiagoDavid(const Real* precondition_in, template DiagoDavid::~DiagoDavid() { - delmem_complex_op()(this->ctx, this->hpsi); - delmem_complex_op()(this->ctx, this->spsi); - delmem_complex_op()(this->ctx, this->hcc); - delmem_complex_op()(this->ctx, this->scc); - delmem_complex_op()(this->ctx, this->vcc); - delmem_complex_op()(this->ctx, this->lagrange_matrix); - base_device::memory::delete_memory_op()( - this->cpu_ctx, - this->eigenvalue); + delmem_complex_op()(this->hpsi); + delmem_complex_op()(this->spsi); + delmem_complex_op()(this->hcc); + delmem_complex_op()(this->scc); + delmem_complex_op()(this->vcc); + delmem_complex_op()(this->lagrange_matrix); + base_device::memory::delete_memory_op()(this->eigenvalue); } template diff --git a/source/module_psi/psi.cpp b/source/module_psi/psi.cpp index cc2a2d5d41..a69635dffb 100644 --- a/source/module_psi/psi.cpp +++ b/source/module_psi/psi.cpp @@ -40,7 +40,7 @@ Psi::~Psi() { if (this->allocate_inside) { - delete_memory_op()(this->ctx, this->psi); + delete_memory_op()(this->psi); } }