|
7 | 7 | #include "module_hsolver/kernels/dngvd_op.h" |
8 | 8 | #include "module_hsolver/kernels/math_kernel_op.h" |
9 | 9 | #include "module_base/kernels/dsp/dsp_connector.h" |
10 | | - |
| 10 | +#include <hip/hip_runtime.h> |
11 | 11 | #include <vector> |
12 | 12 |
|
13 | 13 | using namespace hsolver; |
@@ -261,6 +261,16 @@ void Diago_DavSubspace<T, Device>::cal_grad(const HPsiFunc& hpsi_func, |
261 | 261 | bool test_precond = true; |
262 | 262 | bool test_norm = true; |
263 | 263 | ModuleBase::timer::tick("Diago_DavSubspace", "cal_grad"); |
| 264 | +/***#if defined(__CUDA) || defined(__ROCM) |
| 265 | + if (this->device == base_device::GpuDevice) { |
| 266 | + size_t free_mem, total_mem; |
| 267 | + hipMemGetInfo(&free_mem, &total_mem); |
| 268 | + std::cout << "\n[Memory Debug] Before operations - Free GPU memory: " << free_mem / 1024.0 / 1024.0 |
| 269 | + << " MB, Total: " << total_mem / 1024.0 / 1024.0 << " MB" << std::endl; |
| 270 | + std::cout << "[Memory Debug] Dimensions - dim: " << dim << ", nbase: " << nbase |
| 271 | + << ", notconv: " << notconv << std::endl; |
| 272 | + } |
| 273 | +#endif***/ |
264 | 274 | for (size_t i = 0; i < notconv; i++) |
265 | 275 | { |
266 | 276 | if (unconv[i] != i) |
@@ -292,13 +302,27 @@ void Diago_DavSubspace<T, Device>::cal_grad(const HPsiFunc& hpsi_func, |
292 | 302 |
|
293 | 303 | // Eigenvalues operation section |
294 | 304 | if (test_eigs) { |
| 305 | +/***#if defined(__CUDA) || defined(__ROCM) |
| 306 | + if (this->device == base_device::GpuDevice) { |
| 307 | + size_t free_mem, total_mem; |
| 308 | + hipMemGetInfo(&free_mem, &total_mem); |
| 309 | + std::cout << "\n[Memory Debug] Before eigenvalues op - Free GPU memory: " << free_mem / 1024.0 / 1024.0 |
| 310 | + << " MB" << std::endl; |
| 311 | + } |
| 312 | +#endif***/ |
295 | 313 | // Original implementation |
296 | 314 | std::vector<Real> e_temp_cpu(this->notconv, 0); |
297 | 315 | Real* e_temp_hd = e_temp_cpu.data(); |
298 | 316 | if (this->device == base_device::GpuDevice) |
299 | 317 | { |
300 | 318 | e_temp_hd = nullptr; |
301 | | - resmem_real_op()(this->ctx, e_temp_hd, this->notconv); |
| 319 | + try { |
| 320 | + resmem_real_op()(this->ctx, e_temp_hd, this->notconv); |
| 321 | + } catch (const std::exception& e) { |
| 322 | + std::cerr << "[Memory Debug] Failed to allocate e_temp_hd of size " |
| 323 | + << (this->notconv * sizeof(Real)) << " bytes: " << e.what() << std::endl; |
| 324 | + throw; |
| 325 | + } |
302 | 326 | } |
303 | 327 |
|
304 | 328 | for (int m = 0; m < this->notconv; m++) |
@@ -371,10 +395,18 @@ void Diago_DavSubspace<T, Device>::cal_grad(const HPsiFunc& hpsi_func, |
371 | 395 | #if defined(__CUDA) || defined(__ROCM) |
372 | 396 | if (this->device == base_device::GpuDevice) |
373 | 397 | { |
| 398 | + /*** size_t free_mem, total_mem; |
| 399 | + hipMemGetInfo(&free_mem, &total_mem); |
| 400 | + std::cout << "\n[Memory Debug] Before precondition op - Free GPU memory: " << free_mem / 1024.0 / 1024.0 << " MB" << std::endl;***/ |
374 | 401 | Real* eigenvalues_gpu = nullptr; |
375 | | - resmem_real_op()(this->ctx, eigenvalues_gpu, notconv); |
376 | | - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, eigenvalues_gpu, (*eigenvalue_iter).data(), notconv); |
377 | | - |
| 402 | + try { |
| 403 | + resmem_real_op()(this->ctx, eigenvalues_gpu, notconv); |
| 404 | + syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, eigenvalues_gpu,(*eigenvalue_iter).data(), notconv); |
| 405 | + } catch (const std::exception& e) { |
| 406 | + std::cerr << "[Memory Debug] Failed to allocate eigenvalues_gpu of size " |
| 407 | + << (notconv * sizeof(Real)) << " bytes: " << e.what() << std::endl; |
| 408 | + throw; |
| 409 | + } |
378 | 410 | precondition_op<T, Device>()(this->ctx, |
379 | 411 | this->dim, |
380 | 412 | psi_iter, |
@@ -433,9 +465,20 @@ void Diago_DavSubspace<T, Device>::cal_grad(const HPsiFunc& hpsi_func, |
433 | 465 | #if defined(__CUDA) || defined(__ROCM) |
434 | 466 | if (this->device == base_device::GpuDevice) |
435 | 467 | { |
| 468 | + /***size_t free_mem, total_mem; |
| 469 | + hipMemGetInfo(&free_mem, &total_mem); |
| 470 | + std::cout << "\n[Memory Debug] Before normalize op - Free GPU memory: " << free_mem / 1024.0 / 1024.0 << " MB" << std::endl;***/ |
436 | 471 | Real* psi_norm = nullptr; |
437 | | - resmem_real_op()(this->ctx, psi_norm, notconv); |
438 | | - cudaMemset(psi_norm, 0, notconv * sizeof(Real)); |
| 472 | + try { |
| 473 | + resmem_real_op()(this->ctx, psi_norm, notconv); |
| 474 | + using setmem_real_op = base_device::memory::set_memory_op<Real, Device>; |
| 475 | + setmem_real_op()(this->ctx, psi_norm, 0.0, notconv); |
| 476 | +//hipMemset(psi_norm, 0, notconv * sizeof(Real)); |
| 477 | + } catch (const std::exception& e){ |
| 478 | + std::cerr << "[Memory Debug] Failed to allocate psi_norm of size " |
| 479 | + << (notconv * sizeof(Real)) << " bytes: " << e.what() << std::endl; |
| 480 | + throw; |
| 481 | + } |
439 | 482 | normalize_op<T, Device>()(this->ctx, |
440 | 483 | this->dim, |
441 | 484 | psi_iter, |
|
0 commit comments