Skip to content

Commit 87568f0

Browse files
dyzhengjieli-matrix
authored andcommitted
Fix: do dngvd on DCU rather than CPU
1 parent b029c4e commit 87568f0

File tree

1 file changed

+185
-37
lines changed

1 file changed

+185
-37
lines changed

source/module_hsolver/kernels/rocm/dngvd_op.hip.cu

Lines changed: 185 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,23 @@
55

66
namespace hsolver {
77

8+
// NOTE: mimicked from ../cuda/dngvd_op.cu for three dngvd_op
9+
10+
static hipsolverHandle_t hipsolver_H = nullptr;
11+
812
void createGpuSolverHandle() {
9-
return;
13+
if (hipsolver_H == nullptr)
14+
{
15+
hipsolverErrcheck(hipsolverCreate(&hipsolver_H));
16+
}
1017
}
1118

1219
void destroyGpuSolverHandle() {
13-
return;
20+
if (hipsolver_H != nullptr)
21+
{
22+
hipsolverErrcheck(hipsolverDestroy(hipsolver_H));
23+
hipsolver_H = nullptr;
24+
}
1425
}
1526

1627
#ifdef __LCAO
@@ -23,22 +34,65 @@ void dngvd_op<double, base_device::DEVICE_GPU>::operator()(const base_device::DE
2334
double* _eigenvalue,
2435
double* _vcc)
2536
{
26-
std::vector<double> hcc(nstart * nstart, 0.0);
27-
std::vector<double> scc(nstart * nstart, 0.0);
28-
std::vector<double> vcc(nstart * nstart, 0.0);
29-
std::vector<double> eigenvalue(nstart, 0);
30-
hipErrcheck(hipMemcpy(hcc.data(), _hcc, sizeof(double) * hcc.size(), hipMemcpyDeviceToHost));
31-
hipErrcheck(hipMemcpy(scc.data(), _scc, sizeof(double) * scc.size(), hipMemcpyDeviceToHost));
32-
base_device::DEVICE_CPU* cpu_ctx = {};
33-
dngvd_op<double, base_device::DEVICE_CPU>()(cpu_ctx,
34-
nstart,
35-
ldh,
36-
hcc.data(),
37-
scc.data(),
38-
eigenvalue.data(),
39-
vcc.data());
40-
hipErrcheck(hipMemcpy(_vcc, vcc.data(), sizeof(double) * vcc.size(), hipMemcpyHostToDevice));
41-
hipErrcheck(hipMemcpy(_eigenvalue, eigenvalue.data(), sizeof(double) * eigenvalue.size(), hipMemcpyHostToDevice));
37+
// copied from ../cuda/dngvd_op.cu, "dngvd_op"
38+
assert(nstart == ldh);
39+
40+
hipErrcheck(hipMemcpy(_vcc, _hcc, sizeof(double) * ldh * nstart, hipMemcpyDeviceToDevice));
41+
// now vcc contains hcc
42+
43+
// prepare some values for hipsolverDnZhegvd_bufferSize
44+
int * devInfo = nullptr;
45+
int lwork = 0, info_gpu = 0;
46+
double * work = nullptr;
47+
hipErrcheck(hipMalloc((void**)&devInfo, sizeof(int)));
48+
hipsolverFillMode_t uplo = HIPSOLVER_FILL_MODE_UPPER;
49+
50+
// calculate the sizes needed for pre-allocated buffer.
51+
hipsolverErrcheck(hipsolverDnDsygvd_bufferSize(
52+
hipsolver_H, HIPSOLVER_EIG_TYPE_1, HIPSOLVER_EIG_MODE_VECTOR, uplo,
53+
nstart,
54+
_vcc, ldh,
55+
_scc, ldh,
56+
_eigenvalue,
57+
&lwork));
58+
59+
// allocate memery
60+
hipErrcheck(hipMalloc((void**)&work, sizeof(double) * lwork));
61+
62+
// compute eigenvalues and eigenvectors.
63+
hipsolverErrcheck(hipsolverDnDsygvd(
64+
hipsolver_H, HIPSOLVER_EIG_TYPE_1, HIPSOLVER_EIG_MODE_VECTOR, uplo,
65+
nstart,
66+
_vcc, ldh,
67+
const_cast<double *>(_scc), ldh,
68+
_eigenvalue,
69+
work, lwork, devInfo));
70+
71+
hipErrcheck(hipMemcpy(&info_gpu, devInfo, sizeof(int), hipMemcpyDeviceToHost));
72+
73+
// free the buffer
74+
hipErrcheck(hipFree(work));
75+
hipErrcheck(hipFree(devInfo));
76+
// if(fail_info != nullptr) *fail_info = info_gpu;
77+
78+
79+
//std::vector<double> hcc(nstart * nstart, 0.0);
80+
//std::vector<double> scc(nstart * nstart, 0.0);
81+
//std::vector<double> vcc(nstart * nstart, 0.0);
82+
//std::vector<double> eigenvalue(nstart, 0);
83+
//hipErrcheck(hipMemcpy(hcc.data(), _hcc, sizeof(double) * hcc.size(), hipMemcpyDeviceToHost));
84+
//hipErrcheck(hipMemcpy(scc.data(), _scc, sizeof(double) * scc.size(), hipMemcpyDeviceToHost));
85+
//base_device::DEVICE_CPU* cpu_ctx = {};
86+
//dngvd_op<double, base_device::DEVICE_CPU>()(cpu_ctx,
87+
// nstart,
88+
// ldh,
89+
// hcc.data(),
90+
// scc.data(),
91+
// eigenvalue.data(),
92+
// vcc.data(),
93+
// fail_info);
94+
//hipErrcheck(hipMemcpy(_vcc, vcc.data(), sizeof(double) * vcc.size(), hipMemcpyHostToDevice));
95+
//hipErrcheck(hipMemcpy(_eigenvalue, eigenvalue.data(), sizeof(double) * eigenvalue.size(), hipMemcpyHostToDevice));
4296
}
4397
#endif // __LCAO
4498

@@ -51,22 +105,64 @@ void dngvd_op<std::complex<float>, base_device::DEVICE_GPU>::operator()(const ba
51105
float* _eigenvalue,
52106
std::complex<float>* _vcc)
53107
{
54-
std::vector<std::complex<float>> hcc(nstart * nstart, {0, 0});
55-
std::vector<std::complex<float>> scc(nstart * nstart, {0, 0});
56-
std::vector<std::complex<float>> vcc(nstart * nstart, {0, 0});
57-
std::vector<float> eigenvalue(nstart, 0);
58-
hipErrcheck(hipMemcpy(hcc.data(), _hcc, sizeof(std::complex<float>) * hcc.size(), hipMemcpyDeviceToHost));
59-
hipErrcheck(hipMemcpy(scc.data(), _scc, sizeof(std::complex<float>) * scc.size(), hipMemcpyDeviceToHost));
60-
base_device::DEVICE_CPU* cpu_ctx = {};
61-
dngvd_op<std::complex<float>, base_device::DEVICE_CPU>()(cpu_ctx,
62-
nstart,
63-
ldh,
64-
hcc.data(),
65-
scc.data(),
66-
eigenvalue.data(),
67-
vcc.data());
68-
hipErrcheck(hipMemcpy(_vcc, vcc.data(), sizeof(std::complex<float>) * vcc.size(), hipMemcpyHostToDevice));
69-
hipErrcheck(hipMemcpy(_eigenvalue, eigenvalue.data(), sizeof(float) * eigenvalue.size(), hipMemcpyHostToDevice));
108+
// copied from ../cuda/dngvd_op.cu, "dngvd_op"
109+
assert(nstart == ldh);
110+
111+
hipErrcheck(hipMemcpy(_vcc, _hcc, sizeof(std::complex<float>) * ldh * nstart, hipMemcpyDeviceToDevice));
112+
// now vcc contains hcc
113+
114+
// prepare some values for hipsolverDnZhegvd_bufferSize
115+
int * devInfo = nullptr;
116+
int lwork = 0, info_gpu = 0;
117+
float2 * work = nullptr;
118+
hipErrcheck(hipMalloc((void**)&devInfo, sizeof(int)));
119+
hipsolverFillMode_t uplo = HIPSOLVER_FILL_MODE_UPPER;
120+
121+
// calculate the sizes needed for pre-allocated buffer.
122+
hipsolverErrcheck(hipsolverDnChegvd_bufferSize(
123+
hipsolver_H, HIPSOLVER_EIG_TYPE_1, HIPSOLVER_EIG_MODE_VECTOR, uplo,
124+
nstart,
125+
reinterpret_cast<const float2 *>(_vcc), ldh,
126+
reinterpret_cast<const float2 *>(_scc), ldh,
127+
_eigenvalue,
128+
&lwork));
129+
130+
// allocate memery
131+
hipErrcheck(hipMalloc((void**)&work, sizeof(float2) * lwork));
132+
133+
// compute eigenvalues and eigenvectors.
134+
hipsolverErrcheck(hipsolverDnChegvd(
135+
hipsolver_H, HIPSOLVER_EIG_TYPE_1, HIPSOLVER_EIG_MODE_VECTOR, uplo,
136+
nstart,
137+
reinterpret_cast<float2 *>(_vcc), ldh,
138+
const_cast<float2 *>(reinterpret_cast<const float2 *>(_scc)), ldh,
139+
_eigenvalue,
140+
work, lwork, devInfo));
141+
142+
hipErrcheck(hipMemcpy(&info_gpu, devInfo, sizeof(int), hipMemcpyDeviceToHost));
143+
// free the buffer
144+
hipErrcheck(hipFree(work));
145+
hipErrcheck(hipFree(devInfo));
146+
// if(fail_info != nullptr) *fail_info = info_gpu;
147+
148+
149+
//std::vector<std::complex<float>> hcc(nstart * nstart, {0, 0});
150+
//std::vector<std::complex<float>> scc(nstart * nstart, {0, 0});
151+
//std::vector<std::complex<float>> vcc(nstart * nstart, {0, 0});
152+
//std::vector<float> eigenvalue(nstart, 0);
153+
//hipErrcheck(hipMemcpy(hcc.data(), _hcc, sizeof(std::complex<float>) * hcc.size(), hipMemcpyDeviceToHost));
154+
//hipErrcheck(hipMemcpy(scc.data(), _scc, sizeof(std::complex<float>) * scc.size(), hipMemcpyDeviceToHost));
155+
//base_device::DEVICE_CPU* cpu_ctx = {};
156+
//dngvd_op<std::complex<float>, base_device::DEVICE_CPU>()(cpu_ctx,
157+
// nstart,
158+
// ldh,
159+
// hcc.data(),
160+
// scc.data(),
161+
// eigenvalue.data(),
162+
// vcc.data(),
163+
// fail_info);
164+
//hipErrcheck(hipMemcpy(_vcc, vcc.data(), sizeof(std::complex<float>) * vcc.size(), hipMemcpyHostToDevice));
165+
//hipErrcheck(hipMemcpy(_eigenvalue, eigenvalue.data(), sizeof(float) * eigenvalue.size(), hipMemcpyHostToDevice));
70166
}
71167

72168
template <>
@@ -76,9 +172,61 @@ void dngvd_op<std::complex<double>, base_device::DEVICE_GPU>::operator()(const b
76172
const std::complex<double>* _hcc,
77173
const std::complex<double>* _scc,
78174
double* _eigenvalue,
79-
std::complex<double>* _vcc)
175+
std::complex<double>* _vcc
176+
)
80177
{
81-
std::vector<std::complex<double>> hcc(nstart * nstart, {0, 0});
178+
// copied from ../cuda/dngvd_op.cu, "dngvd_op"
179+
assert(nstart == ldh);
180+
181+
// save a copy of scc in case the diagonalization fails
182+
std::vector<std::complex<double>> scc(nstart * nstart, {0, 0});
183+
hipErrcheck(hipMemcpy(scc.data(), _scc, sizeof(std::complex<double>) * scc.size(), hipMemcpyDeviceToHost));
184+
185+
hipErrcheck(hipMemcpy(_vcc, _hcc, sizeof(std::complex<double>) * ldh * nstart, hipMemcpyDeviceToDevice));
186+
187+
// now vcc contains hcc
188+
189+
// prepare some values for hipsolverDnZhegvd_bufferSize
190+
int * devInfo = nullptr;
191+
int lwork = 0, info_gpu = 0;
192+
double2 * work = nullptr;
193+
hipErrcheck(hipMalloc((void**)&devInfo, sizeof(int)));
194+
hipsolverFillMode_t uplo = HIPSOLVER_FILL_MODE_UPPER;
195+
196+
// calculate the sizes needed for pre-allocated buffer.
197+
hipsolverErrcheck(hipsolverDnZhegvd_bufferSize(
198+
hipsolver_H, HIPSOLVER_EIG_TYPE_1, HIPSOLVER_EIG_MODE_VECTOR, uplo,
199+
nstart,
200+
reinterpret_cast<const double2 *>(_vcc), ldh,
201+
reinterpret_cast<const double2 *>(_scc), ldh,
202+
_eigenvalue,
203+
&lwork));
204+
205+
// allocate memery
206+
hipErrcheck(hipMalloc((void**)&work, sizeof(double2) * lwork));
207+
208+
// compute eigenvalues and eigenvectors.
209+
hipsolverErrcheck(hipsolverDnZhegvd(
210+
hipsolver_H, HIPSOLVER_EIG_TYPE_1, HIPSOLVER_EIG_MODE_VECTOR, uplo,
211+
nstart,
212+
reinterpret_cast<double2 *>(_vcc), ldh,
213+
const_cast<double2 *>(reinterpret_cast<const double2 *>(_scc)), ldh,
214+
_eigenvalue,
215+
work, lwork, devInfo));
216+
217+
hipErrcheck(hipMemcpy(&info_gpu, devInfo, sizeof(int), hipMemcpyDeviceToHost));
218+
// free the buffer
219+
hipErrcheck(hipFree(work));
220+
hipErrcheck(hipFree(devInfo));
221+
// if(fail_info != nullptr) *fail_info = info_gpu;
222+
223+
224+
225+
226+
227+
228+
229+
/*std::vector<std::complex<double>> hcc(nstart * nstart, {0, 0});
82230
std::vector<std::complex<double>> scc(nstart * nstart, {0, 0});
83231
std::vector<std::complex<double>> vcc(nstart * nstart, {0, 0});
84232
std::vector<double> eigenvalue(nstart, 0);
@@ -93,7 +241,7 @@ void dngvd_op<std::complex<double>, base_device::DEVICE_GPU>::operator()(const b
93241
eigenvalue.data(),
94242
vcc.data());
95243
hipErrcheck(hipMemcpy(_vcc, vcc.data(), sizeof(std::complex<double>) * vcc.size(), hipMemcpyHostToDevice));
96-
hipErrcheck(hipMemcpy(_eigenvalue, eigenvalue.data(), sizeof(double) * eigenvalue.size(), hipMemcpyHostToDevice));
244+
hipErrcheck(hipMemcpy(_eigenvalue, eigenvalue.data(), sizeof(double) * eigenvalue.size(), hipMemcpyHostToDevice));*/
97245
}
98246

99247
#ifdef __LCAO

0 commit comments

Comments
 (0)