55
66namespace hsolver {
77
8+ // NOTE: mimicked from ../cuda/dngvd_op.cu for three dngvd_op
9+
10+ static hipsolverHandle_t hipsolver_H = nullptr ;
11+
812void createGpuSolverHandle () {
9- return ;
13+ if (hipsolver_H == nullptr )
14+ {
15+ hipsolverErrcheck (hipsolverCreate (&hipsolver_H));
16+ }
1017}
1118
1219void destroyGpuSolverHandle () {
13- return ;
20+ if (hipsolver_H != nullptr )
21+ {
22+ hipsolverErrcheck (hipsolverDestroy (hipsolver_H));
23+ hipsolver_H = nullptr ;
24+ }
1425}
1526
1627#ifdef __LCAO
@@ -23,22 +34,65 @@ void dngvd_op<double, base_device::DEVICE_GPU>::operator()(const base_device::DE
2334 double * _eigenvalue,
2435 double * _vcc)
2536{
26- std::vector<double > hcc (nstart * nstart, 0.0 );
27- std::vector<double > scc (nstart * nstart, 0.0 );
28- std::vector<double > vcc (nstart * nstart, 0.0 );
29- std::vector<double > eigenvalue (nstart, 0 );
30- hipErrcheck (hipMemcpy (hcc.data (), _hcc, sizeof (double ) * hcc.size (), hipMemcpyDeviceToHost));
31- hipErrcheck (hipMemcpy (scc.data (), _scc, sizeof (double ) * scc.size (), hipMemcpyDeviceToHost));
32- base_device::DEVICE_CPU* cpu_ctx = {};
33- dngvd_op<double , base_device::DEVICE_CPU>()(cpu_ctx,
34- nstart,
35- ldh,
36- hcc.data (),
37- scc.data (),
38- eigenvalue.data (),
39- vcc.data ());
40- hipErrcheck (hipMemcpy (_vcc, vcc.data (), sizeof (double ) * vcc.size (), hipMemcpyHostToDevice));
41- hipErrcheck (hipMemcpy (_eigenvalue, eigenvalue.data (), sizeof (double ) * eigenvalue.size (), hipMemcpyHostToDevice));
37+ // copied from ../cuda/dngvd_op.cu, "dngvd_op"
38+ assert (nstart == ldh);
39+
40+ hipErrcheck (hipMemcpy (_vcc, _hcc, sizeof (double ) * ldh * nstart, hipMemcpyDeviceToDevice));
41+ // now vcc contains hcc
42+
43+ // prepare some values for hipsolverDnZhegvd_bufferSize
44+ int * devInfo = nullptr ;
45+ int lwork = 0 , info_gpu = 0 ;
46+ double * work = nullptr ;
47+ hipErrcheck (hipMalloc ((void **)&devInfo, sizeof (int )));
48+ hipsolverFillMode_t uplo = HIPSOLVER_FILL_MODE_UPPER;
49+
50+ // calculate the sizes needed for pre-allocated buffer.
51+ hipsolverErrcheck (hipsolverDnDsygvd_bufferSize (
52+ hipsolver_H, HIPSOLVER_EIG_TYPE_1, HIPSOLVER_EIG_MODE_VECTOR, uplo,
53+ nstart,
54+ _vcc, ldh,
55+ _scc, ldh,
56+ _eigenvalue,
57+ &lwork));
58+
59+ // allocate memery
60+ hipErrcheck (hipMalloc ((void **)&work, sizeof (double ) * lwork));
61+
62+ // compute eigenvalues and eigenvectors.
63+ hipsolverErrcheck (hipsolverDnDsygvd (
64+ hipsolver_H, HIPSOLVER_EIG_TYPE_1, HIPSOLVER_EIG_MODE_VECTOR, uplo,
65+ nstart,
66+ _vcc, ldh,
67+ const_cast <double *>(_scc), ldh,
68+ _eigenvalue,
69+ work, lwork, devInfo));
70+
71+ hipErrcheck (hipMemcpy (&info_gpu, devInfo, sizeof (int ), hipMemcpyDeviceToHost));
72+
73+ // free the buffer
74+ hipErrcheck (hipFree (work));
75+ hipErrcheck (hipFree (devInfo));
76+ // if(fail_info != nullptr) *fail_info = info_gpu;
77+
78+
79+ // std::vector<double> hcc(nstart * nstart, 0.0);
80+ // std::vector<double> scc(nstart * nstart, 0.0);
81+ // std::vector<double> vcc(nstart * nstart, 0.0);
82+ // std::vector<double> eigenvalue(nstart, 0);
83+ // hipErrcheck(hipMemcpy(hcc.data(), _hcc, sizeof(double) * hcc.size(), hipMemcpyDeviceToHost));
84+ // hipErrcheck(hipMemcpy(scc.data(), _scc, sizeof(double) * scc.size(), hipMemcpyDeviceToHost));
85+ // base_device::DEVICE_CPU* cpu_ctx = {};
86+ // dngvd_op<double, base_device::DEVICE_CPU>()(cpu_ctx,
87+ // nstart,
88+ // ldh,
89+ // hcc.data(),
90+ // scc.data(),
91+ // eigenvalue.data(),
92+ // vcc.data(),
93+ // fail_info);
94+ // hipErrcheck(hipMemcpy(_vcc, vcc.data(), sizeof(double) * vcc.size(), hipMemcpyHostToDevice));
95+ // hipErrcheck(hipMemcpy(_eigenvalue, eigenvalue.data(), sizeof(double) * eigenvalue.size(), hipMemcpyHostToDevice));
4296}
4397#endif // __LCAO
4498
@@ -51,22 +105,64 @@ void dngvd_op<std::complex<float>, base_device::DEVICE_GPU>::operator()(const ba
51105 float * _eigenvalue,
52106 std::complex <float >* _vcc)
53107{
54- std::vector<std::complex <float >> hcc (nstart * nstart, {0 , 0 });
55- std::vector<std::complex <float >> scc (nstart * nstart, {0 , 0 });
56- std::vector<std::complex <float >> vcc (nstart * nstart, {0 , 0 });
57- std::vector<float > eigenvalue (nstart, 0 );
58- hipErrcheck (hipMemcpy (hcc.data (), _hcc, sizeof (std::complex <float >) * hcc.size (), hipMemcpyDeviceToHost));
59- hipErrcheck (hipMemcpy (scc.data (), _scc, sizeof (std::complex <float >) * scc.size (), hipMemcpyDeviceToHost));
60- base_device::DEVICE_CPU* cpu_ctx = {};
61- dngvd_op<std::complex <float >, base_device::DEVICE_CPU>()(cpu_ctx,
62- nstart,
63- ldh,
64- hcc.data (),
65- scc.data (),
66- eigenvalue.data (),
67- vcc.data ());
68- hipErrcheck (hipMemcpy (_vcc, vcc.data (), sizeof (std::complex <float >) * vcc.size (), hipMemcpyHostToDevice));
69- hipErrcheck (hipMemcpy (_eigenvalue, eigenvalue.data (), sizeof (float ) * eigenvalue.size (), hipMemcpyHostToDevice));
108+ // copied from ../cuda/dngvd_op.cu, "dngvd_op"
109+ assert (nstart == ldh);
110+
111+ hipErrcheck (hipMemcpy (_vcc, _hcc, sizeof (std::complex <float >) * ldh * nstart, hipMemcpyDeviceToDevice));
112+ // now vcc contains hcc
113+
114+ // prepare some values for hipsolverDnZhegvd_bufferSize
115+ int * devInfo = nullptr ;
116+ int lwork = 0 , info_gpu = 0 ;
117+ float2 * work = nullptr ;
118+ hipErrcheck (hipMalloc ((void **)&devInfo, sizeof (int )));
119+ hipsolverFillMode_t uplo = HIPSOLVER_FILL_MODE_UPPER;
120+
121+ // calculate the sizes needed for pre-allocated buffer.
122+ hipsolverErrcheck (hipsolverDnChegvd_bufferSize (
123+ hipsolver_H, HIPSOLVER_EIG_TYPE_1, HIPSOLVER_EIG_MODE_VECTOR, uplo,
124+ nstart,
125+ reinterpret_cast <const float2 *>(_vcc), ldh,
126+ reinterpret_cast <const float2 *>(_scc), ldh,
127+ _eigenvalue,
128+ &lwork));
129+
130+ // allocate memery
131+ hipErrcheck (hipMalloc ((void **)&work, sizeof (float2 ) * lwork));
132+
133+ // compute eigenvalues and eigenvectors.
134+ hipsolverErrcheck (hipsolverDnChegvd (
135+ hipsolver_H, HIPSOLVER_EIG_TYPE_1, HIPSOLVER_EIG_MODE_VECTOR, uplo,
136+ nstart,
137+ reinterpret_cast <float2 *>(_vcc), ldh,
138+ const_cast <float2 *>(reinterpret_cast <const float2 *>(_scc)), ldh,
139+ _eigenvalue,
140+ work, lwork, devInfo));
141+
142+ hipErrcheck (hipMemcpy (&info_gpu, devInfo, sizeof (int ), hipMemcpyDeviceToHost));
143+ // free the buffer
144+ hipErrcheck (hipFree (work));
145+ hipErrcheck (hipFree (devInfo));
146+ // if(fail_info != nullptr) *fail_info = info_gpu;
147+
148+
149+ // std::vector<std::complex<float>> hcc(nstart * nstart, {0, 0});
150+ // std::vector<std::complex<float>> scc(nstart * nstart, {0, 0});
151+ // std::vector<std::complex<float>> vcc(nstart * nstart, {0, 0});
152+ // std::vector<float> eigenvalue(nstart, 0);
153+ // hipErrcheck(hipMemcpy(hcc.data(), _hcc, sizeof(std::complex<float>) * hcc.size(), hipMemcpyDeviceToHost));
154+ // hipErrcheck(hipMemcpy(scc.data(), _scc, sizeof(std::complex<float>) * scc.size(), hipMemcpyDeviceToHost));
155+ // base_device::DEVICE_CPU* cpu_ctx = {};
156+ // dngvd_op<std::complex<float>, base_device::DEVICE_CPU>()(cpu_ctx,
157+ // nstart,
158+ // ldh,
159+ // hcc.data(),
160+ // scc.data(),
161+ // eigenvalue.data(),
162+ // vcc.data(),
163+ // fail_info);
164+ // hipErrcheck(hipMemcpy(_vcc, vcc.data(), sizeof(std::complex<float>) * vcc.size(), hipMemcpyHostToDevice));
165+ // hipErrcheck(hipMemcpy(_eigenvalue, eigenvalue.data(), sizeof(float) * eigenvalue.size(), hipMemcpyHostToDevice));
70166}
71167
72168template <>
@@ -76,9 +172,61 @@ void dngvd_op<std::complex<double>, base_device::DEVICE_GPU>::operator()(const b
76172 const std::complex <double >* _hcc,
77173 const std::complex <double >* _scc,
78174 double * _eigenvalue,
79- std::complex <double >* _vcc)
175+ std::complex <double >* _vcc
176+ )
80177{
81- std::vector<std::complex <double >> hcc (nstart * nstart, {0 , 0 });
178+ // copied from ../cuda/dngvd_op.cu, "dngvd_op"
179+ assert (nstart == ldh);
180+
181+ // save a copy of scc in case the diagonalization fails
182+ std::vector<std::complex <double >> scc (nstart * nstart, {0 , 0 });
183+ hipErrcheck (hipMemcpy (scc.data (), _scc, sizeof (std::complex <double >) * scc.size (), hipMemcpyDeviceToHost));
184+
185+ hipErrcheck (hipMemcpy (_vcc, _hcc, sizeof (std::complex <double >) * ldh * nstart, hipMemcpyDeviceToDevice));
186+
187+ // now vcc contains hcc
188+
189+ // prepare some values for hipsolverDnZhegvd_bufferSize
190+ int * devInfo = nullptr ;
191+ int lwork = 0 , info_gpu = 0 ;
192+ double2 * work = nullptr ;
193+ hipErrcheck (hipMalloc ((void **)&devInfo, sizeof (int )));
194+ hipsolverFillMode_t uplo = HIPSOLVER_FILL_MODE_UPPER;
195+
196+ // calculate the sizes needed for pre-allocated buffer.
197+ hipsolverErrcheck (hipsolverDnZhegvd_bufferSize (
198+ hipsolver_H, HIPSOLVER_EIG_TYPE_1, HIPSOLVER_EIG_MODE_VECTOR, uplo,
199+ nstart,
200+ reinterpret_cast <const double2 *>(_vcc), ldh,
201+ reinterpret_cast <const double2 *>(_scc), ldh,
202+ _eigenvalue,
203+ &lwork));
204+
205+ // allocate memery
206+ hipErrcheck (hipMalloc ((void **)&work, sizeof (double2 ) * lwork));
207+
208+ // compute eigenvalues and eigenvectors.
209+ hipsolverErrcheck (hipsolverDnZhegvd (
210+ hipsolver_H, HIPSOLVER_EIG_TYPE_1, HIPSOLVER_EIG_MODE_VECTOR, uplo,
211+ nstart,
212+ reinterpret_cast <double2 *>(_vcc), ldh,
213+ const_cast <double2 *>(reinterpret_cast <const double2 *>(_scc)), ldh,
214+ _eigenvalue,
215+ work, lwork, devInfo));
216+
217+ hipErrcheck (hipMemcpy (&info_gpu, devInfo, sizeof (int ), hipMemcpyDeviceToHost));
218+ // free the buffer
219+ hipErrcheck (hipFree (work));
220+ hipErrcheck (hipFree (devInfo));
221+ // if(fail_info != nullptr) *fail_info = info_gpu;
222+
223+
224+
225+
226+
227+
228+
229+ /* std::vector<std::complex<double>> hcc(nstart * nstart, {0, 0});
82230 std::vector<std::complex<double>> scc(nstart * nstart, {0, 0});
83231 std::vector<std::complex<double>> vcc(nstart * nstart, {0, 0});
84232 std::vector<double> eigenvalue(nstart, 0);
@@ -93,7 +241,7 @@ void dngvd_op<std::complex<double>, base_device::DEVICE_GPU>::operator()(const b
93241 eigenvalue.data(),
94242 vcc.data());
95243 hipErrcheck(hipMemcpy(_vcc, vcc.data(), sizeof(std::complex<double>) * vcc.size(), hipMemcpyHostToDevice));
96- hipErrcheck (hipMemcpy (_eigenvalue, eigenvalue.data (), sizeof (double ) * eigenvalue.size (), hipMemcpyHostToDevice));
244+ hipErrcheck(hipMemcpy(_eigenvalue, eigenvalue.data(), sizeof(double) * eigenvalue.size(), hipMemcpyHostToDevice));*/
97245}
98246
99247#ifdef __LCAO
0 commit comments