22
33#include < base/third_party/lapack.h>
44
5+ // #include <cstring> // std::memcpy
6+ #include < algorithm> // std::copy
7+
58namespace container {
69namespace kernels {
710
@@ -36,7 +39,7 @@ struct lapack_trtri<T, DEVICE_CPU> {
3639 const char & diag,
3740 const int & dim,
3841 T* Mat,
39- const int & lda)
42+ const int & lda)
4043 {
4144 int info = 0 ;
4245 lapackConnector::trtri (uplo, diag, dim, Mat, lda, info);
@@ -51,8 +54,8 @@ struct lapack_potrf<T, DEVICE_CPU> {
5154 void operator ()(
5255 const char & uplo,
5356 const int & dim,
54- T* Mat,
55- const int & lda)
57+ T* Mat,
58+ const int & lda)
5659 {
5760 int info = 0 ;
5861 lapackConnector::potrf (uplo, dim, Mat, dim, info);
@@ -85,7 +88,7 @@ struct lapack_heevd<T, DEVICE_CPU> {
8588 Tensor iwork (DataTypeToEnum<int >::value, DeviceType::CpuDevice, {liwork});
8689 iwork.zero ();
8790
88- lapackConnector::heevd (jobz, uplo, dim, Mat, dim, eigen_val, work.data <T>(), lwork, rwork.data <Real>(), lrwork, iwork.data <int >(), liwork, info);
91+ lapackConnector::heevd (jobz, uplo, dim, Mat, dim, eigen_val, work.data <T>(), lwork, rwork.data <Real>(), lrwork, iwork.data <int >(), liwork, info);
8992 if (info != 0 ) {
9093 throw std::runtime_error (" heevd failed with info = " + std::to_string (info));
9194 }
@@ -96,14 +99,26 @@ template <typename T>
9699struct lapack_hegvd <T, DEVICE_CPU> {
97100 using Real = typename GetTypeReal<T>::type;
98101 void operator ()(
99- const int & itype,
100- const char & jobz,
101- const char & uplo,
102- T* Mat_A,
103- T* Mat_B,
104- const int & dim,
105- Real* eigen_val)
102+ const int dim,
103+ const int lda,
104+ T *Mat_A,
105+ T *Mat_B,
106+ Real *eigen_val,
107+ T *eigen_vec)
106108 {
109+ // first copy Mat_A to eigen_vec
110+ // then pass as argument "A" in lapack hegvd
111+ // and this block of memory will be overwritten by eigenvectors
112+ // for (int i = 0; i < dim * lda; ++i){
113+ // eigen_vec[i] = Mat_A[i];
114+ // }
115+ // std::memcpy(eigen_vec, Mat_A, sizeof(T) * dim * lda);
116+ // eigen_vec = Mat_A
117+ std::copy (Mat_A, Mat_A + dim*lda, eigen_vec);
118+
119+ const int itype = 1 ;
120+ const char jobz = ' V' ;
121+ const char uplo = ' U' ;
107122 int info = 0 ;
108123 int lwork = std::max (2 * dim + dim * dim, 1 + 6 * dim + 2 * dim * dim);
109124 Tensor work (DataTypeToEnum<T>::value, DeviceType::CpuDevice, {lwork});
@@ -117,13 +132,90 @@ struct lapack_hegvd<T, DEVICE_CPU> {
117132 Tensor iwork (DataType::DT_INT, DeviceType::CpuDevice, {liwork});
118133 iwork.zero ();
119134
120- lapackConnector::hegvd (itype, jobz, uplo, dim, Mat_A, dim, Mat_B, dim, eigen_val, work.data <T>(), lwork, rwork.data <Real>(), lrwork, iwork.data <int >(), liwork, info);
135+ // After this, eigen_vec will contain the matrix Z of eigenvectors
136+ lapackConnector::hegvd (itype, jobz, uplo, dim, eigen_vec, lda, Mat_B, lda, eigen_val, work.data <T>(), lwork, rwork.data <Real>(), lrwork, iwork.data <int >(), liwork, info);
121137 if (info != 0 ) {
122138 throw std::runtime_error (" hegvd failed with info = " + std::to_string (info));
123139 }
124140 }
125141};
126142
143+
144+ // template <typename T>
145+ // struct lapack_hegvx<T, DEVICE_CPU> {
146+ // using Real = typename GetTypeReal<T>::type;
147+ // void operator()(
148+ // const int n,
149+ // const int lda,
150+ // T *A,
151+ // T *B,
152+ // const int m,
153+ // Real *eigen_val,
154+ // T *eigen_vec)
155+ // {
156+ // int info = 0;
157+
158+ // int mm = m;
159+
160+ // int lwork = -1;
161+
162+ // T *work = new T[1];
163+ // Real *rwork = new Real[7 * n];
164+ // int *iwork = new int[5 * n];
165+ // int *ifail = new int[n];
166+
167+ // // set lwork = -1 to query optimal work size
168+ // lapackConnector::hegvx(1, // ITYPE = 1: A*x = (lambda)*B*x
169+ // 'V', 'I', 'U',
170+ // n, A, lda, B, lda,
171+ // 0.0, 0.0,
172+ // 1, m, 0.0, mm,
173+ // eigen_val, eigen_vec, lda,
174+ // work,
175+ // lwork, // lwork = 1, query optimal size.
176+ // rwork, iwork, ifail,
177+ // info);
178+
179+ // // !> If LWORK = -1, then a workspace query is assumed; the routine
180+ // // !> only calculates the optimal size of the WORK array, returns
181+ // // !> this value as the first entry of the WORK array.
182+ // lwork = int(get_real(work[0]));
183+ // delete[] work;
184+ // work = new T[lwork];
185+
186+ // lapackConnector::hegvx(
187+ // 1, // ITYPE = 1: A*x = (lambda)*B*x
188+ // 'V', // JOBZ = 'V': Compute eigenvalues and eigenvectors.
189+ // 'I', // RANGE = 'I': the IL-th through IU-th eigenvalues will be found.
190+ // 'U', // UPLO = 'U': Upper triangles of A and B are stored.
191+ // n, // order of the matrices A and B.
192+ // A, // A is COMPLEX*16 array dimension (LDA, N)
193+ // lda, // leading dimension of the array A.
194+ // B, // B is COMPLEX*16 array, dimension (LDB, N)
195+ // lda, // assume that leading dimension of B is the same as A.
196+ // 0.0, // VL, Not referenced if RANGE = 'A' or 'I'.
197+ // 0.0, // VU, Not referenced if RANGE = 'A' or 'I'.
198+ // 1, // IL: If RANGE='I', the index of the smallest eigenvalue to be returned. 1 <= IL <= IU <= N,
199+ // m, // IU: If RANGE='I', the index of the largest eigenvalue to be returned. 1 <= IL <= IU <= N,
200+ // 0.0, // ABSTOL
201+ // mm, // M: The total number of eigenvalues found. 0 <= M <= N. if RANGE = 'I', M = IU-IL+1.
202+ // eigen_val, // W store eigenvalues
203+ // eigen_vec, // Z store eigenvector
204+ // lda, // LDZ: The leading dimension of the array Z.
205+ // work,
206+ // lwork,
207+ // rwork,
208+ // iwork,
209+ // ifail,
210+ // info);
211+
212+ // delete[] work;
213+ // delete[] rwork;
214+ // delete[] iwork;
215+ // delete[] ifail;
216+ // }
217+ // };
218+
127219template <typename T>
128220struct lapack_getrf <T, DEVICE_CPU> {
129221 void operator ()(
@@ -220,4 +312,4 @@ template struct lapack_getrs<std::complex<float>, DEVICE_CPU>;
220312template struct lapack_getrs <std::complex <double >, DEVICE_CPU>;
221313
222314} // namespace kernels
223- } // namespace container
315+ } // namespace container
0 commit comments