Update heevd interface to add lda

Cstandardlib · Cstandardlib · commit 831625e3942e · 2025-11-03T22:24:36.000+08:00
diff --git a/source/source_base/module_container/ATen/kernels/blas.h b/source/source_base/module_container/ATen/kernels/blas.h
@@ -11,6 +11,7 @@ namespace kernels {
 
 template <typename T, typename Device>
 struct blas_copy {
+    // Copies a vector x to a vector y (BLAS xCOPY; DCOPY is the double-precision variant).
     void operator()(
         const int n,
         const T *x,
diff --git a/source/source_base/module_container/ATen/kernels/cuda/lapack.cu b/source/source_base/module_container/ATen/kernels/cuda/lapack.cu
@@ -231,13 +231,14 @@ template <typename T>
 struct lapack_heevd<T, DEVICE_GPU> {
     using Real = typename GetTypeReal<T>::type;
     void operator()(
-        const char& jobz,
-        const char& uplo,
+        const int dim,
         T* Mat,
-        const int& dim,
+        const int lda,
         Real* eigen_val)
     {
-        cuSolverConnector::heevd(cusolver_handle, jobz, uplo, dim, Mat, dim, eigen_val);
+        char jobz = 'V';        // Compute eigenvalues and eigenvectors
+        char uplo = 'U';
+        cuSolverConnector::heevd(cusolver_handle, jobz, uplo, dim, Mat, lda, eigen_val);
     }
 };
 
diff --git a/source/source_base/module_container/ATen/kernels/lapack.cpp b/source/source_base/module_container/ATen/kernels/lapack.cpp
@@ -196,12 +196,13 @@ template <typename T>
 struct lapack_heevd<T, DEVICE_CPU> {
     using Real = typename GetTypeReal<T>::type;
     void operator()(
-        const char& jobz,
-        const char& uplo,
+        const int dim,
         T* Mat,
-        const int& dim,
+        const int lda,
         Real* eigen_val)
     {
+        char jobz = 'V';        // Compute eigenvalues and eigenvectors
+        char uplo = 'U';
         int info = 0;
         int lwork = std::max(2 * dim + dim * dim, 1 + 6 * dim + 2 * dim * dim);
         Tensor work(DataTypeToEnum<T>::value, DeviceType::CpuDevice, {lwork});
@@ -215,7 +216,7 @@ struct lapack_heevd<T, DEVICE_CPU> {
         Tensor iwork(DataTypeToEnum<int>::value, DeviceType::CpuDevice, {liwork});
         iwork.zero();
 
-        lapackConnector::heevd(jobz, uplo, dim, Mat, dim, eigen_val, work.data<T>(), lwork, rwork.data<Real>(), lrwork, iwork.data<int>(), liwork, info);
+        lapackConnector::heevd(jobz, uplo, dim, Mat, lda, eigen_val, work.data<T>(), lwork, rwork.data<Real>(), lrwork, iwork.data<int>(), liwork, info);
         if (info != 0) {
             throw std::runtime_error("heevd failed with info = " + std::to_string(info));
         }
@@ -233,6 +234,8 @@ struct lapack_heevx<T, DEVICE_CPU> {
         Real *eigen_val,
         T *eigen_vec)
     {
+        // Copy Mat into aux, then solve heevx(aux, eigen_val, eigen_vec).
+        // Mat itself is never handed to the LAPACK heevx routine; only the copy aux is, and aux is overwritten.
         Tensor aux(DataTypeToEnum<T>::value, DeviceType::CpuDevice, {n * lda});
         // Copy Mat to aux since heevx will destroy it
         // aux = Mat
diff --git a/source/source_base/module_container/ATen/kernels/lapack.h b/source/source_base/module_container/ATen/kernels/lapack.h
@@ -145,12 +145,37 @@ struct lapack_getrs {
 // ============================================================================
 template <typename T, typename Device>
 struct lapack_heevd {
+    // LAPACK reference: ZHEEVD computes all eigenvalues and, optionally,
+    // eigenvectors of a complex Hermitian matrix A. If eigenvectors are
+    // desired, it uses a divide and conquer algorithm.
+    // On exit, if JOBZ = 'V' and INFO = 0, A contains the orthonormal
+    // eigenvectors of the matrix A.
+    /**
+     * @brief Computes all eigenvalues and, optionally, eigenvectors of a complex Hermitian matrix.
+     *
+     * This function solves the standard Hermitian eigenvalue problem A*x = lambda*x,
+     * where A is a Hermitian matrix. It computes all eigenvalues and optionally
+     * the corresponding eigenvectors using a divide and conquer algorithm.
+     *
+     * @param[in] dim   The order of the matrix A. dim >= 0.
+     * @param[in,out] Mat   On entry, the Hermitian matrix A.
+     *              On successful exit, Mat is overwritten with the
+     *              orthonormal eigenvectors of the matrix A.
+     * @param[in] lda   The leading dimension of the array Mat. lda >= max(1, dim).
+     * @param[out] eigen_val Array of size at least dim. On normal exit, contains the
+     *                  eigenvalues in ascending order.
+     *
+     * @note
+     * See LAPACK ZHEEVD or CHEEVD documentation for more details.
+     * This interface takes no jobz/uplo arguments: the CPU and GPU
+     * implementations fix jobz = 'V' (eigenvectors are always computed)
+     * and uplo = 'U' (the upper triangle of Mat is referenced).
+     */
     using Real = typename GetTypeReal<T>::type;
     void operator()(
-        const char& jobz,
-        const char& uplo,
+        const int dim,
         T* Mat,
-        const int& dim,
+        const int lda,
         Real* eigen_val);
 };
 
@@ -165,7 +190,8 @@ struct lapack_heevx {
      *
      * @param dim   The order of the matrix A. dim >= 0.
      * @param lda   The leading dimension of the array Mat. lda >= max(1, dim).
-     * @param Mat   On entry, the Hermitian matrix A. On exit, A is kept.
+     * @param[in] Mat   On entry, the Hermitian matrix A. On exit, Mat is unchanged.
+     *                  It is only read to supply the entries of the matrix.
      * @param neig  The number of eigenvalues to be found. 0 <= neig <= dim.
      * @param eigen_val On normal exit, the first \p neig elements contain the selected
      *                  eigenvalues in ascending order.
diff --git a/source/source_hsolver/diago_bpcg.cpp b/source/source_hsolver/diago_bpcg.cpp
@@ -112,14 +112,14 @@ void DiagoBPCG<T, Device>::line_minimize(
 // Finally, the last two!
 template<typename T, typename Device>
 void DiagoBPCG<T, Device>::orth_cholesky(
-		ct::Tensor& workspace_in, 
-		ct::Tensor& psi_out, 
-		ct::Tensor& hpsi_out, 
+		ct::Tensor& workspace_in,
+		ct::Tensor& psi_out,
+		ct::Tensor& hpsi_out,
 		ct::Tensor& hsub_out)
 {
     // gemm: hsub_out(n_band x n_band) = psi_out^T(n_band x n_basis) * psi_out(n_basis x n_band)
     this->pmmcn.multiply(1.0, psi_out.data<T>(), psi_out.data<T>(), 0.0, hsub_out.data<T>());
-    
+
     // set hsub matrix to lower format;
     ct::kernels::set_matrix<T, ct_Device>()(
         'L', hsub_out.data<T>(), this->n_band);
@@ -209,7 +209,8 @@ void DiagoBPCG<T, Device>::diag_hsub(
     // gemm: hsub_out(n_band x n_band) = hpsi_in^T(n_band x n_basis) * psi_in(n_basis x n_band)
     this->pmmcn.multiply(1.0, hpsi_in.data<T>(), psi_in.data<T>(), 0.0, hsub_out.data<T>());
 
-    ct::kernels::lapack_heevd<T, ct_Device>()('V', 'U', hsub_out.data<T>(), this->n_band, eigenvalue_out.data<Real>());
+    // Argument order is (dim, Mat, lda, eigen_val); jobz = 'V' and uplo = 'U' are now fixed inside lapack_heevd.
+    ct::kernels::lapack_heevd<T, ct_Device>()(this->n_band, hsub_out.data<T>(), this->n_band, eigenvalue_out.data<Real>());
 
     return;
 }
@@ -235,15 +236,15 @@ void DiagoBPCG<T, Device>::calc_hsub_with_block(
     // hpsi_out[n_basis, n_band] = psi_out[n_basis, n_band] x hsub_out[n_band, n_band]
     this->rotate_wf(hsub_out, psi_out, workspace_in);
     this->rotate_wf(hsub_out, hpsi_out, workspace_in);
- 
+
     return;
 }
 
 template<typename T, typename Device>
 void DiagoBPCG<T, Device>::calc_hsub_with_block_exit(
-        ct::Tensor& psi_out, 
+        ct::Tensor& psi_out,
         ct::Tensor& hpsi_out,
-        ct::Tensor& hsub_out, 
+        ct::Tensor& hsub_out,
         ct::Tensor& workspace_in,
         ct::Tensor& eigenvalue_out)
 {