Add lapack_heevx

Cstandardlib · Cstandardlib · commit 82b8acf93964 · 2025-10-18T00:20:05.000+08:00
diff --git a/source/source_base/module_container/ATen/kernels/cuda/lapack.cu b/source/source_base/module_container/ATen/kernels/cuda/lapack.cu
@@ -101,6 +101,39 @@ struct lapack_heevd<T, DEVICE_GPU> {
     }
 };
 
+template <typename T>
+struct lapack_heevx<T, DEVICE_GPU> {
+    using Real = typename GetTypeReal<T>::type;
+    // GPU path: eigenpairs with indices 1..neig of the matrix in d_Mat.
+    void operator()(
+        const int n,
+        const int lda,
+        T *d_Mat,
+        const int neig,
+        Real *d_eigen_val,
+        T *d_eigen_vec)
+    {
+        // Seed d_eigen_vec with a copy of d_Mat; cuSolver then overwrites
+        // that buffer with its results, so d_Mat itself is only read here.
+        const auto mat_bytes = sizeof(T) * n * lda;
+        cudaErrcheck(cudaMemcpy(d_eigen_vec, d_Mat, mat_bytes, cudaMemcpyDeviceToDevice));
+
+        const char jobz = 'V';   // compute vectors
+        const char uplo = 'L';   // lower triangle
+        const char range = 'I';  // select by index
+        const int il = 1;        // first requested index
+        const int iu = neig;     // last requested index
+        const Real unused_bound = Real(0);  // vl, vu (unused)
+        int num_found = 0;
+
+        cuSolverConnector::heevdx(
+            cusolver_handle, n, lda, d_eigen_vec,
+            jobz, uplo, range,
+            il, iu,
+            unused_bound, unused_bound,
+            d_eigen_val, &num_found);
+    }
+};
 template <typename T>
 struct lapack_hegvd<T, DEVICE_GPU> {
     using Real = typename GetTypeReal<T>::type;
diff --git a/source/source_base/module_container/ATen/kernels/lapack.cpp b/source/source_base/module_container/ATen/kernels/lapack.cpp
@@ -4,10 +4,17 @@
 
 // #include <cstring> // std::memcpy
 #include <algorithm> // std::copy
+#include <stdexcept>
+#include <string>
 
 namespace container {
 namespace kernels {
 
+inline double get_real(const std::complex<double> &x) { return std::real(x); } // real part
+inline float get_real(const std::complex<float> &x) { return std::real(x); }   // real part
+inline double get_real(const double &x) { return x; } // already real: identity
+inline float get_real(const float &x) { return x; }   // already real: identity
+
 template <typename T>
 struct set_matrix<T, DEVICE_CPU> {
     void operator() (
@@ -95,6 +102,96 @@ struct lapack_heevd<T, DEVICE_CPU> {
     }
 };
 
+template <typename T>
+struct lapack_heevx<T, DEVICE_CPU> {
+    using Real = typename GetTypeReal<T>::type;
+    // CPU path: computes the eigenpairs with indices 1..neig of the Hermitian
+    // matrix Mat (lower triangle, leading dimension lda) through heevx.
+    // Mat is copied into a scratch tensor first; the solver runs on the copy.
+    // Eigenvalues go to eigen_val, eigenvectors to eigen_vec (ldz = lda).
+    // Throws std::runtime_error when heevx reports info != 0.
+    void operator()(
+        const int n,
+        const int lda,
+        T *Mat,
+        const int neig,
+        Real *eigen_val,
+        T *eigen_vec)
+    {
+        // heevx destroys its input matrix, so work on a copy held in aux.
+        Tensor aux(DataTypeToEnum<T>::value, DeviceType::CpuDevice, {n * lda});
+        // Use the typed raw pointer, as done for work/rwork/iwork/ifail below.
+        T *aux_data = aux.data<T>();
+        std::copy(Mat, Mat + n * lda, aux_data);
+
+        char jobz = 'V';        // Compute eigenvalues and eigenvectors
+        char range = 'I';       // Find eigenvalues in index range [il, iu]
+        char uplo = 'L';        // Use Lower triangle
+        int info = 0;
+        int found = 0;          // Number of eigenvalues found
+        // found should be iu - il + 1, i.e. found = neig
+        const int il = 1;
+        const int iu = neig;
+        Real abstol = 0.0;
+
+        // Workspace query: with lwork = -1 this dummy call only reports the
+        // optimal work size in work_query; scalars stand in for the buffers.
+        int lwork = -1;
+        T work_query;
+        Real rwork_query;
+        int iwork_query;
+        int ifail_query;
+        lapackConnector::heevx(
+            jobz, range, uplo, n,
+            aux_data, lda,
+            0.0, 0.0, il, iu,   // vl, vu not used when range='I'
+            abstol,
+            &found,
+            eigen_val,
+            eigen_vec, lda,
+            &work_query, lwork,
+            &rwork_query,
+            &iwork_query,
+            &ifail_query,
+            &info);
+
+        if (info != 0) {
+            throw std::runtime_error("heevx workspace query failed with info = " + std::to_string(info));
+        }
+
+        lwork = static_cast<int>(get_real(work_query));
+
+        // Allocate buffers using Tensor (RAII)
+        Tensor work(DataTypeToEnum<T>::value, DeviceType::CpuDevice, {lwork});
+        work.zero();
+        Tensor rwork(DataTypeToEnum<Real>::value, DeviceType::CpuDevice, {7 * n});
+        rwork.zero();
+        Tensor iwork(DataType::DT_INT, DeviceType::CpuDevice, {5 * n});
+        iwork.zero();
+        Tensor ifail(DataType::DT_INT, DeviceType::CpuDevice, {n});
+        ifail.zero();
+
+        // Actual call to heevx
+        lapackConnector::heevx(
+            jobz, range, uplo, n,
+            aux_data, lda,
+            0.0, 0.0, il, iu,
+            abstol,
+            &found,
+            eigen_val,
+            eigen_vec, lda,
+            work.data<T>(), lwork,
+            rwork.data<Real>(),
+            iwork.data<int>(),
+            ifail.data<int>(),
+            &info);
+
+        if (info != 0) {
+            throw std::runtime_error("heevx failed with info = " + std::to_string(info));
+        }
+    }
+};
+
 template <typename T>
 struct lapack_hegvd<T, DEVICE_CPU> {
     using Real = typename GetTypeReal<T>::type;
diff --git a/source/source_base/module_container/ATen/kernels/lapack.h b/source/source_base/module_container/ATen/kernels/lapack.h
@@ -1,6 +1,7 @@
 #ifndef ATEN_KERNELS_LAPACK_H_
 #define ATEN_KERNELS_LAPACK_H_
 
+#include "source_base/macros.h"
 #include <ATen/core/tensor.h>
 #include <ATen/core/tensor_types.h>
 
@@ -51,6 +52,40 @@ struct lapack_heevd {
         Real* eigen_val);
 };
 
+template <typename T, typename Device>
+struct lapack_heevx {
+    using Real = typename GetTypeReal<T>::type;
+    /**
+     * @brief Computes selected eigenvalues and eigenvectors of a Hermitian matrix.
+     *
+     * This function solves the problem A*x = lambda*x, where A is a Hermitian matrix.
+     * Only a subset of the eigenpairs is requested: the caller passes the count neig.
+     *
+     * The LAPACK-style options are not parameters of this interface. The CPU and GPU
+     * specializations in this change fix them as follows:
+     *   jobz  = 'V': eigenvalues and eigenvectors are computed;
+     *   range = 'I': selection by index, with il = 1 and iu = neig;
+     *   uplo  = 'L': the lower triangle of A is used.
+     *
+     * @param dim   The order of the matrix A. dim >= 0.
+     * @param lda   The leading dimension of Mat (and of eigen_vec in the CPU path).
+     * @param Mat   On entry, the Hermitian matrix A. The specializations in this change
+     *              copy dim * lda elements from it and solve on the copy.
+     * @param neig  Number of eigenpairs requested; used as the upper index iu.
+     * @param eigen_val Array to store the computed eigenvalues in ascending order.
+     * @param eigen_vec Array to store the computed eigenvectors; the GPU specialization
+     *              first copies dim * lda elements of Mat into it.
+     *
+     * @note See LAPACK ZHEEVX or CHEEVX documentation for more details.
+     */
+    void operator()(
+        const int dim,
+        const int lda,
+        T *Mat,
+        const int neig,
+        Real *eigen_val,
+        T *eigen_vec);
+};
 
 template <typename T, typename Device>
 struct lapack_hegvd {
@@ -60,8 +95,8 @@ struct lapack_hegvd {
      *
      * This function solves the problem A*x = lambda*B*x, where A and B are Hermitian matrices, and B is also positive definite.
      *
-     * @param dim The order of the matrices Mat_A and Mat_B. dim >= 0.
-     * @param lda The leading dimension of the arrays Mat_A and Mat_B. lda >= max(1, dim).
+     * @param n The order of the matrices Mat_A and Mat_B. n >= 0.
+     * @param lda The leading dimension of the arrays Mat_A and Mat_B. lda >= max(1, n).
      * @param Mat_A On entry, the Hermitian matrix A. On exit, it may be overwritten.
      * @param Mat_B On entry, the Hermitian positive definite matrix B. On exit, it may be overwritten.
      * @param eigen_val Array to store the computed eigenvalues in ascending order.
@@ -72,7 +107,7 @@ struct lapack_hegvd {
      * This function assumes that A and B have the same leading dimensions, lda.
      */
     void operator()(
-        const int dim,
+        const int n,
         const int lda,
         T *Mat_A,
         T *Mat_B,
diff --git a/source/source_base/module_container/base/macros/cuda.h b/source/source_base/module_container/base/macros/cuda.h
@@ -121,6 +121,29 @@ static inline cusolverEigType_t cublas_eig_type(const int& itype)
         throw std::runtime_error("cublas_eig_mode: unknown diag");
 }
 
+/**
+ * @brief Maps a LAPACK-style range selector character onto cusolverEigRange_t.
+ *
+ *        'A' or 'a' -> CUSOLVER_EIG_RANGE_ALL: all eigenvalues
+ *        'V' or 'v' -> CUSOLVER_EIG_RANGE_V:  values in the half-open interval (vl, vu]
+ *        'I' or 'i' -> CUSOLVER_EIG_RANGE_I:  indices il through iu
+ *
+ * @param range Selector character; upper and lower case are both accepted
+ * @return The matching cusolverEigRange_t enumerator
+ * @throws std::runtime_error for any other character
+ */
+static inline cusolverEigRange_t cublas_eig_range(const char& range)
+{
+    switch (range) {
+        case 'A': case 'a': return CUSOLVER_EIG_RANGE_ALL;
+        case 'V': case 'v': return CUSOLVER_EIG_RANGE_V;
+        case 'I': case 'i': return CUSOLVER_EIG_RANGE_I;
+        default: break;
+    }
+    // Unrecognized selector: report the offending character.
+    throw std::runtime_error("cublas_eig_range: unknown range '" + std::string(1, range) + "'");
+}
+
 // cuSOLVER API errors
 static const char* cusolverGetErrorEnum(cusolverStatus_t error)
 {
@@ -226,4 +249,4 @@ inline void cublasAssert(cublasStatus_t res, const char* file, int line)
 #define cudaCheckOnDebug()
 #endif
 
-#endif // BASE_MACROS_CUDA_H_
+#endif // BASE_MACROS_CUDA_H_
diff --git a/source/source_base/module_container/base/third_party/cusolver.h b/source/source_base/module_container/base/third_party/cusolver.h