Add geqrf for Tensor LAPACK

Cstandardlib · Cstandardlib · commit 88a91a7872ad · 2025-02-14T16:52:38.000+08:00
diff --git a/source/module_base/module_container/ATen/kernels/lapack.cpp b/source/module_base/module_container/ATen/kernels/lapack.cpp
@@ -179,6 +179,26 @@ struct lapack_getrs<T, DEVICE_CPU> {
     }
 };
 
+// CPU specialization of lapack_geqrf: forwards to lapackConnector::geqrf and throws on a non-zero info.
+template <typename T>
+struct lapack_geqrf<T, DEVICE_CPU> {
+    void operator()(
+        const int& m,
+        const int& n,
+        T* A,
+        const int& lda,
+        T* tau,
+        T* work,
+        const int& lwork)
+    {
+        int info = 0;  // status code, handed by reference to the connector and on to the LAPACK routine
+        lapackConnector::geqrf(m, n, A, lda, tau, work, lwork, info);
+        if (info != 0) {  // any non-zero status is surfaced to the caller as an exception
+            throw std::runtime_error("geqrf failed with info = " + std::to_string(info));
+        }
+    }
+};
+
 template struct set_matrix<float,  DEVICE_CPU>;
 template struct set_matrix<double, DEVICE_CPU>;
 template struct set_matrix<std::complex<float>,  DEVICE_CPU>;
@@ -219,5 +239,10 @@ template struct lapack_getrs<double, DEVICE_CPU>;
 template struct lapack_getrs<std::complex<float>, DEVICE_CPU>;
 template struct lapack_getrs<std::complex<double>, DEVICE_CPU>;
 
+template struct lapack_geqrf<float, DEVICE_CPU>;  // float overload of lapackConnector::geqrf -> sgeqrf_
+template struct lapack_geqrf<double, DEVICE_CPU>;  // double overload -> dgeqrf_
+template struct lapack_geqrf<std::complex<float>, DEVICE_CPU>;  // complex<float> overload -> cgeqrf_
+template struct lapack_geqrf<std::complex<double>, DEVICE_CPU>;  // complex<double> overload -> zgeqrf_
+
 } // namespace kernels
 } // namespace container
diff --git a/source/module_base/module_container/ATen/kernels/lapack.h b/source/module_base/module_container/ATen/kernels/lapack.h
@@ -249,6 +249,35 @@ struct lapack_getrs {
         const int& ldb);
 };
 
+
+// Device-dispatched functor wrapping the LAPACK ?geqrf QR factorization; defined per backend.
+template <typename T, typename Device>
+struct lapack_geqrf {
+    /**
+     * @brief Compute the QR factorization A = Q * R of an m-by-n matrix.
+     *
+     * Q is orthogonal (unitary for complex T) and R is upper triangular.
+     * NOTE(review): per LAPACK ?geqrf, A is presumably overwritten in place with the factors — confirm per backend.
+     *
+     * @param m The number of rows of the matrix.
+     * @param n The number of columns of the matrix.
+     * @param A Pointer to the matrix data (mutable; handed straight to the backend routine).
+     * @param lda Leading dimension of the matrix.
+     * @param tau Pointer to the array of scalar factors of the elementary reflectors.
+     * @param work Pointer to the workspace array.
+     * @param lwork The size of the workspace array.
+     * @throws std::runtime_error In the DEVICE_CPU specialization, when the routine reports info != 0.
+     */
+    void operator()(
+        const int& m,
+        const int& n,
+        T* A,
+        const int& lda,
+        T* tau,
+        T* work,
+        const int& lwork);
+};
+
 #if defined(__CUDA) || defined(__ROCM)
 // TODO: Use C++ singleton to manage the GPU handles
 void createGpuSolverHandle();  // create cusolver handle
diff --git a/source/module_base/module_container/base/third_party/lapack.h b/source/module_base/module_container/base/third_party/lapack.h
@@ -119,6 +119,11 @@ void sgetrs_(const char* trans, const int* n, const int* nrhs, const float* A, c
 void dgetrs_(const char* trans, const int* n, const int* nrhs, const double* A, const int* lda, const int* ipiv, double* B, const int* ldb, int* info);
 void cgetrs_(const char* trans, const int* n, const int* nrhs, const std::complex<float>* A, const int* lda, const int* ipiv, std::complex<float>* B, const int* ldb, int* info);
 void zgetrs_(const char* trans, const int* n, const int* nrhs, const std::complex<double>* A, const int* lda, const int* ipiv, std::complex<double>* B, const int* ldb, int* info);
+// ?geqrf_: Fortran LAPACK QR-factorization routines (s/d/c/z = float, double, complex<float>, complex<double>).
+void sgeqrf_(const int* m, const int* n, float* a, const int* lda, float* tau, float* work, const int* lwork, int* info);
+void dgeqrf_(const int* m, const int* n, double* a, const int* lda, double* tau, double* work, const int* lwork, int* info);
+void cgeqrf_(const int* m, const int* n, std::complex<float>* a, const int* lda, std::complex<float>* tau, std::complex<float>* work, const int* lwork, int* info);
+void zgeqrf_(const int* m, const int* n, std::complex<double>* a, const int* lda, std::complex<double>* tau, std::complex<double>* work, const int* lwork, int* info);
 }
 
 // Class LapackConnector provide the connector to fortran lapack routine.
@@ -398,6 +403,29 @@ void getrs(const char& trans, const int n, const int nrhs, std::complex<double>*
     zgetrs_(&trans, &n, &nrhs, A, &lda, ipiv, B, &ldb, &info);
 }
 
+// geqrf overloads: select the s/d/c/z LAPACK routine from the element type of A, tau and work.
+// Scalars are taken by value and passed on by address; info is forwarded by address so the routine can report its status.
+static inline
+void geqrf(const int m, const int n, float* A, const int lda, float* tau, float* work, const int lwork, int& info)
+{
+    sgeqrf_(&m, &n, A, &lda, tau, work, &lwork, &info);
+}
+static inline
+void geqrf(const int m, const int n, double* A, const int lda, double* tau, double* work, const int lwork, int& info)
+{
+    dgeqrf_(&m, &n, A, &lda, tau, work, &lwork, &info);
+}
+static inline
+void geqrf(const int m, const int n, std::complex<float>* A, const int lda, std::complex<float>* tau, std::complex<float>* work, const int lwork, int& info)
+{
+    cgeqrf_(&m, &n, A, &lda, tau, work, &lwork, &info);
+}
+static inline
+void geqrf(const int m, const int n, std::complex<double>* A, const int lda, std::complex<double>* tau, std::complex<double>* work, const int lwork, int& info)
+{
+    zgeqrf_(&m, &n, A, &lda, tau, work, &lwork, &info);
+}
+
 } // namespace lapackConnector
 } // namespace container