From 85c114ee48e30a046501f4d4b0eb755f4bb68d43 Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Tue, 7 May 2024 04:21:53 -0600 Subject: [PATCH 1/6] Lapack - geqrf: adding wrapper for QR decomposition The wrappers will allow use of LAPACK, cusolver, rocsolver and magma Signed-off-by: Luc-Berger Vergiat --- lapack/CMakeLists.txt | 6 + .../KokkosLapack_geqrf_eti_spec_inst.cpp.in | 25 + .../KokkosLapack_geqrf_eti_spec_avail.hpp.in | 24 + .../KokkosLapack_geqrf_eti_spec_decl.hpp.in | 11 + lapack/impl/KokkosLapack_geqrf_impl.hpp | 34 ++ lapack/impl/KokkosLapack_geqrf_spec.hpp | 121 +++++ lapack/src/KokkosLapack_geqrf.hpp | 152 ++++++ lapack/tpls/KokkosLapack_Host_tpl.cpp | 67 ++- lapack/tpls/KokkosLapack_Host_tpl.hpp | 2 + .../KokkosLapack_geqrf_tpl_spec_avail.hpp | 129 +++++ .../tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 429 +++++++++++++++++ lapack/unit_test/Test_Lapack.hpp | 1 + lapack/unit_test/Test_Lapack_geqrf.hpp | 452 ++++++++++++++++++ 13 files changed, 1436 insertions(+), 17 deletions(-) create mode 100644 lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in create mode 100644 lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in create mode 100644 lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_decl.hpp.in create mode 100644 lapack/impl/KokkosLapack_geqrf_impl.hpp create mode 100644 lapack/impl/KokkosLapack_geqrf_spec.hpp create mode 100644 lapack/src/KokkosLapack_geqrf.hpp create mode 100644 lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp create mode 100644 lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp create mode 100644 lapack/unit_test/Test_Lapack_geqrf.hpp diff --git a/lapack/CMakeLists.txt b/lapack/CMakeLists.txt index 22f08269aa..20d5de74e3 100644 --- a/lapack/CMakeLists.txt +++ b/lapack/CMakeLists.txt @@ -62,3 +62,9 @@ kokkoskernels_generate_eti(Lapack_svd svd HEADER_LIST ETI_HEADERS SOURCE_LIST SOURCES TYPE_LISTS FLOATS LAYOUTS DEVICES) + +KOKKOSKERNELS_GENERATE_ETI(Lapack_geqrf geqrf + COMPONENTS lapack + HEADER_LIST ETI_HEADERS + SOURCE_LIST SOURCES + TYPE_LISTS FLOATS LAYOUTS DEVICES) diff --git a/lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in b/lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in new file mode 100644 index 0000000000..4f4ad91cb6 --- /dev/null +++ b/lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in @@ -0,0 +1,25 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#define KOKKOSKERNELS_IMPL_COMPILE_LIBRARY true +#include "KokkosKernels_config.h" +#include "KokkosLapack_geqrf_spec.hpp" + +namespace KokkosLapack { +namespace Impl { +@LAPACK_GEQRF_ETI_INST_BLOCK@ + } // namespace Impl +} // namespace KokkosLapack diff --git a/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in b/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in new file mode 100644 index 0000000000..899a8b7604 --- /dev/null +++ b/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in @@ -0,0 +1,24 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSLAPACK_GEQRF_ETI_SPEC_AVAIL_HPP_ +#define KOKKOSLAPACK_GEQRF_ETI_SPEC_AVAIL_HPP_ +namespace KokkosLapack { +namespace Impl { +@LAPACK_GEQRF_ETI_AVAIL_BLOCK@ + } // namespace Impl +} // namespace KokkosLapack +#endif diff --git a/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_decl.hpp.in b/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_decl.hpp.in new file mode 100644 index 0000000000..ae6318f4e2 --- /dev/null +++ b/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_decl.hpp.in @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project + +#ifndef KOKKOSLAPACK_GEQRF_ETI_SPEC_DECL_HPP_ +#define KOKKOSLAPACK_GEQRF_ETI_SPEC_DECL_HPP_ +namespace KokkosLapack { +namespace Impl { +@LAPACK_GEQRF_ETI_DECL_BLOCK@ + } //IMPL +} //Kokkos +#endif diff --git a/lapack/impl/KokkosLapack_geqrf_impl.hpp b/lapack/impl/KokkosLapack_geqrf_impl.hpp new file mode 100644 index 0000000000..c3a25824db --- /dev/null +++ b/lapack/impl/KokkosLapack_geqrf_impl.hpp @@ -0,0 +1,34 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSLAPACK_IMPL_GEQRF_HPP_ +#define KOKKOSLAPACK_IMPL_GEQRF_HPP_ + +/// \file KokkosLapack_geqrf_impl.hpp +/// \brief Implementation(s) of dense linear solve. + +#include +#include + +namespace KokkosLapack { +namespace Impl { + +// NOTE: Might add the implementation of KokkosLapack::geqrf later + +} // namespace Impl +} // namespace KokkosLapack + +#endif // KOKKOSLAPACK_IMPL_GEQRF_HPP diff --git a/lapack/impl/KokkosLapack_geqrf_spec.hpp b/lapack/impl/KokkosLapack_geqrf_spec.hpp new file mode 100644 index 0000000000..84d2bf8b9f --- /dev/null +++ b/lapack/impl/KokkosLapack_geqrf_spec.hpp @@ -0,0 +1,121 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +#ifndef KOKKOSLAPACK_IMPL_GEQRF_SPEC_HPP_ +#define KOKKOSLAPACK_IMPL_GEQRF_SPEC_HPP_ + +#include +#include +#include + +// Include the actual functors +#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY +#include +#endif + +namespace KokkosLapack { +namespace Impl { +// Specialization struct which defines whether a specialization exists +template +struct geqrf_eti_spec_avail { + enum : bool { value = false }; +}; +} // namespace Impl +} // namespace KokkosLapack + +// +// Macro for declaration of full specialization availability +// KokkosLapack::Impl::GEQRF. This is NOT for users!!! All +// the declarations of full specializations go in this header file. +// We may spread out definitions (see _INST macro below) across one or +// more .cpp files. +// +#define KOKKOSLAPACK_GEQRF_ETI_SPEC_AVAIL(SCALAR_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + template <> \ + struct geqrf_eti_spec_avail< \ + EXEC_SPACE_TYPE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ + }; + +// Include the actual specialization declarations +#include +#include + +namespace KokkosLapack { +namespace Impl { + +// Unification layer +template ::value, + bool eti_spec_avail = geqrf_eti_spec_avail::value> +struct GEQRF { + static void geqrf(const ExecutionSpace &space, const AMatrix &A, const TWArray &Tau, const TWArray &Work, + const RType &R); +}; + +#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY +// Unification layer +template +struct GEQRF { + static void geqrf(const ExecutionSpace & /* space */, const AMatrix & /* A */, const TWArray & /* Tau */, + const TWArray & /* Work */, const RType & /* R */) { + // NOTE: Might add the implementation of KokkosLapack::geqrf later + throw std::runtime_error( + "No fallback implementation of GEQRF (general QR factorization) " + "exists. Enable LAPACK, CUSOLVER, ROCSOLVER or MAGMA TPL."); + } +}; + +#endif +} // namespace Impl +} // namespace KokkosLapack + +// +// Macro for declaration of full specialization of +// KokkosLapack::Impl::GEQRF. This is NOT for users!!! All +// the declarations of full specializations go in this header file. +// We may spread out definitions (see _DEF macro below) across one or +// more .cpp files. +// +#define KOKKOSLAPACK_GEQRF_ETI_SPEC_DECL(SCALAR_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + extern template struct GEQRF< \ + EXEC_SPACE_TYPE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + false, true>; + +#define KOKKOSLAPACK_GEQRF_ETI_SPEC_INST(SCALAR_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + template struct GEQRF, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + false, true>; + +#include + +#endif // KOKKOSLAPACK_IMPL_GEQRF_SPEC_HPP_ diff --git a/lapack/src/KokkosLapack_geqrf.hpp b/lapack/src/KokkosLapack_geqrf.hpp new file mode 100644 index 0000000000..3216203f27 --- /dev/null +++ b/lapack/src/KokkosLapack_geqrf.hpp @@ -0,0 +1,152 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +/// \file KokkosLapack_geqrf.hpp +/// \brief QR factorization +/// +/// This file provides KokkosLapack::geqrf. This function performs a +/// local (no MPI) QR factorization of a M-by-N matrix A. + +#ifndef KOKKOSLAPACK_GEQRF_HPP_ +#define KOKKOSLAPACK_GEQRF_HPP_ + +#include + +#include "KokkosLapack_geqrf_spec.hpp" +#include "KokkosKernels_Error.hpp" + +namespace KokkosLapack { + +/// \brief Computes a QR factorization of a matrix A +/// +/// \tparam ExecutionSpace The space where the kernel will run. +/// \tparam AMatrix Type of matrix A, as a 2-D Kokkos::View. +/// \tparam TauArray Type of array Tau, as a 1-D Kokkos::View. +/// \tparam InfoArray Type of array Info, as a 1-D Kokkos::View. +/// +/// \param space [in] Execution space instance used to specified how to execute +/// the geqrf kernels. +/// \param A [in,out] On entry, the M-by-N matrix to be factorized. +/// On exit, the elements on and above the diagonal contain +/// the min(M,N)-by-N upper trapezoidal matrix R (R is upper +/// triangular if M >= N); the elements below the diagonal, +/// with the array Tau, represent the unitary matrix Q as a +/// product of min(M,N) elementary reflectors. The matrix Q +/// is represented as a product of elementary reflectors +/// Q = H(1) H(2) . . . H(k), where k = min(M,N). +/// Each H(i) has the form +/// H(i) = I - Tau(i) * v * v**H, +/// where v is a vector with v(1:i-1) = 0 and v(i) = 1; +/// v(i+1:M) is stored on exit in A(i+1:M,i). +/// \param Tau [out] One-dimensional array of size min(M,N) that contains the +/// scalar factors of the elementary reflectors. +/// \param Info [out] One-dimensional array of integers and of size 1: +/// Info[0] = 0: successfull exit +/// Info[0] < 0: if equal to '-i', the i-th argument had an +/// illegal value +/// +template +void geqrf(const ExecutionSpace& space, const AMatrix& A, const TauArray& Tau, const InfoArray& Info) { + // NOTE: Currently, KokkosLapack::geqrf only supports LAPACK, MAGMA and + // rocSOLVER TPLs. + // MAGMA/rocSOLVER TPL should be enabled to call the MAGMA/rocSOLVER GPU + // interface for device views LAPACK TPL should be enabled to call the + // LAPACK interface for host views + + static_assert(Kokkos::SpaceAccessibility::accessible); + static_assert(Kokkos::SpaceAccessibility::accessible); + static_assert(Kokkos::SpaceAccessibility::accessible); + + static_assert(Kokkos::is_view::value, "KokkosLapack::geqrf: A must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosLapack::geqrf: Tau must be Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosLapack::geqrf: Info must be Kokkos::View."); + + static_assert(static_cast(AMatrix::rank) == 2, "KokkosLapack::geqrf: A must have rank 2."); + static_assert(static_cast(TauArray::rank) == 1, "KokkosLapack::geqrf: Tau must have rank 1."); + static_assert(static_cast(InfoArray::rank) == 1, "KokkosLapack::geqrf: Info must have rank 1."); + + static_assert(std::is_same_v, + "KokkosLapack::geqrf: Info must be an array of integers."); + + const int64_t m = A.extent(0); + const int64_t n = A.extent(1); + const int64_t tau0 = Tau.extent(0); + const int64_t info0 = Info.extent(0); + + // Check validity of dimensions + if (tau0 != std::min(m, n)) { + std::ostringstream os; + os << "KokkosLapack::geqrf: length of Tau must be equal to min(m,n): " + << " A: " << m << " x " << n << ", Tau length = " << tau0; + KokkosKernels::Impl::throw_runtime_exception(os.str()); + } + + if (info0 == 0) { + std::ostringstream os; + os << "KokkosLapack::geqrf: length of Info must be at least 1: " + << " A: " << m << " x " << n << ", Info length = " << info0; + KokkosKernels::Impl::throw_runtime_exception(os.str()); + } + + using AMatrix_Internal = Kokkos::View>; + using TauArray_Internal = Kokkos::View>; + using InfoArray_Internal = Kokkos::View>; + + AMatrix_Internal A_i = A; + TauArray_Internal Tau_i = Tau; + InfoArray_Internal Info_i = Info; + + KokkosLapack::Impl::GEQRF::geqrf( + space, A_i, Tau_i, Info_i); +} + +/// \brief Computes a QR factorization of a matrix A +/// +/// \tparam AMatrix Type of matrix A, as a 2-D Kokkos::View. +/// \tparam TauArray Type of array Tau, as a 1-D Kokkos::View. +/// \tparam InfoArray Type of array Info, as a 1-D Kokkos::View. +/// +/// \param A [in,out] On entry, the M-by-N matrix to be factorized. +/// On exit, the elements on and above the diagonal contain +/// the min(M,N)-by-N upper trapezoidal matrix R (R is upper +/// triangular if M >= N); the elements below the diagonal, +/// with the array Tau, represent the unitary matrix Q as a +/// product of min(M,N) elementary reflectors. The matrix Q +/// is represented as a product of elementary reflectors +/// Q = H(1) H(2) . . . H(k), where k = min(M,N). +/// Each H(i) has the form +/// H(i) = I - Tau(i) * v * v**H, +/// where v is a vector with v(1:i-1) = 0 and v(i) = 1; +/// v(i+1:M) is stored on exit in A(i+1:M,i). +/// \param Tau [out] One-dimensional array of size min(M,N) that contains the +/// scalar factors of the elementary reflectors. +/// \param Info [out] One-dimensional array of integers and of size 1: +/// Info[0] = 0: successfull exit +/// Info[0] < 0: if equal to '-i', the i-th argument had an +/// illegal value +/// +template +void geqrf(const AMatrix& A, const TauArray& Tau, const InfoArray& Info) { + typename AMatrix::execution_space space{}; + geqrf(space, A, Tau, Info); +} + +} // namespace KokkosLapack + +#endif // KOKKOSLAPACK_GEQRF_HPP_ diff --git a/lapack/tpls/KokkosLapack_Host_tpl.cpp b/lapack/tpls/KokkosLapack_Host_tpl.cpp index dadd568182..554736295a 100644 --- a/lapack/tpls/KokkosLapack_Host_tpl.cpp +++ b/lapack/tpls/KokkosLapack_Host_tpl.cpp @@ -51,6 +51,17 @@ void F77_BLAS_MANGLE(strtri, STRTRI)(const char*, const char*, int*, const float void F77_BLAS_MANGLE(dtrtri, DTRTRI)(const char*, const char*, int*, const double*, int*, int*); void F77_BLAS_MANGLE(ctrtri, CTRTRI)(const char*, const char*, int*, const std::complex*, int*, int*); void F77_BLAS_MANGLE(ztrtri, ZTRTRI)(const char*, const char*, int*, const std::complex*, int*, int*); + +/// +/// Geqrf +/// + +void F77_BLAS_MANGLE(sgeqrf, SGEQRF)(const int*, const int*, float*, const int*, float*, float*, int*, int*); +void F77_BLAS_MANGLE(dgeqrf, DGEQRF)(const int*, const int*, double*, const int*, double*, double*, int*, int*); +void F77_BLAS_MANGLE(cgeqrf, CGEQRF)(const int*, const int*, std::complex*, const int*, std::complex*, + std::complex*, int*, int*); +void F77_BLAS_MANGLE(zgeqrf, ZGEQRF)(const int*, const int*, std::complex*, const int*, std::complex*, + std::complex*, int*, int*); } #define F77_FUNC_SGESV F77_BLAS_MANGLE(sgesv, SGESV) @@ -68,6 +79,11 @@ void F77_BLAS_MANGLE(ztrtri, ZTRTRI)(const char*, const char*, int*, const std:: #define F77_FUNC_CTRTRI F77_BLAS_MANGLE(ctrtri, CTRTRI) #define F77_FUNC_ZTRTRI F77_BLAS_MANGLE(ztrtri, ZTRTRI) +#define F77_FUNC_SGEQRF F77_BLAS_MANGLE(sgeqrf, SGEQRF) +#define F77_FUNC_DGEQRF F77_BLAS_MANGLE(dgeqrf, DGEQRF) +#define F77_FUNC_CGEQRF F77_BLAS_MANGLE(cgeqrf, CGEQRF) +#define F77_FUNC_ZGEQRF F77_BLAS_MANGLE(zgeqrf, ZGEQRF) + namespace KokkosLapack { namespace Impl { @@ -91,6 +107,10 @@ int HostLapack::trtri(const char uplo, const char diag, int n, const floa F77_FUNC_STRTRI(&uplo, &diag, &n, a, &lda, &info); return info; } +template <> +void HostLapack::geqrf(int m, int n, float* a, int lda, float* tau, float* work, int lwork, int* info) { + F77_FUNC_SGEQRF(&m, &n, a, &lda, tau, work, &lwork, info); +} /// /// double @@ -112,55 +132,68 @@ int HostLapack::trtri(const char uplo, const char diag, int n, const dou F77_FUNC_DTRTRI(&uplo, &diag, &n, a, &lda, &info); return info; } +template <> +void HostLapack::geqrf(int m, int n, double* a, int lda, double* tau, double* work, int lwork, int* info) { + F77_FUNC_DGEQRF(&m, &n, a, &lda, tau, work, &lwork, info); +} /// /// std::complex /// template <> -void HostLapack >::gesv(int n, int rhs, std::complex* a, int lda, int* ipiv, - std::complex* b, int ldb, int info) { +void HostLapack>::gesv(int n, int rhs, std::complex* a, int lda, int* ipiv, + std::complex* b, int ldb, int info) { F77_FUNC_CGESV(&n, &rhs, a, &lda, ipiv, b, &ldb, &info); } template <> -void HostLapack >::gesvd(const char jobu, const char jobvt, const int m, const int n, - std::complex* a, const int lda, float* s, std::complex* u, - const int ldu, std::complex* vt, const int ldvt, - std::complex* work, int lwork, float* rwork, int info) { +void HostLapack>::gesvd(const char jobu, const char jobvt, const int m, const int n, + std::complex* a, const int lda, float* s, std::complex* u, + const int ldu, std::complex* vt, const int ldvt, + std::complex* work, int lwork, float* rwork, int info) { F77_FUNC_CGESVD(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, rwork, &info); } template <> -int HostLapack >::trtri(const char uplo, const char diag, int n, const std::complex* a, - int lda) { +int HostLapack>::trtri(const char uplo, const char diag, int n, const std::complex* a, + int lda) { int info = 0; F77_FUNC_CTRTRI(&uplo, &diag, &n, a, &lda, &info); return info; } +template <> +void HostLapack>::geqrf(int m, int n, std::complex* a, int lda, std::complex* tau, + std::complex* work, int lwork, int* info) { + F77_FUNC_CGEQRF(&m, &n, a, &lda, tau, work, &lwork, info); +} /// /// std::complex /// template <> -void HostLapack >::gesv(int n, int rhs, std::complex* a, int lda, int* ipiv, - std::complex* b, int ldb, int info) { +void HostLapack>::gesv(int n, int rhs, std::complex* a, int lda, int* ipiv, + std::complex* b, int ldb, int info) { F77_FUNC_ZGESV(&n, &rhs, a, &lda, ipiv, b, &ldb, &info); } template <> -void HostLapack >::gesvd(const char jobu, const char jobvt, const int m, const int n, - std::complex* a, const int lda, double* s, - std::complex* u, const int ldu, std::complex* vt, - const int ldvt, std::complex* work, int lwork, double* rwork, - int info) { +void HostLapack>::gesvd(const char jobu, const char jobvt, const int m, const int n, + std::complex* a, const int lda, double* s, std::complex* u, + const int ldu, std::complex* vt, const int ldvt, + std::complex* work, int lwork, double* rwork, int info) { F77_FUNC_ZGESVD(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, rwork, &info); } template <> -int HostLapack >::trtri(const char uplo, const char diag, int n, const std::complex* a, - int lda) { +int HostLapack>::trtri(const char uplo, const char diag, int n, const std::complex* a, + int lda) { int info = 0; F77_FUNC_ZTRTRI(&uplo, &diag, &n, a, &lda, &info); return info; } +template <> +void HostLapack>::geqrf(int m, int n, std::complex* a, int lda, std::complex* tau, + std::complex* work, int lwork, int* info) { + F77_FUNC_ZGEQRF(&m, &n, a, &lda, tau, work, &lwork, info); +} } // namespace Impl } // namespace KokkosLapack diff --git a/lapack/tpls/KokkosLapack_Host_tpl.hpp b/lapack/tpls/KokkosLapack_Host_tpl.hpp index 7a2374ad43..27c8b8015d 100644 --- a/lapack/tpls/KokkosLapack_Host_tpl.hpp +++ b/lapack/tpls/KokkosLapack_Host_tpl.hpp @@ -24,6 +24,8 @@ struct HostLapack { T *work, int lwork, typename KokkosKernels::ArithTraits::mag_type *rwork, int info); static int trtri(const char uplo, const char diag, int n, const T *a, int lda); + + static void geqrf(int m, int n, T *a, int lda, T *tau, T *work, int lwork, int *info); }; } // namespace Impl } // namespace KokkosLapack diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp new file mode 100644 index 0000000000..a0afa4bb73 --- /dev/null +++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp @@ -0,0 +1,129 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_HPP_ +#define KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_HPP_ + +namespace KokkosLapack { +namespace Impl { +// Specialization struct which defines whether a specialization exists +template +struct geqrf_tpl_spec_avail { + enum : bool { value = false }; +}; + +// Generic Host side LAPACK (could be MKL or whatever) +#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK + +#define KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_LAPACK(SCALAR, LAYOUT, MEMSPACE) \ + template \ + struct geqrf_tpl_spec_avail< \ + ExecSpace, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ + }; + +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) +#endif +} // namespace Impl +} // namespace KokkosLapack + +// MAGMA +#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA +#include "magma_v2.h" + +namespace KokkosLapack { +namespace Impl { +#define KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_MAGMA(SCALAR, LAYOUT, MEMSPACE) \ + template <> \ + struct geqrf_tpl_spec_avail< \ + Kokkos::Cuda, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ + }; + +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_MAGMA(double, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_MAGMA(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_MAGMA + +// CUSOLVER +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER +namespace KokkosLapack { +namespace Impl { + +#define KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(SCALAR, LAYOUT, MEMSPACE) \ + template <> \ + struct geqrf_tpl_spec_avail< \ + Kokkos::Cuda, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ + }; + +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) + +#if defined(KOKKOSKERNELS_INST_MEMSPACE_CUDAUVMSPACE) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +#endif + +} // namespace Impl +} // namespace KokkosLapack +#endif // CUSOLVER + +#ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER +#include + +namespace KokkosLapack { +namespace Impl { + +#define KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_ROCSOLVER(SCALAR, LAYOUT, MEMSPACE) \ + template <> \ + struct geqrf_tpl_spec_avail< \ + Kokkos::HIP, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ + }; + +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_ROCSOLVER(double, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_ROCSOLVER(float, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIPSpace) + +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER + +#endif diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp new file mode 100644 index 0000000000..1643e9bdf1 --- /dev/null +++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp @@ -0,0 +1,429 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSLAPACK_GEQRF_TPL_SPEC_DECL_HPP_ +#define KOKKOSLAPACK_GEQRF_TPL_SPEC_DECL_HPP_ + +namespace KokkosLapack { +namespace Impl { +template +inline void geqrf_print_specialization() { +#ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION +#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA + printf("KokkosLapack::geqrf<> TPL MAGMA specialization for < %s , %s, %s >\n", typeid(AViewType).name(), + typeid(TauViewType).name(), typeid(InfoViewType).name()); +#else +#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK + printf("KokkosLapack::geqrf<> TPL Lapack specialization for < %s , %s, %s >\n", typeid(AViewType).name(), + typeid(TauViewType).name(), typeid(InfoViewType).name()); +#endif +#endif +#endif +} +} // namespace Impl +} // namespace KokkosLapack + +// Generic Host side LAPACK (could be MKL or whatever) +#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK +#include + +namespace KokkosLapack { +namespace Impl { + +template +void lapackGeqrfWrapper(const AViewType& A, const TauViewType& Tau, const InfoViewType& Info) { + using memory_space = typename AViewType::memory_space; + using Scalar = typename AViewType::non_const_value_type; + using ALayout_t = typename AViewType::array_layout; + static_assert(std::is_same_v, + "KokkosLapack - geqrf: A needs to have a Kokkos::LayoutLeft"); + const int m = A.extent_int(0); + const int n = A.extent_int(1); + const int lda = A.stride(1); + + int lwork = -1; + Kokkos::View work("geqrf work buffer", 1); + + if constexpr (KokkosKernels::ArithTraits::is_complex) { + using MagType = typename KokkosKernels::ArithTraits::mag_type; + + HostLapack>::geqrf(m, n, reinterpret_cast*>(A.data()), lda, + reinterpret_cast*>(Tau.data()), + reinterpret_cast*>(work.data()), lwork, Info.data()); + + if (Info[0] < 0) return; + + lwork = static_cast(work(0).real()); + + work = Kokkos::View("geqrf work buffer", lwork); + + HostLapack>::geqrf(m, n, reinterpret_cast*>(A.data()), lda, + reinterpret_cast*>(Tau.data()), + reinterpret_cast*>(work.data()), lwork, Info.data()); + } else { + HostLapack::geqrf(m, n, A.data(), lda, Tau.data(), work.data(), lwork, Info.data()); + + if (Info[0] < 0) return; + + lwork = static_cast(work(0)); + + work = Kokkos::View("geqrf work buffer", lwork); + + HostLapack::geqrf(m, n, A.data(), lda, Tau.data(), work.data(), lwork, Info.data()); + } +} + +#define KOKKOSLAPACK_GEQRF_LAPACK(SCALAR, LAYOUT, EXECSPACE, MEM_SPACE) \ + template <> \ + struct GEQRF< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, true, \ + geqrf_eti_spec_avail, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AViewType = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + using TauViewType = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + using InfoViewType = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + \ + static void geqrf(const EXECSPACE& /* space */, const AViewType& A, const TauViewType& Tau, \ + const InfoViewType& Info) { \ + Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_LAPACK," #SCALAR "]"); \ + geqrf_print_specialization(); \ + lapackGeqrfWrapper(A, Tau, Info); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#if defined(KOKKOS_ENABLE_SERIAL) +KOKKOSLAPACK_GEQRF_LAPACK(float, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_LAPACK(double, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) +#endif + +#if defined(KOKKOS_ENABLE_OPENMP) +KOKKOSLAPACK_GEQRF_LAPACK(float, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_LAPACK(double, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) +#endif + +#if defined(KOKKOS_ENABLE_THREADS) +KOKKOSLAPACK_GEQRF_LAPACK(float, Kokkos::LayoutLeft, Kokkos::Threads, Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_LAPACK(double, Kokkos::LayoutLeft, Kokkos::Threads, Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Threads, Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Threads, Kokkos::HostSpace) +#endif + +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_LAPACK + +#if 0 // TO DO + +// MAGMA +#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA +#include + +namespace KokkosLapack { +namespace Impl { + +template +void magmaGeqrfWrapper(const ExecSpace& space, const AViewType& A, + const TauViewType& Tau, const InfoViewType& Info) { + using scalar_type = typename AViewType::non_const_value_type; + + Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_MAGMA," + + KokkosKernels::ArithTraits::name() + "]"); + geqrf_print_specialization(); + + magma_int_t N = static_cast(A.extent(1)); + magma_int_t AST = static_cast(A.stride(1)); + magma_int_t LDA = (AST == 0) ? 1 : AST; + magma_int_t BST = static_cast(B.stride(1)); + magma_int_t LDB = (BST == 0) ? 1 : BST; + magma_int_t NRHS = static_cast(B.extent(1)); + + KokkosLapack::Impl::MagmaSingleton& s = + KokkosLapack::Impl::MagmaSingleton::singleton(); + magma_int_t info = 0; + + space.fence(); + if constexpr (std::is_same_v) { + magma_sgeqrf_nopiv_gpu(N, NRHS, reinterpret_cast(A.data()), + LDA, reinterpret_cast(B.data()), + LDB, &info); + } + + if constexpr (std::is_same_v) { + magma_dgeqrf_nopiv_gpu( + N, NRHS, reinterpret_cast(A.data()), LDA, + reinterpret_cast(B.data()), LDB, &info); + } + + if constexpr (std::is_same_v>) { + magma_cgeqrf_nopiv_gpu( + N, NRHS, reinterpret_cast(A.data()), LDA, + reinterpret_cast(B.data()), LDB, &info); + } + + if constexpr (std::is_same_v>) { + magma_zgeqrf_nopiv_gpu( + N, NRHS, reinterpret_cast(A.data()), LDA, + reinterpret_cast(B.data()), LDB, &info); + } + ExecSpace().fence(); + Kokkos::Profiling::popRegion(); +} + +#define KOKKOSLAPACK_GEQRF_MAGMA(SCALAR, LAYOUT, MEM_SPACE) \ + template <> \ + struct GEQRF< \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + true, \ + geqrf_eti_spec_avail, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using TauViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ + static void geqrf(const Kokkos::Cuda& space, const AViewType& A, const TauViewType& Tau, \ + const InfoViewType& Info) { \ + magmaGeqrfWrapper(space, A, Tau, Info); \ + } \ + }; + +KOKKOSLAPACK_GEQRF_MAGMA(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_MAGMA(double, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::CudaSpace) + +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_MAGMA + +#endif // TO DO + +// CUSOLVER +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER +#include "KokkosLapack_cusolver.hpp" + +namespace KokkosLapack { +namespace Impl { + +template +void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, const TauViewType& Tau, + const InfoViewType& Info) { + using memory_space = typename AViewType::memory_space; + using Scalar = typename AViewType::non_const_value_type; + + using ALayout_t = typename AViewType::array_layout; + static_assert(std::is_same_v, + "KokkosLapack - cusolver geqrf: A needs to have a Kokkos::LayoutLeft"); + const int m = A.extent_int(0); + const int n = A.extent_int(1); + const int lda = A.stride(1); + int lwork = 0; + + CudaLapackSingleton& s = CudaLapackSingleton::singleton(); + KOKKOSLAPACK_IMPL_CUSOLVER_SAFE_CALL(cusolverDnSetStream(s.handle, space.cuda_stream())); + if constexpr (std::is_same_v) { + KOKKOSLAPACK_IMPL_CUSOLVER_SAFE_CALL(cusolverDnSgeqrf_bufferSize(s.handle, m, n, A.data(), lda, &lwork)); + Kokkos::View Workspace("cusolver sgeqrf workspace", lwork); + + KOKKOSLAPACK_IMPL_CUSOLVER_SAFE_CALL( + cusolverDnSgeqrf(s.handle, m, n, A.data(), lda, Tau.data(), Workspace.data(), lwork, Info.data())); + } + if constexpr (std::is_same_v) { + KOKKOSLAPACK_IMPL_CUSOLVER_SAFE_CALL(cusolverDnDgeqrf_bufferSize(s.handle, m, n, A.data(), lda, &lwork)); + Kokkos::View Workspace("cusolver dgeqrf workspace", lwork); + + KOKKOSLAPACK_IMPL_CUSOLVER_SAFE_CALL( + cusolverDnDgeqrf(s.handle, m, n, A.data(), lda, Tau.data(), Workspace.data(), lwork, Info.data())); + } + if constexpr (std::is_same_v>) { + KOKKOSLAPACK_IMPL_CUSOLVER_SAFE_CALL( + cusolverDnCgeqrf_bufferSize(s.handle, m, n, reinterpret_cast(A.data()), lda, &lwork)); + Kokkos::View Workspace("cusolver cgeqrf workspace", lwork); + + KOKKOSLAPACK_IMPL_CUSOLVER_SAFE_CALL(cusolverDnCgeqrf( + s.handle, m, n, reinterpret_cast(A.data()), lda, reinterpret_cast(Tau.data()), + reinterpret_cast(Workspace.data()), lwork, Info.data())); + } + if constexpr (std::is_same_v>) { + KOKKOSLAPACK_IMPL_CUSOLVER_SAFE_CALL( + cusolverDnZgeqrf_bufferSize(s.handle, m, n, reinterpret_cast(A.data()), lda, &lwork)); + Kokkos::View Workspace("cusolver zgeqrf workspace", lwork); + + KOKKOSLAPACK_IMPL_CUSOLVER_SAFE_CALL(cusolverDnZgeqrf(s.handle, m, n, reinterpret_cast(A.data()), + lda, reinterpret_cast(Tau.data()), + reinterpret_cast(Workspace.data()), lwork, + Info.data())); + } + KOKKOSLAPACK_IMPL_CUSOLVER_SAFE_CALL(cusolverDnSetStream(s.handle, NULL)); +} + +#define KOKKOSLAPACK_GEQRF_CUSOLVER(SCALAR, LAYOUT, MEM_SPACE) \ + template <> \ + struct GEQRF< \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + true, \ + geqrf_eti_spec_avail, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using TauViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using InfoViewType = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + \ + static void geqrf(const Kokkos::Cuda& space, const AViewType& A, const TauViewType& Tau, \ + const InfoViewType& Info) { \ + Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_CUSOLVER," #SCALAR "]"); \ + geqrf_print_specialization(); \ + \ + cusolverGeqrfWrapper(space, A, Tau, Info); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +KOKKOSLAPACK_GEQRF_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) + +#if defined(KOKKOSKERNELS_INST_MEMSPACE_CUDAUVMSPACE) +KOKKOSLAPACK_GEQRF_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSLAPACK_GEQRF_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +#endif + +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_CUSOLVER + +// ROCSOLVER +#ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER +#include +#include + +namespace KokkosLapack { +namespace Impl { + +template +void rocsolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, const TauViewType& Tau, + const InfoViewType& Info) { + using Scalar = typename AViewType::non_const_value_type; + + using ALayout_t = typename AViewType::array_layout; + static_assert(std::is_same_v, + "KokkosLapack - rocsolver geqrf: A needs to have a Kokkos::LayoutLeft"); + const rocblas_int m = static_cast(A.extent(0)); + const rocblas_int n = static_cast(A.extent(1)); + const rocblas_int lda = static_cast(A.stride(1)); + + KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); + KOKKOSBLAS_IMPL_ROCBLAS_SAFE_CALL(rocblas_set_stream(s.handle, space.hip_stream())); + if constexpr (std::is_same_v) { + KOKKOSBLAS_IMPL_ROCBLAS_SAFE_CALL(rocsolver_sgeqrf(s.handle, m, n, A.data(), lda, Tau.data())); + } + if constexpr (std::is_same_v) { + KOKKOSBLAS_IMPL_ROCBLAS_SAFE_CALL(rocsolver_dgeqrf(s.handle, m, n, A.data(), lda, Tau.data())); + } + if constexpr (std::is_same_v>) { + KOKKOSBLAS_IMPL_ROCBLAS_SAFE_CALL(rocsolver_cgeqrf(s.handle, m, n, + reinterpret_cast(A.data()), lda, + reinterpret_cast(Tau.data()))); + } + if constexpr (std::is_same_v>) { + KOKKOSBLAS_IMPL_ROCBLAS_SAFE_CALL(rocsolver_zgeqrf(s.handle, m, n, + reinterpret_cast(A.data()), lda, + reinterpret_cast(Tau.data()))); + } + Kokkos::deep_copy(Info, 0); // Success + KOKKOSBLAS_IMPL_ROCBLAS_SAFE_CALL(rocblas_set_stream(s.handle, NULL)); +} + +#define KOKKOSLAPACK_GEQRF_ROCSOLVER(SCALAR, LAYOUT, MEM_SPACE) \ + template <> \ + struct GEQRF< \ + Kokkos::HIP, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + true, \ + geqrf_eti_spec_avail, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using TauViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using InfoViewType = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + \ + static void geqrf(const Kokkos::HIP& space, const AViewType& A, const TauViewType& Tau, \ + const InfoViewType& Info) { \ + Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_ROCSOLVER," #SCALAR "]"); \ + geqrf_print_specialization(); \ + \ + rocsolverGeqrfWrapper(space, A, Tau, Info); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +KOKKOSLAPACK_GEQRF_ROCSOLVER(float, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_GEQRF_ROCSOLVER(double, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_GEQRF_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_GEQRF_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIPSpace) + +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER + +#endif diff --git a/lapack/unit_test/Test_Lapack.hpp b/lapack/unit_test/Test_Lapack.hpp index b43ad041e4..ed906200de 100644 --- a/lapack/unit_test/Test_Lapack.hpp +++ b/lapack/unit_test/Test_Lapack.hpp @@ -6,5 +6,6 @@ #include "Test_Lapack_gesv.hpp" #include "Test_Lapack_trtri.hpp" #include "Test_Lapack_svd.hpp" +#include "Test_Lapack_geqrf.hpp" #endif // TEST_LAPACK_HPP diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp b/lapack/unit_test/Test_Lapack_geqrf.hpp new file mode 100644 index 0000000000..859e969f4b --- /dev/null +++ b/lapack/unit_test/Test_Lapack_geqrf.hpp @@ -0,0 +1,452 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +// Only enable this test where KokkosLapack supports geqrf: +// CUDA+CUSOLVER, HIP+ROCSOLVER and HOST+LAPACK +#if (defined(TEST_CUDA_LAPACK_CPP) && defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER)) || \ + (defined(TEST_HIP_LAPACK_CPP) && defined(KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER)) || \ + (defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) && \ + (defined(TEST_OPENMP_LAPACK_CPP) || defined(TEST_SERIAL_LAPACK_CPP) || defined(TEST_THREADS_LAPACK_CPP))) + +#include +#include +#include + +#include +#include +#include +#include + +namespace Test { + +template +void getQR(int const m, int const n, typename ViewTypeA::HostMirror const& h_A, + typename ViewTypeTau::HostMirror const& h_tau, typename ViewTypeA::HostMirror& h_Q, + typename ViewTypeA::HostMirror& h_R, typename ViewTypeA::HostMirror& h_QR) { + using ScalarA = typename ViewTypeA::value_type; + + // ******************************************************************** + // Populate h_R + // ******************************************************************** + for (int i = 0; i < m; ++i) { + for (int j(0); j < n; ++j) { + if (i <= j) { + h_R(i, j) = h_A(i, j); + } else { + h_R(i, j) = KokkosKernels::ArithTraits::zero(); + } + } + } + + // ******************************************************************** + // Instantiate the m x m identity matrix h_I + // ******************************************************************** + ViewTypeA I("I", m, m); + typename ViewTypeA::HostMirror h_I = Kokkos::create_mirror_view(I); + for (int i = 0; i < m; ++i) { + h_I(i, i) = ScalarA(1.); + } + + // ******************************************************************** + // Compute h_Q + // ******************************************************************** + int minMN(std::min(m, n)); + ViewTypeTau v("v", m); + typename ViewTypeTau::HostMirror h_v = Kokkos::create_mirror_view(v); + + ViewTypeA Qk("Qk", m, m); + typename ViewTypeA::HostMirror h_Qk = Kokkos::create_mirror_view(Qk); + + ViewTypeA auxM("auxM", m, m); + typename ViewTypeA::HostMirror h_auxM = Kokkos::create_mirror_view(auxM); + + // Q = H(0) H(1) . . . H(min(M,N)-1), where for k=0,1,...,min(m,n)-1: + // H(k) = I - Tau(k) * v * v**H, and + // v is a vector of size m with: + // v(0:k-1) = 0, + // v(k) = 1, + // v(k+1:m-1) = A(k+1:m-1,k). + for (int k = 0; k < minMN; ++k) { + Kokkos::deep_copy(h_v, KokkosKernels::ArithTraits::zero()); + h_v[k] = 1.; + for (int index(k + 1); index < m; ++index) { + h_v[index] = h_A(index, k); + } + + // Rank-1 update of a general matrix: A = A + alpha * x * y^{T,H}. + // void ger( const char trans[] + // , const typename AViewType::const_value_type & alpha + // , const XViewType & x + // , const YViewType & y + // , const AViewType & A + // ); + Kokkos::deep_copy(h_Qk, h_I); + KokkosBlas::ger("H", -h_tau[k], h_v, h_v, h_Qk); + + // Dense matrix-matrix multiply: C = beta*C + alpha*op(A)*op(B). + // void gemm( const char transA[] + // , const char transB[] + // , typename AViewType::const_value_type & alpha + // , const AViewType & A + // , const BViewType & B + // , typename CViewType::const_value_type & beta + // , const CViewType & C + // ); + if (k == 0) { + Kokkos::deep_copy(h_Q, h_Qk); + } else { + Kokkos::deep_copy(h_auxM, KokkosKernels::ArithTraits::zero()); + KokkosBlas::gemm("N", "N", 1., h_Q, h_Qk, 0., h_auxM); + Kokkos::deep_copy(h_Q, h_auxM); + } + } // for k + + // ******************************************************************** + // Check that Q^H Q = I + // ******************************************************************** + { + Kokkos::deep_copy(h_auxM, KokkosKernels::ArithTraits::zero()); + KokkosBlas::gemm("C", "N", 1., h_Q, h_Q, 0., h_auxM); + + typename KokkosKernels::ArithTraits::mag_type absTol(1.e-8); + if constexpr (std::is_same_v< + typename KokkosKernels::ArithTraits::mag_type, float>) { + absTol = 5.e-5; + } + + using ats = KokkosKernels::ArithTraits; + bool test_flag_QHQ = true; + for (int i = 0; (i < m) && test_flag_QHQ; ++i) { + for (int j = 0; (j < m) && test_flag_QHQ; ++j) { + if (ats::abs(h_auxM(i, j) - h_I(i, j)) > absTol) { + std::cout << "QHQ checking" + << ", m = " << m << ", n = " << n << ", i = " << i << ", j = " << j + << ", h_auxM(i,j) = " << std::setprecision(16) << h_auxM(i, j) + << ", h_I(i,j) = " << std::setprecision(16) << h_I(i, j) << ", |diff| = " << std::setprecision(16) + << ats::abs(h_auxM(i, j) - h_I(i, j)) << ", absTol = " << std::setprecision(16) << absTol + << std::endl; + test_flag_QHQ = false; + } + } + } + ASSERT_EQ(test_flag_QHQ, true); + } + + // ******************************************************************** + // Compute h_QR + // ******************************************************************** + Kokkos::deep_copy(h_QR, KokkosKernels::ArithTraits::zero()); + KokkosBlas::gemm("N", "N", 1., h_Q, h_R, 0., h_QR); +} + +template +void impl_test_geqrf(int m, int n) { + using ALayout_t = typename ViewTypeA::array_layout; + using ViewTypeInfo = Kokkos::View; + using execution_space = typename Device::execution_space; + using ScalarA = typename ViewTypeA::value_type; + using ats = KokkosKernels::ArithTraits; + + Kokkos::Random_XorShift64_Pool rand_pool(13718); + + int minMN(std::min(m, n)); + + // ******************************************************************** + // Create device views + // ******************************************************************** + ViewTypeA A("A", m, n); + ViewTypeA Aorig("Aorig", m, n); + ViewTypeTau Tau("Tau", minMN); + ViewTypeInfo Info("Info", 1); + + // ******************************************************************** + // Create host mirrors of device views + // ******************************************************************** + typename ViewTypeA::HostMirror h_A = Kokkos::create_mirror_view(A); + typename ViewTypeA::HostMirror h_Aorig = Kokkos::create_mirror_view(Aorig); + typename ViewTypeTau::HostMirror h_tau = Kokkos::create_mirror_view(Tau); + typename ViewTypeInfo::HostMirror h_info = Kokkos::create_mirror_view(Info); + + // ******************************************************************** + // Initialize data + // ******************************************************************** + if ((m == 3) && (n == 3)) { + h_A(0, 0) = ScalarA(12.); + h_A(0, 1) = ScalarA(-51.); + h_A(0, 2) = ScalarA(4.); + + h_A(1, 0) = ScalarA(6.); + h_A(1, 1) = ScalarA(167.); + h_A(1, 2) = ScalarA(-68.); + + h_A(2, 0) = ScalarA(-4.); + h_A(2, 1) = ScalarA(24.); + h_A(2, 2) = ScalarA(-41.); + + Kokkos::deep_copy(A, h_A); + } else { + Kokkos::fill_random(A, rand_pool, Kokkos::rand, ScalarA>::max()); + Kokkos::deep_copy(h_A, A); + } + + Kokkos::deep_copy(h_Aorig, h_A); + Kokkos::fence(); + + // ******************************************************************** + // Perform the QR factorization + // ******************************************************************** + try { + execution_space space{}; + KokkosLapack::geqrf(space, A, Tau, Info); + } catch (const std::runtime_error& e) { + std::cout << "KokkosLapack::geqrf(): caught exception '" << e.what() << "'" << std::endl; + FAIL(); + return; + } + + Kokkos::fence(); + + Kokkos::deep_copy(h_info, Info); + EXPECT_EQ(h_info[0], 0) << "Failed geqrf() test: Info[0] = " << h_info[0]; + + // ******************************************************************** + // Get the results + // ******************************************************************** + Kokkos::deep_copy(h_A, A); + Kokkos::deep_copy(h_tau, Tau); + + typename KokkosKernels::ArithTraits::mag_type absTol(1.e-8); + if constexpr (std::is_same_v::mag_type, + float>) { + absTol = 5.e-5; + } + + // ******************************************************************** + // Check outputs h_A and h_tau + // ******************************************************************** + if ((m == 3) && (n == 3)) { + Kokkos::View refMatrix("ref matrix", m, n); + Kokkos::View refTau("ref tau", m); + + refMatrix(0, 0) = ScalarA(-14.); + refMatrix(0, 1) = ScalarA(-21.); + refMatrix(0, 2) = ScalarA(14.); + + refMatrix(1, 0) = ScalarA(0.2307692307692308); + refMatrix(1, 1) = ScalarA(-175.); + refMatrix(1, 2) = ScalarA(70.); + + refMatrix(2, 0) = ScalarA(-0.1538461538461539); + refMatrix(2, 1) = ScalarA(1. / 18.); + refMatrix(2, 2) = ScalarA(-35.); + + refTau(0) = ScalarA(1.857142857142857); + refTau(1) = ScalarA(1.993846153846154); + refTau(2) = ScalarA(0.); + + { + bool test_flag_A = true; + for (int i = 0; (i < m) && test_flag_A; ++i) { + for (int j = 0; (j < n) && test_flag_A; ++j) { + if (ats::abs(h_A(i, j) - refMatrix(i, j)) > absTol) { + std::cout << "h_Aoutput checking" + << ", m = " << m << ", n = " << n << ", i = " << i << ", j = " << j + << ", h_Aoutput(i,j) = " << std::setprecision(16) << h_A(i, j) + << ", refMatrix(i,j) = " << std::setprecision(16) << refMatrix(i, j) + << ", |diff| = " << std::setprecision(16) << ats::abs(h_A(i, j) - refMatrix(i, j)) + << ", absTol = " << std::setprecision(16) << absTol << std::endl; + test_flag_A = false; + } + } + } + ASSERT_EQ(test_flag_A, true); + } + + { + bool test_flag_tau = true; + for (int i = 0; (i < m) && test_flag_tau; ++i) { + if (ats::abs(h_tau(i) - refTau(i)) > absTol) { + std::cout << "tau checking" + << ", m = " << m << ", n = " << n << ", i = " << i << ", h_tau(i,j) = " << std::setprecision(16) + << h_tau(i) << ", refTau(i,j) = " << std::setprecision(16) << refTau(i) + << ", |diff| = " << std::setprecision(16) << ats::abs(h_tau(i) - refTau(i)) + << ", absTol = " << std::setprecision(16) << absTol << std::endl; + test_flag_tau = false; + } + } + ASSERT_EQ(test_flag_tau, true); + } + } + + // ******************************************************************** + // Compute Q, R, and QR + // ******************************************************************** + ViewTypeA Q("Q", m, m); + ViewTypeA R("R", m, n); + ViewTypeA QR("QR", m, n); + + typename ViewTypeA::HostMirror h_Q = Kokkos::create_mirror_view(Q); + typename ViewTypeA::HostMirror h_R = Kokkos::create_mirror_view(R); + typename ViewTypeA::HostMirror h_QR = Kokkos::create_mirror_view(QR); + + getQR(m, n, h_A, h_tau, h_Q, h_R, h_QR); + + // ******************************************************************** + // Check Q, R, and QR + // ******************************************************************** + if ((m == 3) && (n == 3)) { + Kokkos::View refQ("ref Q", m, n); + Kokkos::View refR("ref Q", m, n); + + refQ(0, 0) = ScalarA(-6. / 7.); + refQ(0, 1) = ScalarA(69. / 175.); + refQ(0, 2) = ScalarA(58. / 175.); + + refQ(1, 0) = ScalarA(-3. / 7.); + refQ(1, 1) = ScalarA(-158. / 175.); + refQ(1, 2) = ScalarA(-6. / 175.); + + refQ(2, 0) = ScalarA(2. / 7.); + refQ(2, 1) = ScalarA(-6. / 35.); + refQ(2, 2) = ScalarA(33. / 35.); + + refR(0, 0) = ScalarA(-14.); + refR(0, 1) = ScalarA(-21.); + refR(0, 2) = ScalarA(14.); + + refR(1, 1) = ScalarA(-175.); + refR(1, 2) = ScalarA(70.); + + refR(2, 2) = ScalarA(-35.); + + { + bool test_flag_Q = true; + for (int i = 0; (i < m) && test_flag_Q; ++i) { + for (int j = 0; (j < n) && test_flag_Q; ++j) { + if (ats::abs(h_Q(i, j) - refQ(i, j)) > absTol) { + std::cout << "Q checking" + << ", m = " << m << ", n = " << n << ", i = " << i << ", j = " << j + << ", h_Q(i,j) = " << std::setprecision(16) << h_Q(i, j) + << ", refQ(i,j) = " << std::setprecision(16) << refQ(i, j) + << ", |diff| = " << std::setprecision(16) << ats::abs(h_Q(i, j) - refQ(i, j)) + << ", absTol = " << std::setprecision(16) << absTol << std::endl; + test_flag_Q = false; + } + } + } + ASSERT_EQ(test_flag_Q, true); + } + + { + bool test_flag_R = true; + for (int i = 0; (i < m) && test_flag_R; ++i) { + for (int j = 0; (j < n) && test_flag_R; ++j) { + if (ats::abs(h_R(i, j) - refR(i, j)) > absTol) { + std::cout << "R checking" + << ", m = " << m << ", n = " << n << ", i = " << i << ", j = " << j + << ", h_R(i,j) = " << std::setprecision(16) << h_R(i, j) + << ", refR(i,j) = " << std::setprecision(16) << refR(i, j) + << ", |diff| = " << std::setprecision(16) << ats::abs(h_R(i, j) - refR(i, j)) + << ", absTol = " << std::setprecision(16) << absTol << std::endl; + test_flag_R = false; + } + } + } + ASSERT_EQ(test_flag_R, true); + } + } + + // ******************************************************************** + // Check that A = QR + // ******************************************************************** + { + bool test_flag_QR = true; + for (int i = 0; (i < m) && test_flag_QR; ++i) { + for (int j = 0; (j < n) && test_flag_QR; ++j) { + if (ats::abs(h_QR(i, j) - h_Aorig(i, j)) > absTol) { + std::cout << "QR checking" + << ", m = " << m << ", n = " << n << ", i = " << i << ", j = " << j + << ", h_Aorig(i,j) = " << std::setprecision(16) << h_Aorig(i, j) + << ", h_QR(i,j) = " << std::setprecision(16) << h_QR(i, j) << ", |diff| = " << std::setprecision(16) + << ats::abs(h_QR(i, j) - h_Aorig(i, j)) << ", absTol = " << std::setprecision(16) << absTol + << std::endl; + test_flag_QR = false; + } + } + } + ASSERT_EQ(test_flag_QR, true); + } +} + +} // namespace Test + +template +void test_geqrf() { +#if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + using view_type_a_ll = Kokkos::View; + using view_type_tau_ll = Kokkos::View; + + Test::impl_test_geqrf(1, 1); + Test::impl_test_geqrf(2, 1); + Test::impl_test_geqrf(2, 2); + Test::impl_test_geqrf(3, 1); + Test::impl_test_geqrf(3, 2); + Test::impl_test_geqrf(3, 3); + + Test::impl_test_geqrf(100, 70); + Test::impl_test_geqrf(70, 100); + Test::impl_test_geqrf(100, 100); +#endif +} + +#if defined(KOKKOSKERNELS_INST_FLOAT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, geqrf_float) { + Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_float"); + test_geqrf(); + Kokkos::Profiling::popRegion(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_DOUBLE) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, geqrf_double) { + Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_double"); + test_geqrf(); + Kokkos::Profiling::popRegion(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, geqrf_complex_float) { + Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_complex_float"); + test_geqrf, TestDevice>(); + Kokkos::Profiling::popRegion(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, geqrf_complex_double) { + Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_complex_double"); + test_geqrf, TestDevice>(); + Kokkos::Profiling::popRegion(); +} +#endif + +#endif // CUDA+CUSOLVER or HIP+ROCSOLVER or LAPACK+HOST From 1da291f2e7a15dcd3615c685173b579c0788719b Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Wed, 28 Jan 2026 11:20:03 +0900 Subject: [PATCH 2/6] Lapack - QR: adding documentation Signed-off-by: Luc Berger-Vergiat --- docs/source/API/lapack/geqrf.rst | 59 ++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 docs/source/API/lapack/geqrf.rst diff --git a/docs/source/API/lapack/geqrf.rst b/docs/source/API/lapack/geqrf.rst new file mode 100644 index 0000000000..7735d5734b --- /dev/null +++ b/docs/source/API/lapack/geqrf.rst @@ -0,0 +1,59 @@ +KokkosLapack::geqrf +################### + +Defined in header: :code:`KokkosLapack_geqrf.hpp` + +.. code:: c++ + + template + void geqrf(const ExecutionSpace& space, const AMatrix& A, const TauArray& Tau, const InfoArray& Info); + + template + void geqrf(const AMatrix& A, const TauArray& Tau, const InfoArray& Info); + +Performs the QR factorization of matrix :math:`A` + +.. math:: + + A=Q*R + +where :math:`A` is the input coefficient matrix on entry and the resulting :math:`R` factor and scaled Householder vectors on exit. :math:`Tau` stores the scaling factors associated with the Householder vectors. + +1. Overwrites :math:`A` with the :math:`QR` factors using the resources of ``space``. +2. Same as 1. but use the resources of ``KernelHandle::HandleExecSpace{}``. + +The function will throw a runtime exception if ``Tau.extent(0) < Kokkos::min(A.extent(0), A.extent(1))`` or if ``Info.extent(0) == 0``. + +Parameters +========== + +:space: execution space instance. + +:A: The input matrix on entry and the :math:`QR` factors on return. + +:Tau: rank-1 view of size min(M,N) that contains the scaling factors of the elementary reflectors. + +:Info: rank-1 view of integers and of size 1: Info[0] = 0: successfull exit; Info[0] < 0: if equal to '-i', the i-th argument had an illegal value. + +Type Requirements +================= + +- `ExecutionSpace` must be a Kokkos `execution space `_ + +- `AMatrix` must be a Kokkos `View `_ of rank 2 that satisfies + + - ``Kokkos::SpaceAccessibility::accessible`` + +- `Tau` must be a Kokkos `View `_ of rank 1 that satisfies + + - ``Kokkos::SpaceAccessibility::accessible`` + +- `Info` must be a Kokkos `View `_ of rank 1 that satisfies + + - ``Kokkos::SpaceAccessibility::accessible`` + +Example +======= + +TBD + From 2f809fc4f59691e1189b97bb122ceb1c5ce3fb69 Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Wed, 28 Jan 2026 11:23:55 +0900 Subject: [PATCH 3/6] Lapack - QR: fixing documentation Signed-off-by: Luc Berger-Vergiat --- docs/source/API/lapack-index.rst | 3 ++- scripts/check_api_updates.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/source/API/lapack-index.rst b/docs/source/API/lapack-index.rst index 34fc1590b0..3133892a8a 100644 --- a/docs/source/API/lapack-index.rst +++ b/docs/source/API/lapack-index.rst @@ -5,6 +5,7 @@ API: LAPACK :maxdepth: 2 :hidden: + lapack/geqrf lapack/gesv lapack/gesvd lapack/trtri @@ -69,7 +70,7 @@ Below are tables summarizing the currently supported function calls and third pa - - * - geqrf - - + - :doc:`geqrf ` - - - diff --git a/scripts/check_api_updates.py b/scripts/check_api_updates.py index 64d1976235..68a09499b8 100644 --- a/scripts/check_api_updates.py +++ b/scripts/check_api_updates.py @@ -55,6 +55,7 @@ ('blas/src/KokkosBlas3_gemm.hpp', ['blas3_gemm.rst']), ('blas/src/KokkosBlas3_trmm.hpp', ['blas3_trmm.rst']), ('blas/src/KokkosBlas3_trsm.hpp', ['blas3_trsm.rst']), + ('lapack/src/KokkosLapack_geqrf.hpp', ['docs/source/API/lapack/geqrf.rst']), ('lapack/src/KokkosLapack_gesv.hpp', ['docs/source/API/lapack/gesv.rst']), ('lapack/src/KokkosLapack_svd.hpp', ['docs/source/API/lapack/gesvd.rst']), ('lapack/src/KokkosLapack_trtri.hpp', ['docs/source/API/lapack/trtri.rst']), From bcd59bc5a33703dadca42ad64c7130b39084838c Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Thu, 29 Jan 2026 16:30:49 +0900 Subject: [PATCH 4/6] Lapack - geqrf: resolving co-pilot review Signed-off-by: Luc Berger-Vergiat --- docs/source/API/lapack/geqrf.rst | 6 +++--- .../KokkosLapack_geqrf_eti_spec_decl.hpp.in | 4 ++-- lapack/impl/KokkosLapack_geqrf_impl.hpp | 2 +- lapack/impl/KokkosLapack_geqrf_spec.hpp | 20 +++++++++---------- lapack/src/KokkosLapack_geqrf.hpp | 9 ++++----- .../KokkosLapack_geqrf_tpl_spec_avail.hpp | 2 +- .../tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 2 +- 7 files changed, 22 insertions(+), 23 deletions(-) diff --git a/docs/source/API/lapack/geqrf.rst b/docs/source/API/lapack/geqrf.rst index 7735d5734b..ea3758dc96 100644 --- a/docs/source/API/lapack/geqrf.rst +++ b/docs/source/API/lapack/geqrf.rst @@ -20,9 +20,9 @@ Performs the QR factorization of matrix :math:`A` where :math:`A` is the input coefficient matrix on entry and the resulting :math:`R` factor and scaled Householder vectors on exit. :math:`Tau` stores the scaling factors associated with the Householder vectors. 1. Overwrites :math:`A` with the :math:`QR` factors using the resources of ``space``. -2. Same as 1. but use the resources of ``KernelHandle::HandleExecSpace{}``. +2. Same as 1. but uses the resources of the default execution space from ``AMatrix::execution_space``. -The function will throw a runtime exception if ``Tau.extent(0) < Kokkos::min(A.extent(0), A.extent(1))`` or if ``Info.extent(0) == 0``. +The function will throw a runtime exception if ``Tau.extent(0) < Kokkos::min(A.extent(0), A.extent(1))`` or if ``Info.extent(0) < 1``. Parameters ========== @@ -33,7 +33,7 @@ Parameters :Tau: rank-1 view of size min(M,N) that contains the scaling factors of the elementary reflectors. -:Info: rank-1 view of integers and of size 1: Info[0] = 0: successfull exit; Info[0] < 0: if equal to '-i', the i-th argument had an illegal value. +:Info: rank-1 view of integers and of size 1: Info[0] = 0: successful exit; Info[0] < 0: if equal to '-i', the i-th argument had an illegal value. Type Requirements ================= diff --git a/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_decl.hpp.in b/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_decl.hpp.in index ae6318f4e2..5b836b5cc7 100644 --- a/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_decl.hpp.in +++ b/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_decl.hpp.in @@ -6,6 +6,6 @@ namespace KokkosLapack { namespace Impl { @LAPACK_GEQRF_ETI_DECL_BLOCK@ - } //IMPL -} //Kokkos + } // IMPL +} // namespace KokkosLapack #endif diff --git a/lapack/impl/KokkosLapack_geqrf_impl.hpp b/lapack/impl/KokkosLapack_geqrf_impl.hpp index c3a25824db..a55161f284 100644 --- a/lapack/impl/KokkosLapack_geqrf_impl.hpp +++ b/lapack/impl/KokkosLapack_geqrf_impl.hpp @@ -18,7 +18,7 @@ #define KOKKOSLAPACK_IMPL_GEQRF_HPP_ /// \file KokkosLapack_geqrf_impl.hpp -/// \brief Implementation(s) of dense linear solve. +/// \brief Implementation(s) of QR factorization. #include #include diff --git a/lapack/impl/KokkosLapack_geqrf_spec.hpp b/lapack/impl/KokkosLapack_geqrf_spec.hpp index 84d2bf8b9f..057869cbce 100644 --- a/lapack/impl/KokkosLapack_geqrf_spec.hpp +++ b/lapack/impl/KokkosLapack_geqrf_spec.hpp @@ -28,7 +28,7 @@ namespace KokkosLapack { namespace Impl { // Specialization struct which defines whether a specialization exists -template +template struct geqrf_eti_spec_avail { enum : bool { value = false }; }; @@ -63,20 +63,20 @@ namespace KokkosLapack { namespace Impl { // Unification layer -template ::value, - bool eti_spec_avail = geqrf_eti_spec_avail::value> +template ::value, + bool eti_spec_avail = geqrf_eti_spec_avail::value> struct GEQRF { - static void geqrf(const ExecutionSpace &space, const AMatrix &A, const TWArray &Tau, const TWArray &Work, - const RType &R); + static void geqrf(const ExecutionSpace &space, const AMatrix &A, const TauArray &Tau, + const InfoArray &info); }; #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY // Unification layer -template -struct GEQRF { - static void geqrf(const ExecutionSpace & /* space */, const AMatrix & /* A */, const TWArray & /* Tau */, - const TWArray & /* Work */, const RType & /* R */) { +template +struct GEQRF { + static void geqrf(const ExecutionSpace & /* space */, const AMatrix & /* A */, const TauArray & /* Tau */, + const InfoArray & /* Info */) { // NOTE: Might add the implementation of KokkosLapack::geqrf later throw std::runtime_error( "No fallback implementation of GEQRF (general QR factorization) " diff --git a/lapack/src/KokkosLapack_geqrf.hpp b/lapack/src/KokkosLapack_geqrf.hpp index 3216203f27..ba3c680ac1 100644 --- a/lapack/src/KokkosLapack_geqrf.hpp +++ b/lapack/src/KokkosLapack_geqrf.hpp @@ -54,7 +54,7 @@ namespace KokkosLapack { /// \param Tau [out] One-dimensional array of size min(M,N) that contains the /// scalar factors of the elementary reflectors. /// \param Info [out] One-dimensional array of integers and of size 1: -/// Info[0] = 0: successfull exit +/// Info[0] = 0: successful exit /// Info[0] < 0: if equal to '-i', the i-th argument had an /// illegal value /// @@ -94,10 +94,9 @@ void geqrf(const ExecutionSpace& space, const AMatrix& A, const TauArray& Tau, c KokkosKernels::Impl::throw_runtime_exception(os.str()); } - if (info0 == 0) { + if (info0 < 1) { std::ostringstream os; - os << "KokkosLapack::geqrf: length of Info must be at least 1: " - << " A: " << m << " x " << n << ", Info length = " << info0; + os << "KokkosLapack::geqrf: length of Info must be at least 1, Info length = " << info0; KokkosKernels::Impl::throw_runtime_exception(os.str()); } @@ -137,7 +136,7 @@ void geqrf(const ExecutionSpace& space, const AMatrix& A, const TauArray& Tau, c /// \param Tau [out] One-dimensional array of size min(M,N) that contains the /// scalar factors of the elementary reflectors. /// \param Info [out] One-dimensional array of integers and of size 1: -/// Info[0] = 0: successfull exit +/// Info[0] = 0: successful exit /// Info[0] < 0: if equal to '-i', the i-th argument had an /// illegal value /// diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp index a0afa4bb73..9a1499d054 100644 --- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp +++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp @@ -20,7 +20,7 @@ namespace KokkosLapack { namespace Impl { // Specialization struct which defines whether a specialization exists -template +template struct geqrf_tpl_spec_avail { enum : bool { value = false }; }; diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp index 1643e9bdf1..c806e657bb 100644 --- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp +++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp @@ -55,7 +55,7 @@ void lapackGeqrfWrapper(const AViewType& A, const TauViewType& Tau, const InfoVi const int lda = A.stride(1); int lwork = -1; - Kokkos::View work("geqrf work buffer", 1); + Kokkos::View work; if constexpr (KokkosKernels::ArithTraits::is_complex) { using MagType = typename KokkosKernels::ArithTraits::mag_type; From 8a7823a628ef3458cb1d36347fca31d9c423a210 Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Thu, 29 Jan 2026 16:35:07 +0900 Subject: [PATCH 5/6] Lapack - geqrf: applying clang-format Signed-off-by: Luc Berger-Vergiat --- lapack/impl/KokkosLapack_geqrf_spec.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lapack/impl/KokkosLapack_geqrf_spec.hpp b/lapack/impl/KokkosLapack_geqrf_spec.hpp index 057869cbce..d8fce5e81d 100644 --- a/lapack/impl/KokkosLapack_geqrf_spec.hpp +++ b/lapack/impl/KokkosLapack_geqrf_spec.hpp @@ -67,8 +67,7 @@ template ::value, bool eti_spec_avail = geqrf_eti_spec_avail::value> struct GEQRF { - static void geqrf(const ExecutionSpace &space, const AMatrix &A, const TauArray &Tau, - const InfoArray &info); + static void geqrf(const ExecutionSpace &space, const AMatrix &A, const TauArray &Tau, const InfoArray &info); }; #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY From 99d7ee530f662cb2ed62c9746817144cdc28bc48 Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Fri, 30 Jan 2026 15:18:51 +0900 Subject: [PATCH 6/6] Lapack - QR: fixing HostMirror to host_mirror_type Also simplifying some memory space specification, there might still be an issue with a View constructor though... Signed-off-by: Luc Berger-Vergiat --- lapack/unit_test/Test_Lapack_geqrf.hpp | 34 +++++++++++++------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp b/lapack/unit_test/Test_Lapack_geqrf.hpp index 859e969f4b..61d4f676d5 100644 --- a/lapack/unit_test/Test_Lapack_geqrf.hpp +++ b/lapack/unit_test/Test_Lapack_geqrf.hpp @@ -33,9 +33,9 @@ namespace Test { template -void getQR(int const m, int const n, typename ViewTypeA::HostMirror const& h_A, - typename ViewTypeTau::HostMirror const& h_tau, typename ViewTypeA::HostMirror& h_Q, - typename ViewTypeA::HostMirror& h_R, typename ViewTypeA::HostMirror& h_QR) { +void getQR(int const m, int const n, typename ViewTypeA::host_mirror_type const& h_A, + typename ViewTypeTau::host_mirror_type const& h_tau, typename ViewTypeA::host_mirror_type& h_Q, + typename ViewTypeA::host_mirror_type& h_R, typename ViewTypeA::host_mirror_type& h_QR) { using ScalarA = typename ViewTypeA::value_type; // ******************************************************************** @@ -55,7 +55,7 @@ void getQR(int const m, int const n, typename ViewTypeA::HostMirror const& h_A, // Instantiate the m x m identity matrix h_I // ******************************************************************** ViewTypeA I("I", m, m); - typename ViewTypeA::HostMirror h_I = Kokkos::create_mirror_view(I); + typename ViewTypeA::host_mirror_type h_I = Kokkos::create_mirror_view(I); for (int i = 0; i < m; ++i) { h_I(i, i) = ScalarA(1.); } @@ -65,13 +65,13 @@ void getQR(int const m, int const n, typename ViewTypeA::HostMirror const& h_A, // ******************************************************************** int minMN(std::min(m, n)); ViewTypeTau v("v", m); - typename ViewTypeTau::HostMirror h_v = Kokkos::create_mirror_view(v); + typename ViewTypeTau::host_mirror_type h_v = Kokkos::create_mirror_view(v); ViewTypeA Qk("Qk", m, m); - typename ViewTypeA::HostMirror h_Qk = Kokkos::create_mirror_view(Qk); + typename ViewTypeA::host_mirror_type h_Qk = Kokkos::create_mirror_view(Qk); ViewTypeA auxM("auxM", m, m); - typename ViewTypeA::HostMirror h_auxM = Kokkos::create_mirror_view(auxM); + typename ViewTypeA::host_mirror_type h_auxM = Kokkos::create_mirror_view(auxM); // Q = H(0) H(1) . . . H(min(M,N)-1), where for k=0,1,...,min(m,n)-1: // H(k) = I - Tau(k) * v * v**H, and @@ -175,10 +175,10 @@ void impl_test_geqrf(int m, int n) { // ******************************************************************** // Create host mirrors of device views // ******************************************************************** - typename ViewTypeA::HostMirror h_A = Kokkos::create_mirror_view(A); - typename ViewTypeA::HostMirror h_Aorig = Kokkos::create_mirror_view(Aorig); - typename ViewTypeTau::HostMirror h_tau = Kokkos::create_mirror_view(Tau); - typename ViewTypeInfo::HostMirror h_info = Kokkos::create_mirror_view(Info); + typename ViewTypeA::host_mirror_type h_A = Kokkos::create_mirror_view(A); + typename ViewTypeA::host_mirror_type h_Aorig = Kokkos::create_mirror_view(Aorig); + typename ViewTypeTau::host_mirror_type h_tau = Kokkos::create_mirror_view(Tau); + typename ViewTypeInfo::host_mirror_type h_info = Kokkos::create_mirror_view(Info); // ******************************************************************** // Initialize data @@ -238,8 +238,8 @@ void impl_test_geqrf(int m, int n) { // Check outputs h_A and h_tau // ******************************************************************** if ((m == 3) && (n == 3)) { - Kokkos::View refMatrix("ref matrix", m, n); - Kokkos::View refTau("ref tau", m); + Kokkos::View refMatrix("ref matrix", m, n); + Kokkos::View refTau("ref tau", m); refMatrix(0, 0) = ScalarA(-14.); refMatrix(0, 1) = ScalarA(-21.); @@ -298,9 +298,9 @@ void impl_test_geqrf(int m, int n) { ViewTypeA R("R", m, n); ViewTypeA QR("QR", m, n); - typename ViewTypeA::HostMirror h_Q = Kokkos::create_mirror_view(Q); - typename ViewTypeA::HostMirror h_R = Kokkos::create_mirror_view(R); - typename ViewTypeA::HostMirror h_QR = Kokkos::create_mirror_view(QR); + typename ViewTypeA::host_mirror_type h_Q = Kokkos::create_mirror_view(Q); + typename ViewTypeA::host_mirror_type h_R = Kokkos::create_mirror_view(R); + typename ViewTypeA::host_mirror_type h_QR = Kokkos::create_mirror_view(QR); getQR(m, n, h_A, h_tau, h_Q, h_R, h_QR); @@ -309,7 +309,7 @@ void impl_test_geqrf(int m, int n) { // ******************************************************************** if ((m == 3) && (n == 3)) { Kokkos::View refQ("ref Q", m, n); - Kokkos::View refR("ref Q", m, n); + Kokkos::View refR("ref R", m, n); refQ(0, 0) = ScalarA(-6. / 7.); refQ(0, 1) = ScalarA(69. / 175.);