Skip to content

Commit c988bde

Browse files
authored
Add Nvidia lapack support (cusolver backend) (#154)
* Add Nvidia lapack support (cusolver backend) * add cusolver gebrd, geqrf, getrf, getrs, gesvd * Add additional cusolver functions * add cusolver potrf and potrs batch implementation * Fix malloc_device * update README
1 parent f43e4e5 commit c988bde

26 files changed

+10003
-10
lines changed

CMakeLists.txt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ if(ENABLE_MKLCPU_BACKEND)
4646
option(ENABLE_MKLCPU_THREAD_TBB "" ON)
4747
endif()
4848
option(ENABLE_CUBLAS_BACKEND "" OFF)
49+
50+
option(ENABLE_CUSOLVER_BACKEND "" OFF)
51+
4952
option(ENABLE_CURAND_BACKEND "" OFF)
5053
option(ENABLE_NETLIB_BACKEND "" OFF)
5154
set(ONEMKL_SYCL_IMPLEMENTATION "dpc++" CACHE STRING "Name of the SYCL compiler")
@@ -65,7 +68,8 @@ if(ENABLE_MKLCPU_BACKEND
6568
list(APPEND DOMAINS_LIST "blas")
6669
endif()
6770
if(ENABLE_MKLCPU_BACKEND
68-
OR ENABLE_MKLGPU_BACKEND)
71+
OR ENABLE_MKLGPU_BACKEND
72+
OR ENABLE_CUSOLVER_BACKEND)
6973
list(APPEND DOMAINS_LIST "lapack")
7074
endif()
7175
if(ENABLE_MKLCPU_BACKEND

README.md

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ oneMKL is part of [oneAPI](https://oneapi.io).
3131
<td align="center"><a href="https://developer.nvidia.com/cublas"> NVIDIA cuBLAS</a> for NVIDIA GPU </td>
3232
<td align="center">NVIDIA GPU</td>
3333
</tr>
34+
<tr>
35+
<td align="center"><a href="https://developer.nvidia.com/cusolver"> NVIDIA cuSOLVER</a> for NVIDIA GPU </td>
36+
<td align="center">NVIDIA GPU</td>
37+
</tr>
3438
<tr>
3539
<td align="center"><a href="https://developer.nvidia.com/curand"> NVIDIA cuRAND</a> for NVIDIA GPU </td>
3640
<td align="center">NVIDIA GPU</td>
@@ -159,6 +163,11 @@ Supported domains: BLAS, LAPACK, RNG
159163
<td align="center">Intel GPU</td>
160164
<td align="center">Dynamic, Static</td>
161165
</tr>
166+
<tr>
167+
<td align="center">NVIDIA GPU</td>
168+
<td align="center">NVIDIA cuSOLVER</td>
169+
<td align="center">Dynamic, Static</td>
170+
</tr>
162171
<tr>
163172
<td rowspan=3 align="center">RNG</td>
164173
<td align="center">x86 CPU</td>
@@ -571,12 +580,18 @@ ctest
571580
cmake --install . --prefix <path_to_install_dir>
572581
```
573582

574-
To build with the cuRAND backend instead simply replace:
583+
To build with the cuSOLVER or cuRAND backend instead simply replace:
575584
```bash
576585
-DENABLE_CUBLAS_BACKEND=True \
577586
```
578587

579588
With:
589+
```bash
590+
-DENABLE_CUSOLVER_BACKEND=True \
591+
```
592+
593+
or
594+
580595
```bash
581596
-DENABLE_CURAND_BACKEND=True \
582597
```
@@ -592,6 +607,7 @@ build_shared_libs | BUILD_SHARED_LIBS | True, False | True
592607
enable_mklcpu_backend | ENABLE_MKLCPU_BACKEND | True, False | True
593608
enable_mklgpu_backend | ENABLE_MKLGPU_BACKEND | True, False | True
594609
*Not Supported* | ENABLE_CUBLAS_BACKEND | True, False | False
610+
*Not Supported* | ENABLE_CUSOLVER_BACKEND | True, False | False
595611
*Not Supported* | ENABLE_CURAND_BACKEND | True, False | False
596612
*Not Supported* | ENABLE_NETLIB_BACKEND | True, False | False
597613
enable_mklcpu_thread_tbb | ENABLE_MKLCPU_THREAD_TBB | True, False | True

cmake/FindCompiler.cmake

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,13 @@ if(is_dpcpp)
3333
if(UNIX)
3434
set(UNIX_INTERFACE_COMPILE_OPTIONS -fsycl)
3535
set(UNIX_INTERFACE_LINK_OPTIONS -fsycl)
36-
if(ENABLE_CURAND_BACKEND)
36+
if(ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND)
3737
list(APPEND UNIX_INTERFACE_COMPILE_OPTIONS
38-
-fsycl-targets=nvptx64-nvidia-cuda-sycldevice -fsycl-unnamed-lambda)
38+
-fsycl-targets=nvptx64-nvidia-cuda -fsycl-unnamed-lambda)
3939
list(APPEND UNIX_INTERFACE_LINK_OPTIONS
40-
-fsycl-targets=nvptx64-nvidia-cuda-sycldevice)
40+
-fsycl-targets=nvptx64-nvidia-cuda)
4141
endif()
42-
if(ENABLE_CURAND_BACKEND)
42+
if(ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND)
4343
set_target_properties(ONEMKL::SYCL::SYCL PROPERTIES
4444
INTERFACE_COMPILE_OPTIONS "${UNIX_INTERFACE_COMPILE_OPTIONS}"
4545
INTERFACE_LINK_OPTIONS "${UNIX_INTERFACE_LINK_OPTIONS}"

cmake/FindcuSOLVER.cmake

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#==========================================================================
2+
# Copyright (C) Codeplay Software Limited
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# For your convenience, a copy of the License has been included in this
10+
# repository.
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
#
18+
#=========================================================================
19+
20+
find_package(CUDA 10.0 REQUIRED)
21+
get_filename_component(SYCL_BINARY_DIR ${CMAKE_CXX_COMPILER} DIRECTORY)
22+
# the OpenCL include file from cuda is opencl 1.1 and it is not compatible with DPC++
23+
# the OpenCL include headers 1.2 onward is required. This is used to bypass NVIDIA OpenCL headers
24+
find_path(OPENCL_INCLUDE_DIR CL/cl.h OpenCL/cl.h
25+
HINTS
26+
${OPENCL_INCLUDE_DIR}
27+
${SYCL_BINARY_DIR}/../include/sycl/
28+
)
29+
# this is work around to avoid duplication half creation in both cuda and SYCL
30+
add_compile_definitions(CUDA_NO_HALF)
31+
32+
find_package(Threads REQUIRED)
33+
34+
include(FindPackageHandleStandardArgs)
35+
find_package_handle_standard_args(cuSOLVER
36+
REQUIRED_VARS
37+
CUDA_TOOLKIT_INCLUDE
38+
CUDA_cusolver_LIBRARY
39+
CUDA_LIBRARIES
40+
CUDA_CUDA_LIBRARY
41+
OPENCL_INCLUDE_DIR
42+
)
43+
if(NOT TARGET ONEMKL::cuSOLVER::cuSOLVER)
44+
add_library(ONEMKL::cuSOLVER::cuSOLVER SHARED IMPORTED)
45+
set_target_properties(ONEMKL::cuSOLVER::cuSOLVER PROPERTIES
46+
IMPORTED_LOCATION ${CUDA_cusolver_LIBRARY}
47+
INTERFACE_INCLUDE_DIRECTORIES "${OPENCL_INCLUDE_DIR};${CUDA_TOOLKIT_INCLUDE}"
48+
INTERFACE_LINK_LIBRARIES "Threads::Threads;${CUDA_CUDA_LIBRARY};${CUDA_LIBRARIES}"
49+
)
50+
51+
endif()

include/oneapi/mkl/detail/backend_selector_predicates.hpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,19 @@ inline void backend_selector_precondition<backend::cublas>(cl::sycl::queue& queu
8181
#endif
8282
}
8383

84+
template <>
85+
inline void backend_selector_precondition<backend::cusolver>(cl::sycl::queue& queue) {
86+
#ifndef ONEMKL_DISABLE_PREDICATES
87+
unsigned int vendor_id =
88+
static_cast<unsigned int>(queue.get_device().get_info<cl::sycl::info::device::vendor_id>());
89+
if (!(queue.get_device().is_gpu() && vendor_id == NVIDIA_ID)) {
90+
throw unsupported_device("",
91+
"backend_selector<backend::" + backend_map[backend::cusolver] + ">",
92+
queue.get_device());
93+
}
94+
#endif
95+
}
96+
8497
} // namespace mkl
8598
} // namespace oneapi
8699

include/oneapi/mkl/detail/backends.hpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,15 @@
2626
namespace oneapi {
2727
namespace mkl {
2828

29-
enum class backend { mklcpu, mklgpu, cublas, curand, netlib, unsupported };
29+
enum class backend { mklcpu, mklgpu, cublas, cusolver, curand, netlib, unsupported };
3030

3131
typedef std::map<backend, std::string> backendmap;
3232

3333
static backendmap backend_map = {
3434
{ backend::mklcpu, "mklcpu" }, { backend::mklgpu, "mklgpu" },
35-
{ backend::cublas, "cublas" }, { backend::curand, "curand" },
36-
{ backend::netlib, "netlib" }, { backend::unsupported, "unsupported" }
35+
{ backend::cublas, "cublas" }, { backend::cusolver, "cusolver" },
36+
{ backend::curand, "curand" }, { backend::netlib, "netlib" },
37+
{ backend::unsupported, "unsupported" }
3738
};
3839

3940
} //namespace mkl

include/oneapi/mkl/detail/backends_table.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,12 @@ static std::map<domain, std::map<device, std::vector<const char*>>> libraries =
7474
{
7575
#ifdef ENABLE_MKLGPU_BACKEND
7676
LIB_NAME("lapack_mklgpu")
77+
#endif
78+
} },
79+
{ device::nvidiagpu,
80+
{
81+
#ifdef ENABLE_CUSOLVER_BACKEND
82+
LIB_NAME("lapack_cusolver")
7783
#endif
7884
} } } },
7985

include/oneapi/mkl/lapack.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,5 +27,8 @@
2727
#ifdef ENABLE_MKLGPU_BACKEND
2828
#include "oneapi/mkl/lapack/detail/mklgpu/lapack_ct.hpp"
2929
#endif
30+
#ifdef ENABLE_CUSOLVER_BACKEND
31+
#include "oneapi/mkl/lapack/detail/cusolver/lapack_ct.hpp"
32+
#endif
3033

3134
#include "oneapi/mkl/lapack/detail/lapack_rt.hpp"
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/***************************************************************************
2+
* Copyright (C) Codeplay Software Limited
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* For your convenience, a copy of the License has been included in this
10+
* repository.
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*
18+
**************************************************************************/
19+
20+
#ifndef _DETAIL_CUSOLVER_LAPACK_CT_HPP_
21+
#define _DETAIL_CUSOLVER_LAPACK_CT_HPP_
22+
23+
#include <CL/sycl.hpp>
24+
#include <complex>
25+
#include <cstdint>
26+
27+
#include "oneapi/mkl/types.hpp"
28+
#include "oneapi/mkl/lapack/types.hpp"
29+
#include "oneapi/mkl/detail/backend_selector.hpp"
30+
#include "oneapi/mkl/lapack/detail/cusolver/onemkl_lapack_cusolver.hpp"
31+
32+
namespace oneapi {
33+
namespace mkl {
34+
namespace lapack {
35+
36+
#define LAPACK_BACKEND cusolver
37+
#include "lapack_ct.hxx"
38+
#undef LAPACK_BACKEND
39+
40+
} // namespace lapack
41+
} // namespace mkl
42+
} // namespace oneapi
43+
44+
#endif //_DETAIL_CUSOLVER_LAPACK_CT_HPP_

0 commit comments

Comments
 (0)