From 9b322071a282a79b73b5c656b45b6c4d7cae8343 Mon Sep 17 00:00:00 2001 From: Ryan Gutenkunst Date: Thu, 9 Apr 2020 11:35:17 -0700 Subject: [PATCH 01/13] Fix cuSPARSE import bug --- skcuda/cusparse.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/skcuda/cusparse.py b/skcuda/cusparse.py index db2c7888..b9cf4b8d 100644 --- a/skcuda/cusparse.py +++ b/skcuda/cusparse.py @@ -6,6 +6,8 @@ Note: this module does not explicitly depend on PyCUDA. """ +from __future__ import absolute_import + import atexit import ctypes.util import platform @@ -15,7 +17,7 @@ import numpy as np -import cuda +from . import cuda # Load library: _version_list = [10.1, 10.0, 9.2, 9.1, 9.0, 8.0, 7.5, 7.0, 6.5, 6.0, 5.5, 5.0, 4.0] From 9863bc65a9e4a75ceb78cb0120b3ba2d47211ace Mon Sep 17 00:00:00 2001 From: Ryan Gutenkunst Date: Thu, 9 Apr 2020 16:58:44 -0700 Subject: [PATCH 02/13] Convert cuparse handle to c_void_p --- skcuda/cusparse.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skcuda/cusparse.py b/skcuda/cusparse.py index b9cf4b8d..45975c73 100644 --- a/skcuda/cusparse.py +++ b/skcuda/cusparse.py @@ -174,13 +174,13 @@ def cusparseCreate(): """ - handle = ctypes.c_int() + handle = ctypes.c_void_p() status = _libcusparse.cusparseCreate(ctypes.byref(handle)) cusparseCheckStatus(status) return handle.value _libcusparse.cusparseDestroy.restype = int -_libcusparse.cusparseDestroy.argtypes = [ctypes.c_int] +_libcusparse.cusparseDestroy.argtypes = [ctypes.c_void_p] def cusparseDestroy(handle): """ Release CUSPARSE resources. 
From 104b0942ad71d919780f2506d10d35e0f0217c7c Mon Sep 17 00:00:00 2001 From: Ryan Gutenkunst Date: Thu, 9 Apr 2020 17:26:18 -0700 Subject: [PATCH 03/13] Implement cusparseSgtsv2StridedBatch_bufferSizeExt --- skcuda/cusparse.py | 68 +++++++++++++++++++++++++++++++++++++++++- tests/test_cusparse.py | 62 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 129 insertions(+), 1 deletion(-) create mode 100644 tests/test_cusparse.py diff --git a/skcuda/cusparse.py b/skcuda/cusparse.py index 45975c73..30ccdd19 100644 --- a/skcuda/cusparse.py +++ b/skcuda/cusparse.py @@ -8,7 +8,6 @@ from __future__ import absolute_import -import atexit import ctypes.util import platform from string import Template @@ -88,6 +87,7 @@ class cusparseStatusMatrixTypeNotSupported(cusparseError): """The matrix type is not supported by this function""" pass +# TODO: Check if this is complete list of exceptions, and that numbers are correct. cusparseExceptions = { 1: cusparseStatusNotInitialized, 2: cusparseStatusAllocFailed, @@ -388,3 +388,69 @@ def cusparseSdense2csr(handle, m, n, descrA, A, lda, nnzPerRow, csrValA, csrRowPtrA, csrColIndA): # Unfinished pass + +_libcusparse.cusparseSgtsv2StridedBatch_bufferSizeExt.restype = int +_libcusparse.cusparseSgtsv2StridedBatch_bufferSizeExt.argtypes =\ + [ctypes.c_void_p, + ctypes.c_int, + ctypes.c_void_p, + ctypes.c_void_p, + ctypes.c_void_p, + ctypes.c_void_p, + ctypes.c_int, + ctypes.c_int, + ctypes.c_void_p + ] +def cusparseSgtsv2StridedBatch_bufferSizeExt(handle, m, dl, d, du, x, batchCount, batchStride): + """ + Calculate size of work buffer used by cusparseSgtsv2StridedBatch. + + Parameters + ---------- + handle : ctypes.c_void_p + cuSPARSE context + m : int + Size of the linear system (must be >= 3) + dl : ctypes.c_void_p + Pointer to ${precision} ${real} dense array containing the lower + diagonal of the tri-diagonal linear system. 
The lower diagonal dl(i) + that corresponds to the ith linear system starts at location + dl+batchStride*i in memory. Also, the first element of each lower + diagonal must be zero. + d : ctypes.c_void_p + Pointer to ${precision} ${real} dense array containing the main + diagonal of the tri-diagonal linear system. The main diagonal d(i) + that corresponds to the ith linear system starts at location + d+batchStride*i in memory. + du : ctypes.c_void_p + Pointer to ${precision} ${real} dense array containing the upper + diagonal of the tri-diagonal linear system. The upper diagonal du(i) + that corresponds to the ith linear system starts at location + du+batchStride*i in memory. Also, the last element of each upper + diagonal must be zero. + x : ctypes.c_void_p + Pointer to ${precision} ${real} dense array that contains the + right-hand-side of the tri-diagonal linear system. The + right-hand-side x(i) that corresponds to the ith linear system + starts at location x+batchStride*i in memory. + batchCount : int + Number of systems to solve. + batchStride : int + Stride (number of elements) that separates the vectors of every + system (must be at least m). + + Returns + ------- + bufferSizeInBytes : int + number of bytes of the buffer used in the gtsv2StridedBatch. 
+ + References + ---------- + `cusparsegtsv2StridedBatch_bufferSizeExt `_ + """ + bufferSizeInBytes = ctypes.c_int() + status = _libcusparse.cusparseSgtsv2StridedBatch_bufferSizeExt( + handle, m, int(dl), int(d), int(du), int(x), batchCount, batchStride, + ctypes.byref(bufferSizeInBytes)) + cusparseCheckStatus(status) + return bufferSizeInBytes.value \ No newline at end of file diff --git a/tests/test_cusparse.py b/tests/test_cusparse.py new file mode 100644 index 00000000..fa52ce1b --- /dev/null +++ b/tests/test_cusparse.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python + +""" +Unit tests for skcuda.cusparse +""" + +from unittest import main, makeSuite, TestCase, TestSuite + +import pycuda.gpuarray as gpuarray +from pycuda.tools import clear_context_caches, make_default_context +import numpy as np + +import skcuda.cusparse as cusparse + +class test_cusparse(TestCase): + @classmethod + def setUpClass(cls): + cls.ctx = make_default_context() + cls.cusparse_handle = cusparse.cusparseCreate() + + @classmethod + def tearDownClass(cls): + cusparse.cusparseDestroy(cls.cusparse_handle) + cls.ctx.pop() + clear_context_caches() + + def setUp(self): + np.random.seed(23) # For reproducible tests. 
+ + # Sgtsv2StridedBatch_bufferSizeExt + def test_cusparseSgtsv2StridedBatch_bufferSizeExt(self): + m = 5 + batchCount = 5 + batchStride = m + + dl = np.zeros(m*batchCount).astype(np.float32) + d = np.zeros(m*batchCount).astype(np.float32) + du = np.zeros(m*batchCount).astype(np.float32) + x = np.zeros(m*batchCount).astype(np.float32) + + for ii in range(batchCount): + dl[ii*batchStride+1:ii*batchStride+batchStride] = np.random.rand(m-1) + d[ii*batchStride:ii*batchStride+batchStride] = np.random.rand(m) + du[ii*batchStride:ii*batchStride+batchStride-1] = np.random.rand(m-1) + x[ii*batchStride:ii*batchStride+batchStride] = np.random.rand(m) + + dl_gpu = gpuarray.to_gpu(dl) + d_gpu = gpuarray.to_gpu(d) + du_gpu = gpuarray.to_gpu(du) + x_gpu = gpuarray.to_gpu(x) + + bufferSizeInBytes = cusparse.cusparseSgtsv2StridedBatch_bufferSizeExt( + self.cusparse_handle, m, dl_gpu.gpudata, d_gpu.gpudata, + du_gpu.gpudata, x_gpu.gpudata, batchCount, batchStride) + +def suite(): + s = TestSuite() + s.addTest(test_cusparse('test_cusparseSgtsv2StridedBatch_bufferSizeExt')) + return s + +if __name__ == '__main__': + main(defaultTest = 'suite') From 9e0af08a2674473d83868bde6c175d72998a1ce1 Mon Sep 17 00:00:00 2001 From: Ryan Gutenkunst Date: Fri, 10 Apr 2020 14:17:32 -0700 Subject: [PATCH 04/13] Support cusparseSgtsv2StridedBatch + test --- skcuda/cusparse.py | 80 +++++++++++++++++++++++++++++++++++++++++- tests/test_cusparse.py | 48 +++++++++++++++++-------- 2 files changed, 113 insertions(+), 15 deletions(-) diff --git a/skcuda/cusparse.py b/skcuda/cusparse.py index 30ccdd19..7437e2d8 100644 --- a/skcuda/cusparse.py +++ b/skcuda/cusparse.py @@ -453,4 +453,82 @@ def cusparseSgtsv2StridedBatch_bufferSizeExt(handle, m, dl, d, du, x, batchCount handle, m, int(dl), int(d), int(du), int(x), batchCount, batchStride, ctypes.byref(bufferSizeInBytes)) cusparseCheckStatus(status) - return bufferSizeInBytes.value \ No newline at end of file + return bufferSizeInBytes.value + 
+_libcusparse.cusparseSgtsv2StridedBatch.restype = int +_libcusparse.cusparseSgtsv2StridedBatch.argtypes =\ + [ctypes.c_void_p, + ctypes.c_int, + ctypes.c_void_p, + ctypes.c_void_p, + ctypes.c_void_p, + ctypes.c_void_p, + ctypes.c_int, + ctypes.c_int, + ctypes.c_void_p + ] +def cusparseSgtsv2StridedBatch(handle, m, dl, d, du, x, batchCount, batchStride, pBuffer): + """ + Compute the solution of multiple tridiagonal linear systems. + + Solves multiple tridiagonal linear systems, for i=0,…,batchCount: + A(i) ∗ y(i) = x(i) + The coefficient matrix A of each of these tri-diagonal linear system is + defined with three vectors corresponding to its lower (dl), main (d), and + upper (du) matrix diagonals; the right-hand sides are stored in the dense + matrix X. Notice that solution Y overwrites right-hand-side matrix X on exit. + The different matrices are assumed to be of the same size and are stored with + a fixed batchStride in memory. + + The routine does not perform any pivoting and uses a combination of the + Cyclic Reduction (CR) and the Parallel Cyclic Reduction (PCR) algorithms to + find the solution. It achieves better performance when m is a power of 2. + + Parameters + ---------- + handle : ctypes.c_void_p + cuSPARSE context + m : int + Size of the linear system (must be >= 3) + dl : ctypes.c_void_p + Pointer to ${precision} ${real} dense array containing the lower + diagonal of the tri-diagonal linear system. The lower diagonal dl(i) + that corresponds to the ith linear system starts at location + dl+batchStride*i in memory. Also, the first element of each lower + diagonal must be zero. + d : ctypes.c_void_p + Pointer to ${precision} ${real} dense array containing the main + diagonal of the tri-diagonal linear system. The main diagonal d(i) + that corresponds to the ith linear system starts at location + d+batchStride*i in memory. 
+ du : ctypes.c_void_p + Pointer to ${precision} ${real} dense array containing the upper + diagonal of the tri-diagonal linear system. The upper diagonal du(i) + that corresponds to the ith linear system starts at location + du+batchStride*i in memory. Also, the last element of each upper + diagonal must be zero. + x : ctypes.c_void_p + Pointer to ${precision} ${real} dense array that contains the + right-hand-side of the tri-diagonal linear system. The + right-hand-side x(i) that corresponds to the ith linear system + starts at location x+batchStride*i in memory. + batchCount : int + Number of systems to solve. + batchStride : int + Stride (number of elements) that separates the vectors of every + system (must be at least m). + pBuffer: ctypes.c_void_p + Buffer allocated by the user, the size is return by gtsv2StridedBatch_bufferSizeExt + + Returns + ------- + bufferSizeInBytes : int + number of bytes of the buffer used in the gtsv2StridedBatch. + + References + ---------- + `cusparsegtsv2StridedBatch `_ + """ + status = _libcusparse.cusparseSgtsv2StridedBatch( + handle, m, int(dl), int(d), int(du), int(x), batchCount, batchStride, int(pBuffer)) + cusparseCheckStatus(status) \ No newline at end of file diff --git a/tests/test_cusparse.py b/tests/test_cusparse.py index fa52ce1b..813e4969 100644 --- a/tests/test_cusparse.py +++ b/tests/test_cusparse.py @@ -4,14 +4,27 @@ Unit tests for skcuda.cusparse """ -from unittest import main, makeSuite, TestCase, TestSuite +from unittest import main, TestCase, TestSuite +import pycuda import pycuda.gpuarray as gpuarray from pycuda.tools import clear_context_caches, make_default_context import numpy as np import skcuda.cusparse as cusparse +def check_batch_tridiagonal(dl,d,du,x, y, m,batchCount,batchStride): + """ + Check all solutions from batched tridiagonal routine + """ + for ii in range(batchCount): + A_sys = np.diagflat(dl[ii*batchStride+1:ii*batchStride+m], -1) +\ + np.diagflat(d[ii*batchStride:ii*batchStride+m], 0) + \ 
+ np.diagflat(du[ii*batchStride:ii*batchStride+m-1], 1) + x_sys = x[ii*batchStride:ii*batchStride+m] + y_sys = y[ii*batchStride:ii*batchStride+m] + assert(np.allclose(np.dot(A_sys,y_sys), x_sys, atol=1e-3)) + class test_cusparse(TestCase): @classmethod def setUpClass(cls): @@ -28,21 +41,21 @@ def setUp(self): np.random.seed(23) # For reproducible tests. # Sgtsv2StridedBatch_bufferSizeExt - def test_cusparseSgtsv2StridedBatch_bufferSizeExt(self): - m = 5 - batchCount = 5 - batchStride = m + def test_cusparseSgtsv2StridedBatch(self): + m = 6 + batchCount = 9 + batchStride = 9 - dl = np.zeros(m*batchCount).astype(np.float32) - d = np.zeros(m*batchCount).astype(np.float32) - du = np.zeros(m*batchCount).astype(np.float32) - x = np.zeros(m*batchCount).astype(np.float32) + dl = np.zeros(batchStride*batchCount).astype(np.float32) + d = np.zeros(batchStride*batchCount).astype(np.float32) + du = np.zeros(batchStride*batchCount).astype(np.float32) + x = np.zeros(batchStride*batchCount).astype(np.float32) for ii in range(batchCount): - dl[ii*batchStride+1:ii*batchStride+batchStride] = np.random.rand(m-1) - d[ii*batchStride:ii*batchStride+batchStride] = np.random.rand(m) - du[ii*batchStride:ii*batchStride+batchStride-1] = np.random.rand(m-1) - x[ii*batchStride:ii*batchStride+batchStride] = np.random.rand(m) + dl[ii*batchStride+1:ii*batchStride+m] = np.random.rand(m-1) + d[ii*batchStride:ii*batchStride+m] = np.random.rand(m) + du[ii*batchStride:ii*batchStride+m-1] = np.random.rand(m-1) + x[ii*batchStride:ii*batchStride+m] = np.random.rand(m) dl_gpu = gpuarray.to_gpu(dl) d_gpu = gpuarray.to_gpu(d) @@ -52,10 +65,17 @@ def test_cusparseSgtsv2StridedBatch_bufferSizeExt(self): bufferSizeInBytes = cusparse.cusparseSgtsv2StridedBatch_bufferSizeExt( self.cusparse_handle, m, dl_gpu.gpudata, d_gpu.gpudata, du_gpu.gpudata, x_gpu.gpudata, batchCount, batchStride) + pBuffer = pycuda.driver.mem_alloc(bufferSizeInBytes) + + cusparse.cusparseSgtsv2StridedBatch(self.cusparse_handle, m, + 
dl_gpu.gpudata, d_gpu.gpudata, du_gpu.gpudata, x_gpu.gpudata, + batchCount, batchStride, pBuffer) + + check_batch_tridiagonal(dl,d,du,x, x_gpu.get(), m,batchCount,batchStride) def suite(): s = TestSuite() - s.addTest(test_cusparse('test_cusparseSgtsv2StridedBatch_bufferSizeExt')) + s.addTest(test_cusparse('test_cusparseSgtsv2StridedBatch')) return s if __name__ == '__main__': From c483e1a8e2a063b53c05bdb9389a499c4cbb32cf Mon Sep 17 00:00:00 2001 From: Ryan Gutenkunst Date: Fri, 10 Apr 2020 14:19:08 -0700 Subject: [PATCH 05/13] Update cusparse linux_version_list --- skcuda/cusparse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skcuda/cusparse.py b/skcuda/cusparse.py index 7437e2d8..1a58a5db 100644 --- a/skcuda/cusparse.py +++ b/skcuda/cusparse.py @@ -19,7 +19,7 @@ from . import cuda # Load library: -_version_list = [10.1, 10.0, 9.2, 9.1, 9.0, 8.0, 7.5, 7.0, 6.5, 6.0, 5.5, 5.0, 4.0] +_linux_version_list = [10.2, 10.1, 10.0, 9.2, 9.1, 9.0, 8.0, 7.5, 7.0, 6.5, 6.0, 5.5, 5.0, 4.0] _win32_version_list = [10, 10, 100, 92, 91, 90, 80, 75, 70, 65, 60, 55, 50, 40] if 'linux' in sys.platform: _libcusparse_libname_list = ['libcusparse.so'] + \ From ca3a1eff0a2d267645041895c786d4a1d82dbd6b Mon Sep 17 00:00:00 2001 From: Ryan Gutenkunst Date: Sat, 11 Apr 2020 13:17:56 -0700 Subject: [PATCH 06/13] Support cusparseSgtsvInterleavedBatch + test --- skcuda/cusparse.py | 145 +++++++++++++++++++++++++++++++++++++++-- tests/test_cusparse.py | 82 ++++++++++++++++++----- 2 files changed, 206 insertions(+), 21 deletions(-) diff --git a/skcuda/cusparse.py b/skcuda/cusparse.py index 1a58a5db..5f28c4c5 100644 --- a/skcuda/cusparse.py +++ b/skcuda/cusparse.py @@ -520,15 +520,148 @@ def cusparseSgtsv2StridedBatch(handle, m, dl, d, du, x, batchCount, batchStride, pBuffer: ctypes.c_void_p Buffer allocated by the user, the size is return by gtsv2StridedBatch_bufferSizeExt - Returns - ------- - bufferSizeInBytes : int - number of bytes of the buffer used in the 
gtsv2StridedBatch. - References ---------- `cusparsegtsv2StridedBatch `_ """ status = _libcusparse.cusparseSgtsv2StridedBatch( handle, m, int(dl), int(d), int(du), int(x), batchCount, batchStride, int(pBuffer)) - cusparseCheckStatus(status) \ No newline at end of file + cusparseCheckStatus(status) + +_libcusparse.cusparseSgtsvInterleavedBatch_bufferSizeExt.restype = int +_libcusparse.cusparseSgtsvInterleavedBatch_bufferSizeExt.argtypes =\ + [ctypes.c_void_p, + ctypes.c_int, + ctypes.c_int, + ctypes.c_void_p, + ctypes.c_void_p, + ctypes.c_void_p, + ctypes.c_void_p, + ctypes.c_int, + ctypes.c_void_p + ] +def cusparseSgtsvInterleavedBatch_bufferSizeExt(handle, algo, m, dl, d, du, x, batchCount): + """ + Calculate size of work buffer used by cusparseSgtsvInterleavedBatch. + + Parameters + ---------- + handle : ctypes.c_void_p + cuSPARSE context + algo : int + algo = 0: cuThomas (unstable algorithm); algo = 1: LU with pivoting + (stable algorithm); algo = 2: QR (stable algorithm) + m : int + Size of the linear system + dl : ctypes.c_void_p + Pointer to ${precision} ${real} dense array containing the lower + diagonal of the tri-diagonal linear system. The first element of each + lower diagonal must be zero. + d : ctypes.c_void_p + Pointer to ${precision} ${real} dense array containing the main + diagonal of the tri-diagonal linear system. + du : ctypes.c_void_p + Pointer to ${precision} ${real} dense array containing the upper + diagonal of the tri-diagonal linear system. The last element of each + upper diagonal must be zero. + x : ctypes.c_void_p + Pointer to ${precision} ${real} dense array that contains the + right-hand-side of the tri-diagonal linear system. + batchCount : int + Number of systems to solve. 
+ pBuffer: ctypes.c_void_p + Buffer allocated by the user, the size is return by gtsvInterleavedBatch_bufferSizeExt + + References + ---------- + `cusparsegtsvInterleavedBatch `_ + """ + pBufferSizeInBytes = ctypes.c_int() + status = _libcusparse.cusparseSgtsvInterleavedBatch_bufferSizeExt( + handle, algo, m, int(dl), int(d), int(du), int(x), batchCount, + ctypes.byref(pBufferSizeInBytes)) + cusparseCheckStatus(status) + return pBufferSizeInBytes.value + +_libcusparse.cusparseSgtsvInterleavedBatch.restype = int +_libcusparse.cusparseSgtsvInterleavedBatch.argtypes =\ + [ctypes.c_void_p, + ctypes.c_int, + ctypes.c_int, + ctypes.c_void_p, + ctypes.c_void_p, + ctypes.c_void_p, + ctypes.c_void_p, + ctypes.c_int, + ctypes.c_void_p + ] +def cusparseSgtsvInterleavedBatch(handle, algo, m, dl, d, du, x, batchCount, pBuffer): + """ + Compute the solution of multiple tridiagonal linear systems. + + Solves multiple tridiagonal linear systems, for i=0,…,batchCount: + A(i) ∗ y(i) = x(i) + The coefficient matrix A of each of these tri-diagonal linear system is + defined with three vectors corresponding to its lower (dl), main (d), and + upper (du) matrix diagonals; the right-hand sides are stored in the dense + matrix X. Notice that solution Y overwrites right-hand-side matrix X on exit. + The different matrices are assumed to be of the same size and are stored with + a fixed batchStride in memory. + + Assuming A is of size m and base-1, dl, d and du are defined by the following formula: + dl(i) := A(i, i-1) for i=1,2,...,m + The first element of dl is out-of-bound (dl(1) := A(1,0)), so dl(1) = 0. + d(i) = A(i,i) for i=1,2,...,m + du(i) = A(i,i+1) for i=1,2,...,m + The last element of du is out-of-bound (du(m) := A(m,m+1)), so du(m) = 0. + + The data layout is different from gtsvStridedBatch which aggregates all + matrices one after another. Instead, gtsvInterleavedBatch gathers + different matrices of the same element in a continous manner. 
If dl is + regarded as a 2-D array of size m-by-batchCount, dl(:,j) to store j-th + matrix. gtsvStridedBatch uses column-major while gtsvInterleavedBatch + uses row-major. + + The routine provides three different algorithms, selected by parameter algo. + The first algorithm is cuThomas provided by Barcelona Supercomputing Center. + The second algorithm is LU with partial pivoting and last algorithm is QR. + From stability perspective, cuThomas is not numerically stable because it + does not have pivoting. LU with partial pivoting and QR are stable. From + performance perspective, LU with partial pivoting and QR is about 10% to 20% + slower than cuThomas. + + Parameters + ---------- + handle : ctypes.c_void_p + cuSPARSE context + algo : int + algo = 0: cuThomas (unstable algorithm); algo = 1: LU with pivoting + (stable algorithm); algo = 2: QR (stable algorithm) + m : int + Size of the linear system + dl : ctypes.c_void_p + Pointer to ${precision} ${real} dense array containing the lower + diagonal of the tri-diagonal linear system. The first element of each + lower diagonal must be zero. + d : ctypes.c_void_p + Pointer to ${precision} ${real} dense array containing the main + diagonal of the tri-diagonal linear system. + du : ctypes.c_void_p + Pointer to ${precision} ${real} dense array containing the upper + diagonal of the tri-diagonal linear system. The last element of each + upper diagonal must be zero. + x : ctypes.c_void_p + Pointer to ${precision} ${real} dense array that contains the + right-hand-side of the tri-diagonal linear system. + batchCount : int + Number of systems to solve. 
+ pBuffer: ctypes.c_void_p + Buffer allocated by the user, the size is return by gtsvInterleavedBatch_bufferSizeExt + + References + ---------- + `cusparsegtsvInterleavedBatch `_ + """ + status = _libcusparse.cusparseSgtsvInterleavedBatch( + handle, algo, m, int(dl), int(d), int(du), int(x), batchCount, int(pBuffer)) + cusparseCheckStatus(status) diff --git a/tests/test_cusparse.py b/tests/test_cusparse.py index 813e4969..3856124a 100644 --- a/tests/test_cusparse.py +++ b/tests/test_cusparse.py @@ -13,17 +13,42 @@ import skcuda.cusparse as cusparse -def check_batch_tridiagonal(dl,d,du,x, y, m,batchCount,batchStride): +def check_batch_tridiagonal(dl,d,du,x, y, m,batchCount,batchStride=None,atol=1e-8): """ Check all solutions from batched tridiagonal routine """ + if batchStride is None: + batchStride = m + for ii in range(batchCount): A_sys = np.diagflat(dl[ii*batchStride+1:ii*batchStride+m], -1) +\ np.diagflat(d[ii*batchStride:ii*batchStride+m], 0) + \ np.diagflat(du[ii*batchStride:ii*batchStride+m-1], 1) x_sys = x[ii*batchStride:ii*batchStride+m] y_sys = y[ii*batchStride:ii*batchStride+m] - assert(np.allclose(np.dot(A_sys,y_sys), x_sys, atol=1e-3)) + assert(np.allclose(np.dot(A_sys,y_sys), x_sys, atol=atol)) + +def tridiagonal_system(m, batchCount, batchStride=None, seed=None): + """ + Create a tridiagonal system of a given size + """ + if batchStride is None: + batchStride = m + if seed is not None: + np.random.seed(seed) + + dl = np.zeros(batchStride*batchCount).astype(np.float32) + d = np.zeros(batchStride*batchCount).astype(np.float32) + du = np.zeros(batchStride*batchCount).astype(np.float32) + x = np.zeros(batchStride*batchCount).astype(np.float32) + + for ii in range(batchCount): + dl[ii*batchStride+1:ii*batchStride+m] = np.random.rand(m-1) + d[ii*batchStride:ii*batchStride+m] = np.random.rand(m) + du[ii*batchStride:ii*batchStride+m-1] = np.random.rand(m-1) + x[ii*batchStride:ii*batchStride+m] = np.random.rand(m) + + return dl,d,du,x class 
test_cusparse(TestCase): @classmethod @@ -40,22 +65,12 @@ def tearDownClass(cls): def setUp(self): np.random.seed(23) # For reproducible tests. - # Sgtsv2StridedBatch_bufferSizeExt def test_cusparseSgtsv2StridedBatch(self): m = 6 batchCount = 9 - batchStride = 9 - - dl = np.zeros(batchStride*batchCount).astype(np.float32) - d = np.zeros(batchStride*batchCount).astype(np.float32) - du = np.zeros(batchStride*batchCount).astype(np.float32) - x = np.zeros(batchStride*batchCount).astype(np.float32) + batchStride = 11 - for ii in range(batchCount): - dl[ii*batchStride+1:ii*batchStride+m] = np.random.rand(m-1) - d[ii*batchStride:ii*batchStride+m] = np.random.rand(m) - du[ii*batchStride:ii*batchStride+m-1] = np.random.rand(m-1) - x[ii*batchStride:ii*batchStride+m] = np.random.rand(m) + dl,d,du,x = tridiagonal_system(m, batchCount, batchStride, seed=23) dl_gpu = gpuarray.to_gpu(dl) d_gpu = gpuarray.to_gpu(d) @@ -71,11 +86,48 @@ def test_cusparseSgtsv2StridedBatch(self): dl_gpu.gpudata, d_gpu.gpudata, du_gpu.gpudata, x_gpu.gpudata, batchCount, batchStride, pBuffer) - check_batch_tridiagonal(dl,d,du,x, x_gpu.get(), m,batchCount,batchStride) + sln = x_gpu.get() + # For unstable algorithms, need to loosen atol + check_batch_tridiagonal(dl,d,du,x, sln, m,batchCount,batchStride, 1e-4) + + def test_cusparseSgtsvInterleavedBatch(self): + m = 6 + batchCount = 9 + + dl,d,du,x = tridiagonal_system(m, batchCount, seed=23) + + # Convert to interleaved format, by switching from row-major order + # (numpy default) to column-major + dl_int = np.reshape(dl,(batchCount,m)).ravel('F') + d_int = np.reshape(d, (batchCount, m)).ravel('F') + du_int = np.reshape(du,(batchCount,m)).ravel('F') + x_int = np.reshape(x,(batchCount,m)).ravel('F') + + for algo in range(3): + dl_int_gpu = gpuarray.to_gpu(dl_int) + d_int_gpu = gpuarray.to_gpu(d_int) + du_int_gpu = gpuarray.to_gpu(du_int) + x_int_gpu = gpuarray.to_gpu(x_int) + + pBufferSizeInBytes = 
cusparse.cusparseSgtsvInterleavedBatch_bufferSizeExt(self.cusparse_handle, + algo, m, dl_int_gpu.gpudata, d_int_gpu.gpudata, + du_int_gpu.gpudata, x_int_gpu.gpudata, batchCount) + + pBuffer = pycuda.driver.mem_alloc(pBufferSizeInBytes) + + cusparse.cusparseSgtsvInterleavedBatch(self.cusparse_handle, algo, m, + dl_int_gpu.gpudata, d_int_gpu.gpudata, du_int_gpu.gpudata, + x_int_gpu.gpudata, batchCount, pBuffer) + + sln_int = x_int_gpu.get() + # Convert back from interleaved format + sln = np.reshape(sln_int,(m,batchCount)).ravel('F') + check_batch_tridiagonal(dl,d,du,x, sln, m,batchCount, atol=1e-6) def suite(): s = TestSuite() s.addTest(test_cusparse('test_cusparseSgtsv2StridedBatch')) + s.addTest(test_cusparse('test_cusparseSgtsvInterleavedBatch')) return s if __name__ == '__main__': From 5ea00775ab278d5bd4448567d09ae7c278d8fdc0 Mon Sep 17 00:00:00 2001 From: Ryan Gutenkunst Date: Sat, 11 Apr 2020 14:24:08 -0700 Subject: [PATCH 07/13] support double precision batched tridiagonal --- skcuda/cusparse.py | 150 ++++++++++++++++++++++++++--------------- tests/test_cusparse.py | 74 ++++++++++++++++++-- 2 files changed, 165 insertions(+), 59 deletions(-) diff --git a/skcuda/cusparse.py b/skcuda/cusparse.py index 5f28c4c5..ecc408e0 100644 --- a/skcuda/cusparse.py +++ b/skcuda/cusparse.py @@ -389,21 +389,9 @@ def cusparseSdense2csr(handle, m, n, descrA, A, lda, # Unfinished pass -_libcusparse.cusparseSgtsv2StridedBatch_bufferSizeExt.restype = int -_libcusparse.cusparseSgtsv2StridedBatch_bufferSizeExt.argtypes =\ - [ctypes.c_void_p, - ctypes.c_int, - ctypes.c_void_p, - ctypes.c_void_p, - ctypes.c_void_p, - ctypes.c_void_p, - ctypes.c_int, - ctypes.c_int, - ctypes.c_void_p - ] -def cusparseSgtsv2StridedBatch_bufferSizeExt(handle, m, dl, d, du, x, batchCount, batchStride): +gtsv2StridedBatch_bufferSizeExt_doc = Template( """ - Calculate size of work buffer used by cusparseSgtsv2StridedBatch. + Calculate size of work buffer used by cusparsegtsv2StridedBatch. 
Parameters ---------- @@ -448,26 +436,38 @@ def cusparseSgtsv2StridedBatch_bufferSizeExt(handle, m, dl, d, du, x, batchCount ---------- `cusparsegtsv2StridedBatch_bufferSizeExt `_ """ +) + +_libcusparse.cusparseSgtsv2StridedBatch_bufferSizeExt.restype = int +_libcusparse.cusparseSgtsv2StridedBatch_bufferSizeExt.argtypes =\ + [ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p, ctypes.c_void_p, + ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_int, + ctypes.c_void_p + ] +def cusparseSgtsv2StridedBatch_bufferSizeExt(handle, m, dl, d, du, x, batchCount, batchStride): bufferSizeInBytes = ctypes.c_int() status = _libcusparse.cusparseSgtsv2StridedBatch_bufferSizeExt( handle, m, int(dl), int(d), int(du), int(x), batchCount, batchStride, ctypes.byref(bufferSizeInBytes)) cusparseCheckStatus(status) return bufferSizeInBytes.value +cusparseSgtsv2StridedBatch_bufferSizeExt.__doc__ = \ + gtsv2StridedBatch_bufferSizeExt_doc.substitute(precision='single precision', real='real') -_libcusparse.cusparseSgtsv2StridedBatch.restype = int -_libcusparse.cusparseSgtsv2StridedBatch.argtypes =\ - [ctypes.c_void_p, - ctypes.c_int, - ctypes.c_void_p, - ctypes.c_void_p, - ctypes.c_void_p, - ctypes.c_void_p, - ctypes.c_int, - ctypes.c_int, - ctypes.c_void_p - ] -def cusparseSgtsv2StridedBatch(handle, m, dl, d, du, x, batchCount, batchStride, pBuffer): +_libcusparse.cusparseDgtsv2StridedBatch_bufferSizeExt.restype = int +_libcusparse.cusparseDgtsv2StridedBatch_bufferSizeExt.argtypes =\ + _libcusparse.cusparseSgtsv2StridedBatch_bufferSizeExt.argtypes +def cusparseDgtsv2StridedBatch_bufferSizeExt(handle, m, dl, d, du, x, batchCount, batchStride): + bufferSizeInBytes = ctypes.c_int() + status = _libcusparse.cusparseDgtsv2StridedBatch_bufferSizeExt( + handle, m, int(dl), int(d), int(du), int(x), batchCount, batchStride, + ctypes.byref(bufferSizeInBytes)) + cusparseCheckStatus(status) + return bufferSizeInBytes.value +cusparseDgtsv2StridedBatch_bufferSizeExt.__doc__ = \ + 
gtsv2StridedBatch_bufferSizeExt_doc.substitute(precision='double precision', real='real') + +gtsv2StridedBatch_doc = Template( """ Compute the solution of multiple tridiagonal linear systems. @@ -524,25 +524,34 @@ def cusparseSgtsv2StridedBatch(handle, m, dl, d, du, x, batchCount, batchStride, ---------- `cusparsegtsv2StridedBatch `_ """ +) + +_libcusparse.cusparseSgtsv2StridedBatch.restype = int +_libcusparse.cusparseSgtsv2StridedBatch.argtypes =\ + [ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p, ctypes.c_void_p, + ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_int, + ctypes.c_void_p + ] +def cusparseSgtsv2StridedBatch(handle, m, dl, d, du, x, batchCount, batchStride, pBuffer): status = _libcusparse.cusparseSgtsv2StridedBatch( handle, m, int(dl), int(d), int(du), int(x), batchCount, batchStride, int(pBuffer)) cusparseCheckStatus(status) +cusparseSgtsv2StridedBatch.__doc__ = \ + gtsv2StridedBatch_doc.substitute(precision='single precision', real='real') + +_libcusparse.cusparseDgtsv2StridedBatch.restype = int +_libcusparse.cusparseDgtsv2StridedBatch.argtypes =\ + _libcusparse.cusparseSgtsv2StridedBatch.argtypes +def cusparseDgtsv2StridedBatch(handle, m, dl, d, du, x, batchCount, batchStride, pBuffer): + status = _libcusparse.cusparseDgtsv2StridedBatch( + handle, m, int(dl), int(d), int(du), int(x), batchCount, batchStride, int(pBuffer)) + cusparseCheckStatus(status) +cusparseDgtsv2StridedBatch.__doc__ = \ + gtsv2StridedBatch_doc.substitute(precision='double precision', real='real') -_libcusparse.cusparseSgtsvInterleavedBatch_bufferSizeExt.restype = int -_libcusparse.cusparseSgtsvInterleavedBatch_bufferSizeExt.argtypes =\ - [ctypes.c_void_p, - ctypes.c_int, - ctypes.c_int, - ctypes.c_void_p, - ctypes.c_void_p, - ctypes.c_void_p, - ctypes.c_void_p, - ctypes.c_int, - ctypes.c_void_p - ] -def cusparseSgtsvInterleavedBatch_bufferSizeExt(handle, algo, m, dl, d, du, x, batchCount): +gtsv2InterleavedBatch_bufferSizeExt_doc = Template( """ - Calculate size of 
work buffer used by cusparseSgtsvInterleavedBatch. + Calculate size of work buffer used by cusparsegtsvInterleavedBatch. Parameters ---------- @@ -576,26 +585,38 @@ def cusparseSgtsvInterleavedBatch_bufferSizeExt(handle, algo, m, dl, d, du, x, b ---------- `cusparsegtsvInterleavedBatch `_ """ +) + +_libcusparse.cusparseSgtsvInterleavedBatch_bufferSizeExt.restype = int +_libcusparse.cusparseSgtsvInterleavedBatch_bufferSizeExt.argtypes =\ + [ctypes.c_void_p, ctypes.c_int, ctypes.c_int, ctypes.c_void_p, + ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, + ctypes.c_void_p + ] +def cusparseSgtsvInterleavedBatch_bufferSizeExt(handle, algo, m, dl, d, du, x, batchCount): pBufferSizeInBytes = ctypes.c_int() status = _libcusparse.cusparseSgtsvInterleavedBatch_bufferSizeExt( handle, algo, m, int(dl), int(d), int(du), int(x), batchCount, ctypes.byref(pBufferSizeInBytes)) cusparseCheckStatus(status) return pBufferSizeInBytes.value +cusparseSgtsvInterleavedBatch_bufferSizeExt.__doc__ = \ + gtsv2InterleavedBatch_bufferSizeExt_doc.substitute(precision='single precision', real='real') -_libcusparse.cusparseSgtsvInterleavedBatch.restype = int -_libcusparse.cusparseSgtsvInterleavedBatch.argtypes =\ - [ctypes.c_void_p, - ctypes.c_int, - ctypes.c_int, - ctypes.c_void_p, - ctypes.c_void_p, - ctypes.c_void_p, - ctypes.c_void_p, - ctypes.c_int, - ctypes.c_void_p - ] -def cusparseSgtsvInterleavedBatch(handle, algo, m, dl, d, du, x, batchCount, pBuffer): +_libcusparse.cusparseDgtsvInterleavedBatch_bufferSizeExt.restype = int +_libcusparse.cusparseDgtsvInterleavedBatch_bufferSizeExt.argtypes =\ + _libcusparse.cusparseSgtsvInterleavedBatch_bufferSizeExt.argtypes +def cusparseDgtsvInterleavedBatch_bufferSizeExt(handle, algo, m, dl, d, du, x, batchCount): + pBufferSizeInBytes = ctypes.c_int() + status = _libcusparse.cusparseDgtsvInterleavedBatch_bufferSizeExt( + handle, algo, m, int(dl), int(d), int(du), int(x), batchCount, + ctypes.byref(pBufferSizeInBytes)) + 
cusparseCheckStatus(status) + return pBufferSizeInBytes.value +cusparseDgtsvInterleavedBatch_bufferSizeExt.__doc__ = \ + gtsv2InterleavedBatch_bufferSizeExt_doc.substitute(precision='double precision', real='real') + +gtsvInterleavedBatch_doc = Template( """ Compute the solution of multiple tridiagonal linear systems. @@ -662,6 +683,27 @@ def cusparseSgtsvInterleavedBatch(handle, algo, m, dl, d, du, x, batchCount, pBu ---------- `cusparsegtsvInterleavedBatch `_ """ +) + +_libcusparse.cusparseSgtsvInterleavedBatch.restype = int +_libcusparse.cusparseSgtsvInterleavedBatch.argtypes =\ + [ctypes.c_void_p, ctypes.c_int, ctypes.c_int, ctypes.c_void_p, + ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, + ctypes.c_void_p + ] +def cusparseSgtsvInterleavedBatch(handle, algo, m, dl, d, du, x, batchCount, pBuffer): status = _libcusparse.cusparseSgtsvInterleavedBatch( handle, algo, m, int(dl), int(d), int(du), int(x), batchCount, int(pBuffer)) cusparseCheckStatus(status) +cusparseSgtsvInterleavedBatch.__doc__ = \ + gtsvInterleavedBatch_doc.substitute(precision='single precision', real='real') + +_libcusparse.cusparseDgtsvInterleavedBatch.restype = int +_libcusparse.cusparseDgtsvInterleavedBatch.argtypes =\ + _libcusparse.cusparseSgtsvInterleavedBatch.argtypes +def cusparseDgtsvInterleavedBatch(handle, algo, m, dl, d, du, x, batchCount, pBuffer): + status = _libcusparse.cusparseDgtsvInterleavedBatch( + handle, algo, m, int(dl), int(d), int(du), int(x), batchCount, int(pBuffer)) + cusparseCheckStatus(status) +cusparseDgtsvInterleavedBatch.__doc__ = \ + gtsvInterleavedBatch_doc.substitute(precision='double precision', real='real') \ No newline at end of file diff --git a/tests/test_cusparse.py b/tests/test_cusparse.py index 3856124a..caac2c86 100644 --- a/tests/test_cusparse.py +++ b/tests/test_cusparse.py @@ -28,7 +28,8 @@ def check_batch_tridiagonal(dl,d,du,x, y, m,batchCount,batchStride=None,atol=1e- y_sys = y[ii*batchStride:ii*batchStride+m] 
assert(np.allclose(np.dot(A_sys,y_sys), x_sys, atol=atol)) -def tridiagonal_system(m, batchCount, batchStride=None, seed=None): +def tridiagonal_system(m, batchCount, batchStride=None, seed=None, + dtype=np.float32): """ Create a tridiagonal system of a given size """ @@ -37,10 +38,10 @@ def tridiagonal_system(m, batchCount, batchStride=None, seed=None): if seed is not None: np.random.seed(seed) - dl = np.zeros(batchStride*batchCount).astype(np.float32) - d = np.zeros(batchStride*batchCount).astype(np.float32) - du = np.zeros(batchStride*batchCount).astype(np.float32) - x = np.zeros(batchStride*batchCount).astype(np.float32) + dl = np.zeros(batchStride*batchCount).astype(dtype) + d = np.zeros(batchStride*batchCount).astype(dtype) + du = np.zeros(batchStride*batchCount).astype(dtype) + x = np.zeros(batchStride*batchCount).astype(dtype) for ii in range(batchCount): dl[ii*batchStride+1:ii*batchStride+m] = np.random.rand(m-1) @@ -90,6 +91,32 @@ def test_cusparseSgtsv2StridedBatch(self): # For unstable algorithms, need to loosen atol check_batch_tridiagonal(dl,d,du,x, sln, m,batchCount,batchStride, 1e-4) + def test_cusparseDgtsv2StridedBatch(self): + m = 6 + batchCount = 9 + batchStride = 11 + + dl,d,du,x = tridiagonal_system(m, batchCount, batchStride, seed=23, + dtype=np.float64) + + dl_gpu = gpuarray.to_gpu(dl) + d_gpu = gpuarray.to_gpu(d) + du_gpu = gpuarray.to_gpu(du) + x_gpu = gpuarray.to_gpu(x) + + bufferSizeInBytes = cusparse.cusparseDgtsv2StridedBatch_bufferSizeExt( + self.cusparse_handle, m, dl_gpu.gpudata, d_gpu.gpudata, + du_gpu.gpudata, x_gpu.gpudata, batchCount, batchStride) + pBuffer = pycuda.driver.mem_alloc(bufferSizeInBytes) + + cusparse.cusparseDgtsv2StridedBatch(self.cusparse_handle, m, + dl_gpu.gpudata, d_gpu.gpudata, du_gpu.gpudata, x_gpu.gpudata, + batchCount, batchStride, pBuffer) + + sln = x_gpu.get() + # For unstable algorithms, need to loosen atol + check_batch_tridiagonal(dl,d,du,x, sln, m,batchCount,batchStride) + def 
test_cusparseSgtsvInterleavedBatch(self): m = 6 batchCount = 9 @@ -124,10 +151,47 @@ def test_cusparseSgtsvInterleavedBatch(self): sln = np.reshape(sln_int,(m,batchCount)).ravel('F') check_batch_tridiagonal(dl,d,du,x, sln, m,batchCount, atol=1e-6) + def test_cusparseDgtsvInterleavedBatch(self): + m = 6 + batchCount = 9 + + dl,d,du,x = tridiagonal_system(m, batchCount, seed=23, + dtype=np.float64) + + # Convert to interleaved format, by switching from row-major order + # (numpy default) to column-major + dl_int = np.reshape(dl,(batchCount,m)).ravel('F') + d_int = np.reshape(d, (batchCount, m)).ravel('F') + du_int = np.reshape(du,(batchCount,m)).ravel('F') + x_int = np.reshape(x,(batchCount,m)).ravel('F') + + for algo in range(3): + dl_int_gpu = gpuarray.to_gpu(dl_int) + d_int_gpu = gpuarray.to_gpu(d_int) + du_int_gpu = gpuarray.to_gpu(du_int) + x_int_gpu = gpuarray.to_gpu(x_int) + + pBufferSizeInBytes = cusparse.cusparseDgtsvInterleavedBatch_bufferSizeExt(self.cusparse_handle, + algo, m, dl_int_gpu.gpudata, d_int_gpu.gpudata, + du_int_gpu.gpudata, x_int_gpu.gpudata, batchCount) + + pBuffer = pycuda.driver.mem_alloc(pBufferSizeInBytes) + + cusparse.cusparseDgtsvInterleavedBatch(self.cusparse_handle, algo, m, + dl_int_gpu.gpudata, d_int_gpu.gpudata, du_int_gpu.gpudata, + x_int_gpu.gpudata, batchCount, pBuffer) + + sln_int = x_int_gpu.get() + # Convert back from interleaved format + sln = np.reshape(sln_int,(m,batchCount)).ravel('F') + check_batch_tridiagonal(dl,d,du,x, sln, m,batchCount) + def suite(): s = TestSuite() s.addTest(test_cusparse('test_cusparseSgtsv2StridedBatch')) + s.addTest(test_cusparse('test_cusparseDgtsv2StridedBatch')) s.addTest(test_cusparse('test_cusparseSgtsvInterleavedBatch')) + s.addTest(test_cusparse('test_cusparseDgtsvInterleavedBatch')) return s if __name__ == '__main__': From 7ca4deecc43ab05714586095d82ec8d1ab01fb70 Mon Sep 17 00:00:00 2001 From: Ryan Gutenkunst Date: Sat, 11 Apr 2020 18:55:36 -0700 Subject: [PATCH 08/13] Add cusparse to 
documentation --- docs/source/reference.rst | 1 + skcuda/cusparse.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/source/reference.rst b/docs/source/reference.rst index c8e59ccc..c3ef351a 100644 --- a/docs/source/reference.rst +++ b/docs/source/reference.rst @@ -13,6 +13,7 @@ Library Wrapper Routines reference_cusolver reference_cula reference_pcula + reference_cusparse High-Level Routines ------------------- diff --git a/skcuda/cusparse.py b/skcuda/cusparse.py index ecc408e0..3a67ff03 100644 --- a/skcuda/cusparse.py +++ b/skcuda/cusparse.py @@ -200,6 +200,7 @@ def cusparseDestroy(handle): _libcusparse.cusparseGetVersion.restype = int _libcusparse.cusparseGetVersion.argtypes = [ctypes.c_int, ctypes.c_void_p] +# XXX: Test def cusparseGetVersion(handle): """ Return CUSPARSE library version. @@ -227,6 +228,7 @@ def cusparseGetVersion(handle): _libcusparse.cusparseSetStream.restype = int _libcusparse.cusparseSetStream.argtypes = [ctypes.c_int, ctypes.c_int] +# XXX: Test. Check for cusparseGetStream def cusparseSetStream(handle, id): """ Sets the CUSPARSE stream in which kernels will run. 
@@ -706,4 +708,4 @@ def cusparseDgtsvInterleavedBatch(handle, algo, m, dl, d, du, x, batchCount, pBu handle, algo, m, int(dl), int(d), int(du), int(x), batchCount, int(pBuffer)) cusparseCheckStatus(status) cusparseDgtsvInterleavedBatch.__doc__ = \ - gtsvInterleavedBatch_doc.substitute(precision='double precision', real='real') \ No newline at end of file + gtsvInterleavedBatch_doc.substitute(precision='double precision', real='real') From e77bc6961340cce86efd47a1886c58a2b639c9f1 Mon Sep 17 00:00:00 2001 From: Ryan Gutenkunst Date: Sun, 12 Apr 2020 13:55:17 -0700 Subject: [PATCH 09/13] cusparse Get/SetStream and GetVersion --- skcuda/cusparse.py | 47 +++++++++++++++++++++++++----------------- tests/test_cusparse.py | 13 ++++++++++++ 2 files changed, 41 insertions(+), 19 deletions(-) diff --git a/skcuda/cusparse.py b/skcuda/cusparse.py index ecc408e0..64ba2a60 100644 --- a/skcuda/cusparse.py +++ b/skcuda/cusparse.py @@ -149,9 +149,7 @@ def cusparseCheckStatus(status): See Also -------- cusparseExceptions - """ - if status != 0: try: raise cusparseExceptions[status] @@ -171,9 +169,7 @@ def cusparseCreate(): ------- handle : int CUSPARSE library context. - """ - handle = ctypes.c_void_p() status = _libcusparse.cusparseCreate(ctypes.byref(handle)) cusparseCheckStatus(status) @@ -185,21 +181,18 @@ def cusparseDestroy(handle): """ Release CUSPARSE resources. - Releases hardware resources used by CUSPARSE + Releases hardware resources used by CUSPARSE. Parameters ---------- handle : int CUSPARSE library context. - """ - status = _libcusparse.cusparseDestroy(handle) cusparseCheckStatus(status) _libcusparse.cusparseGetVersion.restype = int -_libcusparse.cusparseGetVersion.argtypes = [ctypes.c_int, - ctypes.c_void_p] +_libcusparse.cusparseGetVersion.argtypes = [ctypes.c_void_p, ctypes.c_void_p] def cusparseGetVersion(handle): """ Return CUSPARSE library version. @@ -215,9 +208,7 @@ def cusparseGetVersion(handle): ------- version : int CUSPARSE library version number. 
- """ - version = ctypes.c_int() status = _libcusparse.cusparseGetVersion(handle, ctypes.byref(version)) @@ -225,8 +216,7 @@ def cusparseGetVersion(handle): return version.value _libcusparse.cusparseSetStream.restype = int -_libcusparse.cusparseSetStream.argtypes = [ctypes.c_int, - ctypes.c_int] +_libcusparse.cusparseSetStream.argtypes = [ctypes.c_void_p, ctypes.c_int] def cusparseSetStream(handle, id): """ Sets the CUSPARSE stream in which kernels will run. @@ -237,11 +227,30 @@ def cusparseSetStream(handle, id): CUSPARSE library context. id : int Stream ID. + """ + status = _libcusparse.cusparseSetStream(handle, id) + cusparseCheckStatus(status) +_libcusparse.cusparseGetStream.restype = int +_libcusparse.cusparseGetStream.argtypes = [ctypes.c_void_p, ctypes.c_void_p] +def cusparseGetStream(handle): """ + Gets the CUSPARSE stream in which kernels will run. - status = _libcusparse.cusparseSetStream(handle, id) + Parameters + ---------- + handle : int + CUSPARSE library context. + + Returns + ------- + handle : int + CUSPARSE library context. 
+ """ + id = ctypes.c_int() + status = _libcusparse.cusparseGetStream(handle, ctypes.byref(id)) cusparseCheckStatus(status) + return id.value _libcusparse.cusparseCreateMatDescr.restype = int _libcusparse.cusparseCreateMatDescr.argtypes = [cusparseMatDescr] @@ -395,7 +404,7 @@ def cusparseSdense2csr(handle, m, n, descrA, A, lda, Parameters ---------- - handle : ctypes.c_void_p + handle : int cuSPARSE context m : int Size of the linear system (must be >= 3) @@ -486,7 +495,7 @@ def cusparseDgtsv2StridedBatch_bufferSizeExt(handle, m, dl, d, du, x, batchCount Parameters ---------- - handle : ctypes.c_void_p + handle : int cuSPARSE context m : int Size of the linear system (must be >= 3) @@ -555,7 +564,7 @@ def cusparseDgtsv2StridedBatch(handle, m, dl, d, du, x, batchCount, batchStride, Parameters ---------- - handle : ctypes.c_void_p + handle : int cuSPARSE context algo : int algo = 0: cuThomas (unstable algorithm); algo = 1: LU with pivoting @@ -653,7 +662,7 @@ def cusparseDgtsvInterleavedBatch_bufferSizeExt(handle, algo, m, dl, d, du, x, b Parameters ---------- - handle : ctypes.c_void_p + handle : int cuSPARSE context algo : int algo = 0: cuThomas (unstable algorithm); algo = 1: LU with pivoting @@ -706,4 +715,4 @@ def cusparseDgtsvInterleavedBatch(handle, algo, m, dl, d, du, x, batchCount, pBu handle, algo, m, int(dl), int(d), int(du), int(x), batchCount, int(pBuffer)) cusparseCheckStatus(status) cusparseDgtsvInterleavedBatch.__doc__ = \ - gtsvInterleavedBatch_doc.substitute(precision='double precision', real='real') \ No newline at end of file + gtsvInterleavedBatch_doc.substitute(precision='double precision', real='real') diff --git a/tests/test_cusparse.py b/tests/test_cusparse.py index caac2c86..469bfc68 100644 --- a/tests/test_cusparse.py +++ b/tests/test_cusparse.py @@ -185,6 +185,16 @@ def test_cusparseDgtsvInterleavedBatch(self): # Convert back from interleaved format sln = np.reshape(sln_int,(m,batchCount)).ravel('F') check_batch_tridiagonal(dl,d,du,x, 
sln, m,batchCount) + + def test_cusparseGetSetStream(self): + initial_stream = cusparse.cusparseGetStream(self.cusparse_handle) + # Switch stream + cusparse.cusparseSetStream(self.cusparse_handle, initial_stream+1) + final_stream = cusparse.cusparseGetStream(self.cusparse_handle) + assert(final_stream == initial_stream+1) + + def test_cusparseGetVersion(self): + cusparse.cusparseGetVersion(self.cusparse_handle) def suite(): s = TestSuite() @@ -192,6 +202,9 @@ def suite(): s.addTest(test_cusparse('test_cusparseDgtsv2StridedBatch')) s.addTest(test_cusparse('test_cusparseSgtsvInterleavedBatch')) s.addTest(test_cusparse('test_cusparseDgtsvInterleavedBatch')) + + s.addTest(test_cusparse('test_cusparseGetSetStream')) + s.addTest(test_cusparse('test_cusparseGetVersion')) return s if __name__ == '__main__': From f2c76dc824d6bb5ec5e1ed2975dddac31e07918e Mon Sep 17 00:00:00 2001 From: Ryan Gutenkunst Date: Sun, 12 Apr 2020 14:08:52 -0700 Subject: [PATCH 10/13] Add missing cusparse documentation file --- docs/source/reference_cusparse.rst | 43 ++++++++++++++++++++++++++++++ skcuda/cusparse.py | 12 --------- 2 files changed, 43 insertions(+), 12 deletions(-) create mode 100644 docs/source/reference_cusparse.rst diff --git a/docs/source/reference_cusparse.rst b/docs/source/reference_cusparse.rst new file mode 100644 index 00000000..e14a5ab3 --- /dev/null +++ b/docs/source/reference_cusparse.rst @@ -0,0 +1,43 @@ +.. -*- rst -*- + +.. currentmodule:: skcuda.cusparse + +CUSPARSE Routines +================= + +Helper Routines +--------------- +.. autosummary:: + :toctree: generated/ + :nosignatures: + + cusparseCreate + cusparseDestroy + cusparseGetVersion + cusparseSetStream + cusparseGetStream + +Wrapper Routines +---------------- + +Single Precision Routines +^^^^^^^^^^^^^^^^^^^^^^^^^ +..
autosummary:: + :toctree: generated/ + :nosignatures: + + cusparseSgtsv2StridedBatch_bufferSizeExt + cusparseSgtsv2StridedBatch + cusparseSgtsvInterleavedBatch_bufferSizeExt + cusparseSgtsvInterleavedBatch + +Double Precision Routines +^^^^^^^^^^^^^^^^^^^^^^^^^ +.. autosummary:: + :toctree: generated/ + :nosignatures: + + cusparseDgtsv2StridedBatch_bufferSizeExt + cusparseDgtsv2StridedBatch + cusparseDgtsvInterleavedBatch_bufferSizeExt + cusparseDgtsvInterleavedBatch diff --git a/skcuda/cusparse.py b/skcuda/cusparse.py index 38306c17..64ba2a60 100644 --- a/skcuda/cusparse.py +++ b/skcuda/cusparse.py @@ -192,13 +192,7 @@ def cusparseDestroy(handle): cusparseCheckStatus(status) _libcusparse.cusparseGetVersion.restype = int -<<<<<<< HEAD _libcusparse.cusparseGetVersion.argtypes = [ctypes.c_void_p, ctypes.c_void_p] -======= -_libcusparse.cusparseGetVersion.argtypes = [ctypes.c_int, - ctypes.c_void_p] -# XXX: Test ->>>>>>> 7ca4deecc43ab05714586095d82ec8d1ab01fb70 def cusparseGetVersion(handle): """ Return CUSPARSE library version. @@ -222,13 +216,7 @@ def cusparseGetVersion(handle): return version.value _libcusparse.cusparseSetStream.restype = int -<<<<<<< HEAD _libcusparse.cusparseSetStream.argtypes = [ctypes.c_void_p, ctypes.c_int] -======= -_libcusparse.cusparseSetStream.argtypes = [ctypes.c_int, - ctypes.c_int] -# XXX: Test. Check for cusparseGetStream ->>>>>>> 7ca4deecc43ab05714586095d82ec8d1ab01fb70 def cusparseSetStream(handle, id): """ Sets the CUSPARSE stream in which kernels will run. 
From c5f779f97f2ed602e87ddcb7f8905aab58b78dcf Mon Sep 17 00:00:00 2001 From: Ryan Gutenkunst Date: Sun, 12 Apr 2020 14:21:29 -0700 Subject: [PATCH 11/13] Remove unfinished cusparse methods --- skcuda/cusparse.py | 146 --------------------------------------------- 1 file changed, 146 deletions(-) diff --git a/skcuda/cusparse.py b/skcuda/cusparse.py index 64ba2a60..164155f8 100644 --- a/skcuda/cusparse.py +++ b/skcuda/cusparse.py @@ -252,152 +252,6 @@ def cusparseGetStream(handle): cusparseCheckStatus(status) return id.value -_libcusparse.cusparseCreateMatDescr.restype = int -_libcusparse.cusparseCreateMatDescr.argtypes = [cusparseMatDescr] -def cusparseCreateMatDescr(): - """ - Initialize a sparse matrix descriptor. - - Initializes the `MatrixType` and `IndexBase` fields of the matrix - descriptor to the default values `CUSPARSE_MATRIX_TYPE_GENERAL` - and `CUSPARSE_INDEX_BASE_ZERO`. - - Returns - ------- - desc : cusparseMatDescr - Matrix descriptor. - - """ - - desc = cusparseMatrixDesc() - status = _libcusparse.cusparseCreateMatDescr(ctypes.byref(desc)) - cusparseCheckStatus(status) - return desc - -_libcusparse.cusparseDestroyMatDescr.restype = int -_libcusparse.cusparseDestroyMatDescr.argtypes = [ctypes.c_int] -def cusparseDestroyMatDescr(desc): - """ - Releases the memory allocated for the matrix descriptor. - - Parameters - ---------- - desc : cusparseMatDescr - Matrix descriptor. - - """ - - status = _libcusparse.cusparseDestroyMatDescr(desc) - cusparseCheckStatus(status) - -_libcusparse.cusparseSetMatType.restype = int -_libcusparse.cusparseSetMatType.argtypes = [cusparseMatDescr, - ctypes.c_int] -def cusparseSetMatType(desc, type): - """ - Sets the matrix type of the specified matrix. - - Parameters - ---------- - desc : cusparseMatDescr - Matrix descriptor. - type : int - Matrix type. 
- - """ - - status = _libcusparse.cusparseSetMatType(desc, type) - cusparseCheckStatus(status) - -_libcusparse.cusparseGetMatType.restype = int -_libcusparse.cusparseGetMatType.argtypes = [cusparseMatDescr] -def cusparseGetMatType(desc): - """ - Gets the matrix type of the specified matrix. - - Parameters - ---------- - desc : cusparseMatDescr - Matrix descriptor. - - Returns - ------- - type : int - Matrix type. - - """ - - return _libcusparse.cusparseGetMatType(desc) - -# Format conversion functions: -_libcusparse.cusparseSnnz.restype = int -_libcusparse.cusparseSnnz.argtypes = [ctypes.c_int, - ctypes.c_int, - ctypes.c_int, - ctypes.c_int, - cusparseMatDescr, - ctypes.c_void_p, - ctypes.c_int, - ctypes.c_void_p, - ctypes.c_void_p] -def cusparseSnnz(handle, dirA, m, n, descrA, A, lda, - nnzPerRowColumn, nnzTotalDevHostPtr): - """ - Compute number of non-zero elements per row, column, or dense matrix. - - Parameters - ---------- - handle : int - CUSPARSE library context. - dirA : int - Data direction of elements. - m : int - Rows in A. - n : int - Columns in A. - descrA : cusparseMatDescr - Matrix descriptor. - A : pycuda.gpuarray.GPUArray - Dense matrix of dimensions (lda, n). - lda : int - Leading dimension of A. - - Returns - ------- - nnzPerRowColumn : pycuda.gpuarray.GPUArray - Array of length m or n containing the number of - non-zero elements per row or column, respectively. - nnzTotalDevHostPtr : pycuda.gpuarray.GPUArray - Total number of non-zero elements in device or host memory. 
- - """ - - # Unfinished: - nnzPerRowColumn = gpuarray.empty() - nnzTotalDevHostPtr = gpuarray.empty() - - status = _libcusparse.cusparseSnnz(handle, dirA, m, n, - descrA, int(A), lda, - int(nnzPerRowColumn), int(nnzTotalDevHostPtr)) - cusparseCheckStatus(status) - return nnzPerVector, nnzHost - -_libcusparse.cusparseSdense2csr.restype = int -_libcusparse.cusparseSdense2csr.argtypes = [ctypes.c_int, - ctypes.c_int, - ctypes.c_int, - cusparseMatDescr, - ctypes.c_void_p, - ctypes.c_int, - ctypes.c_void_p, - ctypes.c_void_p, - ctypes.c_void_p, - ctypes.c_void_p] -def cusparseSdense2csr(handle, m, n, descrA, A, lda, - nnzPerRow, csrValA, csrRowPtrA, csrColIndA): - # Unfinished - pass - gtsv2StridedBatch_bufferSizeExt_doc = Template( """ Calculate size of work buffer used by cusparsegtsv2StridedBatch. From 263ff24423f298ef726287b6a3b398d7e7bddf88 Mon Sep 17 00:00:00 2001 From: Ryan Gutenkunst Date: Sun, 12 Apr 2020 14:27:47 -0700 Subject: [PATCH 12/13] Remove unused imports in cusparse --- skcuda/cusparse.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/skcuda/cusparse.py b/skcuda/cusparse.py index 164155f8..b700401e 100644 --- a/skcuda/cusparse.py +++ b/skcuda/cusparse.py @@ -8,13 +8,10 @@ from __future__ import absolute_import -import ctypes.util +import ctypes import platform from string import Template import sys -import warnings - -import numpy as np from . 
import cuda From 13a5fdd10252df792a324dfa33139f4e22d30adb Mon Sep 17 00:00:00 2001 From: Ryan Gutenkunst Date: Sat, 2 May 2020 15:22:17 -0700 Subject: [PATCH 13/13] Remove more unused code in cusparse --- skcuda/cusparse.py | 35 ----------------------------------- 1 file changed, 35 deletions(-) diff --git a/skcuda/cusparse.py b/skcuda/cusparse.py index b700401e..a8442b6e 100644 --- a/skcuda/cusparse.py +++ b/skcuda/cusparse.py @@ -96,41 +96,6 @@ class cusparseStatusMatrixTypeNotSupported(cusparseError): 8: cusparseStatusMatrixTypeNotSupported, } -# Matrix types: -CUSPARSE_MATRIX_TYPE_GENERAL = 0 -CUSPARSE_MATRIX_TYPE_SYMMETRIC = 1 -CUSPARSE_MATRIX_TYPE_HERMITIAN = 2 -CUSPARSE_MATRIX_TYPE_TRIANGULAR = 3 - -CUSPARSE_FILL_MODE_LOWER = 0 -CUSPARSE_FILL_MODE_UPPER = 1 - -# Whether or not a matrix' diagonal entries are unity: -CUSPARSE_DIAG_TYPE_NON_UNIT = 0 -CUSPARSE_DIAG_TYPE_UNIT = 1 - -# Matrix index bases: -CUSPARSE_INDEX_BASE_ZERO = 0 -CUSPARSE_INDEX_BASE_ONE = 1 - -# Operation types: -CUSPARSE_OPERATION_NON_TRANSPOSE = 0 -CUSPARSE_OPERATION_TRANSPOSE = 1 -CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE = 2 - -# Whether or not to parse elements of a dense matrix row or column-wise. -CUSPARSE_DIRECTION_ROW = 0 -CUSPARSE_DIRECTION_COLUMN = 1 - -# Helper functions: -class cusparseMatDescr(ctypes.Structure): - _fields_ = [ - ('MatrixType', ctypes.c_int), - ('FillMode', ctypes.c_int), - ('DiagType', ctypes.c_int), - ('IndexBase', ctypes.c_int) - ] - def cusparseCheckStatus(status): """ Raise CUSPARSE exception