
Commit 6a6e86c

Merge commit (2 parents: 2219f43 + d33d29b)

File tree: 17 files changed (+156, −155 lines)

.coveragerc

Lines changed: 0 additions & 1 deletion
@@ -1,5 +1,4 @@
 [report]
 exclude_lines =
     pragma: no cover
-    def backward
     cuda

cuda/matmul.cpp

Lines changed: 0 additions & 15 deletions
This file was deleted.

cuda/spspmm.cpp

Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
+#include <torch/torch.h>
+
+#define CHECK_CUDA(x) AT_ASSERTM(x.type().is_cuda(), #x " must be CUDA tensor")
+
+std::tuple<at::Tensor, at::Tensor>
+spspmm_cuda(at::Tensor indexA, at::Tensor valueA, at::Tensor indexB,
+            at::Tensor valueB, int m, int k, int n);
+
+std::tuple<at::Tensor, at::Tensor> spspmm(at::Tensor indexA, at::Tensor valueA,
+                                          at::Tensor indexB, at::Tensor valueB,
+                                          int m, int k, int n) {
+  CHECK_CUDA(indexA);
+  CHECK_CUDA(valueA);
+  CHECK_CUDA(indexB);
+  CHECK_CUDA(valueB);
+  return spspmm_cuda(indexA, valueA, indexB, valueB, m, k, n);
+}
+
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+  m.def("spspmm", &spspmm, "Sparse-Sparse Matrix Multiplication (CUDA)");
+}
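
Editor's note: a quick way to exercise this binding is PyTorch's JIT extension loader. The snippet below is a hedged sketch, not part of the commit; it assumes a CUDA-capable machine, a working nvcc toolchain, and that it is run from the repository root.

    # Sketch: JIT-compile the extension and call the bound function on tiny inputs.
    import torch
    from torch.utils.cpp_extension import load

    ext = load(name='spspmm_cuda',
               sources=['cuda/spspmm.cpp', 'cuda/spspmm_kernel.cu'])

    indexA = torch.tensor([[0, 0, 1], [1, 2, 0]], device='cuda')  # COO indices of A
    valueA = torch.tensor([1., 2., 3.], device='cuda')
    indexB = torch.tensor([[0, 2], [1, 0]], device='cuda')        # COO indices of B
    valueB = torch.tensor([2., 4.], device='cuda')
    # A is m x k = 3 x 3, B is k x n = 3 x 2.
    indexC, valueC = ext.spspmm(indexA, valueA, indexB, valueB, 3, 3, 2)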

cuda/matmul_cuda.cu renamed to cuda/spspmm_kernel.cu

Lines changed: 20 additions & 16 deletions
@@ -27,28 +27,32 @@ static void init_cusparse() {
   }
 }
 
-std::tuple<at::Tensor, at::Tensor> spspmm_cuda(at::Tensor A, at::Tensor B) {
+std::tuple<at::Tensor, at::Tensor>
+spspmm_cuda(at::Tensor indexA, at::Tensor valueA, at::Tensor indexB,
+            at::Tensor valueB, int m, int k, int n) {
   init_cusparse();
 
-  auto m = A.size(0);
-  auto k = A.size(1);
-  auto n = B.size(1);
+  indexA = indexA.contiguous();
+  valueA = valueA.contiguous();
+  indexB = indexB.contiguous();
+  valueB = valueB.contiguous();
 
-  auto nnzA = A._nnz();
-  auto nnzB = B._nnz();
+  auto nnzA = valueA.size(0);
+  auto nnzB = valueB.size(0);
 
-  auto valueA = A._values();
-  auto indexA = A._indices().toType(at::kInt);
-  auto row_ptrA = at::empty(indexA.type(), {m + 1});
+  indexA = indexA.toType(at::kInt);
+  indexB = indexB.toType(at::kInt);
+
+  // Convert A to CSR format.
+  auto row_ptrA = at::empty(m + 1, indexA.type());
   cusparseXcoo2csr(cusparse_handle, indexA[0].data<int>(), nnzA, k,
                    row_ptrA.data<int>(), CUSPARSE_INDEX_BASE_ZERO);
   auto colA = indexA[1];
   cudaMemcpy(row_ptrA.data<int>() + m, &nnzA, sizeof(int),
              cudaMemcpyHostToDevice);
 
-  auto valueB = B._values();
-  auto indexB = B._indices().toType(at::kInt);
-  auto row_ptrB = at::empty(indexB.type(), {k + 1});
+  // Convert B to CSR format.
+  auto row_ptrB = at::empty(k + 1, indexB.type());
   cusparseXcoo2csr(cusparse_handle, indexB[0].data<int>(), nnzB, k,
                    row_ptrB.data<int>(), CUSPARSE_INDEX_BASE_ZERO);
   auto colB = indexB[1];
@@ -61,14 +65,14 @@ std::tuple<at::Tensor, at::Tensor> spspmm_cuda(at::Tensor A, at::Tensor B) {
   cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO);
 
   int nnzC;
-  auto row_ptrC = at::empty(indexA.type(), {m + 1});
+  auto row_ptrC = at::empty(m + 1, indexB.type());
   cusparseXcsrgemmNnz(cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
                       CUSPARSE_OPERATION_NON_TRANSPOSE, m, n, k, descr, nnzA,
                       row_ptrA.data<int>(), colA.data<int>(), descr, nnzB,
                       row_ptrB.data<int>(), colB.data<int>(), descr,
                       row_ptrC.data<int>(), &nnzC);
-  auto colC = at::empty(indexA.type(), {nnzC});
-  auto valueC = at::empty(valueA.type(), {nnzC});
+  auto colC = at::empty(nnzC, indexA.type());
+  auto valueC = at::empty(nnzC, valueA.type());
 
   CSRGEMM(valueC.type(), cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
           CUSPARSE_OPERATION_NON_TRANSPOSE, m, n, k, descr, nnzA,
@@ -77,7 +81,7 @@ std::tuple<at::Tensor, at::Tensor> spspmm_cuda(at::Tensor A, at::Tensor B) {
           colB.data<int>(), descr, valueC.data<scalar_t>(),
           row_ptrC.data<int>(), colC.data<int>());
 
-  auto rowC = at::empty(indexA.type(), {nnzC});
+  auto rowC = at::empty(nnzC, indexA.type());
   cusparseXcsr2coo(cusparse_handle, row_ptrC.data<int>(), nnzC, m,
                    rowC.data<int>(), CUSPARSE_INDEX_BASE_ZERO);
 
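
Editor's note: the cusparseXcoo2csr calls above compress sorted COO row indices into CSR row pointers, i.e. an exclusive prefix sum of per-row nonzero counts. An illustrative re-implementation in plain PyTorch (the coo_to_csr_row_ptr helper is hypothetical, not part of this repo):

    # row_ptr[i] is the offset of row i's first nonzero; row_ptr[m] == nnz.
    import torch

    def coo_to_csr_row_ptr(row, m):
        counts = torch.bincount(row, minlength=m)   # nonzeros per row
        row_ptr = torch.zeros(m + 1, dtype=torch.int32)
        row_ptr[1:] = torch.cumsum(counts, dim=0)   # exclusive prefix sum
        return row_ptr

    row = torch.tensor([0, 0, 1, 2, 2])   # sorted COO row indices, m = 3
    coo_to_csr_row_ptr(row, 3)            # tensor([0, 2, 3, 5], dtype=torch.int32)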

setup.py

Lines changed: 3 additions & 3 deletions
@@ -5,16 +5,16 @@
 __version__ = '0.2.0'
 url = 'https://github.com/rusty1s/pytorch_sparse'
 
-install_requires = ['numpy', 'scipy']
+install_requires = ['scipy']
 setup_requires = ['pytest-runner']
 tests_require = ['pytest', 'pytest-cov']
 ext_modules = []
 cmdclass = {}
 
 if torch.cuda.is_available():
     ext_modules += [
-        CUDAExtension('matmul_cuda',
-                      ['cuda/matmul.cpp', 'cuda/matmul_cuda.cu'])
+        CUDAExtension('spspmm_cuda',
+                      ['cuda/spspmm.cpp', 'cuda/spspmm_kernel.cu'])
     ]
     cmdclass['build_ext'] = BuildExtension
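
Editor's note: a hedged sketch of how the built artifact is consumed (an assumption about usage, not code from the commit). On a CUDA machine, installing the package compiles the two sources above into a top-level extension module:

    # Assumption: the package was installed (e.g. `pip install .`) with CUDA available.
    import torch

    if torch.cuda.is_available():
        import spspmm_cuda  # compiled from cuda/spspmm.cpp + cuda/spspmm_kernel.cu
        print(spspmm_cuda.spspmm.__doc__)  # "Sparse-Sparse Matrix Multiplication (CUDA)"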

test/test_coalesce.py

Lines changed: 1 addition & 1 deletion
@@ -8,6 +8,6 @@ def test_coalesce():
     index = torch.stack([row, col], dim=0)
     value = torch.tensor([[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7]])
 
-    index, value = coalesce(index, value, torch.Size([4, 2]))
+    index, value = coalesce(index, value, m=3, n=2)
     assert index.tolist() == [[0, 1, 1, 2], [1, 0, 1, 0]]
     assert value.tolist() == [[6, 8], [7, 9], [3, 4], [5, 6]]

test/test_matmul.py

Lines changed: 0 additions & 48 deletions
This file was deleted.

test/test_spmm.py

Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
+import torch
+from torch_sparse import spmm
+
+
+def test_spmm():
+    row = torch.tensor([0, 0, 1, 2, 2])
+    col = torch.tensor([0, 2, 1, 0, 1])
+    index = torch.stack([row, col], dim=0)
+    value = torch.tensor([1, 2, 4, 1, 3])
+
+    matrix = torch.tensor([[1, 4], [2, 5], [3, 6]])
+    out = spmm(index, value, 3, matrix)
+    assert out.tolist() == [[7, 16], [8, 20], [7, 19]]
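
Editor's note: the expected output can be cross-checked against the dense equivalent of the sparse operand (illustrative only, not part of the commit):

    # Dense form of the 3 x 3 sparse matrix defined by (row, col, value) above.
    import torch

    A = torch.tensor([[1, 0, 2],
                      [0, 4, 0],
                      [1, 3, 0]])
    matrix = torch.tensor([[1, 4], [2, 5], [3, 6]])
    A @ matrix  # tensor([[ 7, 16], [ 8, 20], [ 7, 19]])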

test/test_spspmm.py

Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
+from itertools import product
+
+import pytest
+import torch
+from torch_sparse import spspmm
+
+from .utils import dtypes, devices, tensor
+
+
+@pytest.mark.parametrize('dtype,device', product(dtypes, devices))
+def test_spspmm(dtype, device):
+    indexA = torch.tensor([[0, 0, 1, 2, 2], [1, 2, 0, 0, 1]], device=device)
+    valueA = tensor([1, 2, 3, 4, 5], dtype, device)
+    sizeA = torch.Size([3, 3])
+    indexB = torch.tensor([[0, 2], [1, 0]], device=device)
+    valueB = tensor([2, 4], dtype, device)
+    sizeB = torch.Size([3, 2])
+
+    indexC, valueC = spspmm(indexA, valueA, indexB, valueB, 3, 3, 2)
+    assert indexC.tolist() == [[0, 1, 2], [0, 1, 1]]
+    assert valueC.tolist() == [8, 6, 8]
+
+    A = torch.sparse_coo_tensor(indexA, valueA, sizeA, device=device)
+    A = A.to_dense().requires_grad_()
+    B = torch.sparse_coo_tensor(indexB, valueB, sizeB, device=device)
+    B = B.to_dense().requires_grad_()
+    torch.matmul(A, B).sum().backward()
+
+    valueA = valueA.requires_grad_()
+    valueB = valueB.requires_grad_()
+    indexC, valueC = spspmm(indexA, valueA, indexB, valueB, 3, 3, 2)
+    valueC.sum().backward()
+
+    assert valueA.grad.tolist() == A.grad[indexA[0], indexA[1]].tolist()
+    assert valueB.grad.tolist() == B.grad[indexB[0], indexB[1]].tolist()
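
Editor's note: the expected (indexC, valueC) pair can be verified with a dense cross-check (illustrative only, not part of the commit):

    # Densify both operands from their COO definitions above and multiply.
    import torch

    A = torch.sparse_coo_tensor([[0, 0, 1, 2, 2], [1, 2, 0, 0, 1]],
                                [1., 2., 3., 4., 5.], (3, 3)).to_dense()
    B = torch.sparse_coo_tensor([[0, 2], [1, 0]], [2., 4.], (3, 2)).to_dense()
    A @ B
    # tensor([[8., 0.],
    #         [0., 6.],
    #         [0., 8.]])  -> nonzeros at (0,0)=8, (1,1)=6, (2,1)=8, as asserted.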

test/test_transpose.py

Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
+import torch
+from torch_sparse import transpose
+
+
+def test_transpose():
+    row = torch.tensor([1, 0, 1, 0, 2, 1])
+    col = torch.tensor([0, 1, 1, 1, 0, 0])
+    index = torch.stack([row, col], dim=0)
+    value = torch.tensor([[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7]])
+
+    index, value = transpose(index, value, m=3, n=2)
+    assert index.tolist() == [[0, 0, 1, 1], [1, 2, 0, 1]]
+    assert value.tolist() == [[7, 9], [5, 6], [6, 8], [3, 4]]
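
Editor's note: the expected result here amounts to swapping row/col and coalescing duplicates, which can be cross-checked with torch's own sparse routines (illustrative only, not part of the commit):

    # Build the transposed COO tensor directly; coalesce() sorts indices and
    # sums the duplicate entries, reproducing the asserted index/value pair.
    import torch

    row = torch.tensor([1, 0, 1, 0, 2, 1])
    col = torch.tensor([0, 1, 1, 1, 0, 0])
    value = torch.tensor([[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7]])

    At = torch.sparse_coo_tensor(torch.stack([col, row]), value, (2, 3, 2)).coalesce()
    At.indices().tolist()  # [[0, 0, 1, 1], [1, 2, 0, 1]]
    At.values().tolist()   # [[7, 9], [5, 6], [6, 8], [3, 4]]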
