
Commit 442361b

Modified TorchOperator for cupy integration
Use DLPack, as explained in https://docs.cupy.dev/en/stable/reference/interoperability.html, to allow moving arrays on the GPU between PyTorch and CuPy.
1 parent ed991d9 commit 442361b
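
The pattern the commit relies on is the DLPack exchange described in the CuPy interoperability docs linked above: a torch tensor is exposed to CuPy (and back) without copying, so the data never leaves the GPU. A minimal sketch of that round-trip (illustrative only; the names t, a, t2 are hypothetical and not part of the diff):

    import cupy as cp
    import torch
    from torch.utils.dlpack import from_dlpack, to_dlpack

    t = torch.ones(3, device='cuda')     # torch tensor living on the GPU
    a = cp.fromDlpack(to_dlpack(t))      # expose it to cupy without copying
    a *= 2.0                             # any cupy computation
    t2 = from_dlpack(a.toDlpack())       # hand the result back to torch, still on GPU

The modified forward/backward below use exactly these two conversions whenever ``pylops=True`` and the device is not ``'cpu'``.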

2 files changed: +57 -9 lines

pylops_gpu/TorchOperator.py

Lines changed: 42 additions & 6 deletions
@@ -1,5 +1,13 @@
 import torch

+from torch.utils.dlpack import from_dlpack, to_dlpack
+from pylops.utils import deps
+
+if deps.cupy_enabled:
+    import cupy as cp
+else:
+    cp = None
+

 class _TorchOperator(torch.autograd.Function):
     """Wrapper class for PyLops operators into Torch functions
@@ -16,20 +24,47 @@ def forward(ctx, x, forw, adj, pylops, device):
         ctx.pylops = pylops
         ctx.device = device

+        # prepare input
         if ctx.pylops:
-            x = x.cpu().detach().numpy()
+            if ctx.device == 'cpu':
+                # bring x to cpu and numpy
+                x = x.cpu().detach().numpy()
+            else:
+                # pass x to cupy using DLPack
+                x = cp.fromDlpack(to_dlpack(x))
+
+        # apply forward operator
         y = ctx.forw(x)
+
+        # prepare output
         if ctx.pylops:
-            y = torch.from_numpy(y).to(ctx.device)
+            if ctx.device == 'cpu':
+                # move y to torch and device
+                y = torch.from_numpy(y).to(ctx.device)
+            else:
+                # move y back to torch using DLPack
+                y = from_dlpack(y.toDlpack())
         return y

     @staticmethod
     def backward(ctx, y):
+        # prepare input
         if ctx.pylops:
-            y = y.cpu().detach().numpy()
+            if ctx.device == 'cpu':
+                y = y.cpu().detach().numpy()
+            else:
+                # pass y to cupy using DLPack
+                y = cp.fromDlpack(to_dlpack(y))
+
+        # apply adjoint operator
         x = ctx.adj(y)
+
+        # prepare output
         if ctx.pylops:
-            x = torch.from_numpy(x).to(ctx.device)
+            if ctx.device == 'cpu':
+                x = torch.from_numpy(x).to(ctx.device)
+            else:
+                x = from_dlpack(x.toDlpack())
         return x, None, None, None, None


@@ -75,6 +110,7 @@ def __init__(self, Op, batch=False, pylops=False, device='cpu'):
         else:
             self.matvec = lambda x: Op.matmat(x, kfirst=True)
             self.rmatvec = lambda x: Op.rmatmat(x, kfirst=True)
+        self.Top = _TorchOperator.apply

     def apply(self, x):
         """Apply forward pass to input vector
@@ -90,5 +126,5 @@ def apply(self, x):
             Output array resulting from the application of the operator to ``x``.

         """
-        return _TorchOperator.apply(x, self.matvec, self.rmatvec,
-                                    self.pylops, self.device)
+        return self.Top(x, self.matvec, self.rmatvec,
+                        self.pylops, self.device)
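
For context, a hypothetical end-to-end usage sketch of the modified operator on GPU (not part of the commit). It assumes the constructor only stores ``matvec``/``rmatvec`` and the ``pylops``/``device`` flags, as the hunks above suggest, and uses a toy stand-in operator whose matvec/rmatvec act on cupy arrays; the names CupyDiagonal, Dop and d are made up for illustration:

    import cupy as cp
    import torch
    import pylops_gpu

    class CupyDiagonal:
        # toy stand-in for a pylops operator acting on cupy arrays
        def __init__(self, d):
            self.d = d
        def matvec(self, x):
            return self.d * x
        def rmatvec(self, y):
            return self.d * y

    d = cp.arange(1., 11.)                      # diagonal, already on the GPU
    Dop = pylops_gpu.TorchOperator(CupyDiagonal(d),
                                   pylops=True, device='cuda')
    x = torch.ones(10, dtype=torch.double, device='cuda', requires_grad=True)
    y = Dop.apply(x)       # torch -> cupy (DLPack) -> matvec -> torch, all on GPU
    y.sum().backward()     # the gradient flows through rmatvec the same way
    print(x.grad)          # expected to equal d, returned as a torch tensor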

tutorials/ad.py

Lines changed: 15 additions & 3 deletions
@@ -19,6 +19,7 @@
 import numpy as np
 import torch
 import matplotlib.pyplot as plt
+from torch.autograd import gradcheck

 import pylops_gpu
 from pylops_gpu.utils.backend import device
@@ -61,15 +62,15 @@
 # :math:`\mathbf{v}` that we have provided to PyTorch ``backward``.

 nx, ny = 10, 6
-x0 = torch.arange(nx, dtype=torch.float32, requires_grad=True)
+x0 = torch.arange(nx, dtype=torch.double, requires_grad=True)

 # Forward
-A = torch.normal(0., 1., (ny, nx))
+A = torch.normal(0., 1., (ny, nx), dtype=torch.double)
 Aop = pylops_gpu.TorchOperator(pylops_gpu.MatrixMult(A))
 y = Aop.apply(torch.sin(x0))

 # AD
-v = torch.ones(ny)
+v = torch.ones(ny, dtype=torch.double)
 y.backward(v, retain_graph=True)
 adgrad = x0.grad

@@ -81,6 +82,17 @@
 print('AD gradient: ', adgrad)
 print('Analytical gradient: ', anagrad)

+
+###############################################################################
+# Similarly we can use :func:`torch.autograd.gradcheck` directly from
+# PyTorch. Note that doubles must be used for this to succeed with very small
+# ``eps`` and ``atol``.
+input = (torch.arange(nx, dtype=torch.double, requires_grad=True),
+         Aop.matvec, Aop.rmatvec, Aop.pylops, Aop.device)
+test = gradcheck(Aop.Top, input, eps=1e-6, atol=1e-4)
+print(test)
+
+
 ###############################################################################
 # Note that while matrix-vector multiplication could have been performed using
 # the native PyTorch operator :func:`torch.matmul`, in this case we have shown
