update PReLU implementation

SkafteNicki · SkafteNicki · commit 9ae5c8e68e06 · 2022-02-16T13:42:48.000+01:00
diff --git a/examples/local_pca_mnist.py b/examples/local_pca_mnist.py
@@ -34,7 +34,7 @@ def get_subset_mnist(n: int = 1000):
 # Plot metric and data
 plt.figure()
 ran = torch.linspace(-3.0, 3.0, 100)
-X, Y = torch.meshgrid([ran, ran], indexing='ij')
+X, Y = torch.meshgrid([ran, ran], indexing="ij")
 XY = torch.stack((X.flatten(), Y.flatten()), dim=1)  # 10000x2
 gridM = M.metric(XY)  # 10000x2
 Mim = gridM.sum(dim=1).reshape((100, 100)).detach().t()
@@ -56,7 +56,7 @@ def get_subset_mnist(n: int = 1000):
 # Compute discretized geodesics
 plt.figure()
 ran2 = torch.linspace(-3.0, 3.0, 133)
-X2, Y2 = torch.meshgrid([ran2, ran2], indexing='ij')
+X2, Y2 = torch.meshgrid([ran2, ran2], indexing="ij")
 XY2 = torch.stack((X2.flatten(), Y2.flatten()), dim=1)  # 10000x2
 DMim = DM.metric(XY2).log().sum(dim=1).view(133, 133).t()
 plt.imshow(DMim, extent=(ran[0], ran[-1], ran[0], ran[-1]), origin="lower")
diff --git a/stochman/discretized_manifold.py b/stochman/discretized_manifold.py
@@ -55,7 +55,7 @@ def fit(self, model, grid, use_diagonals=True, batch_size=4, interpolation_noise
 
         dim = len(grid)
         if len(grid) != 2:
-            raise Exception('Currently we only support 2D grids -- sorry!')
+            raise Exception("Currently we only support 2D grids -- sorry!")
 
         # Add nodes to graph
         xsize, ysize = len(grid[0]), len(grid[1])
@@ -64,7 +64,7 @@ def fit(self, model, grid, use_diagonals=True, batch_size=4, interpolation_noise
 
         point_set = torch.cartesian_prod(
             torch.linspace(0, xsize - 1, xsize, dtype=torch.long),
-            torch.linspace(0, ysize - 1, ysize, dtype=torch.long)
+            torch.linspace(0, ysize - 1, ysize, dtype=torch.long),
         )  # (big)x2
 
         point_sets = []  # these will be [N, 2] matrices of index points
@@ -93,8 +93,8 @@ def fit(self, model, grid, use_diagonals=True, batch_size=4, interpolation_noise
         t = torch.linspace(0, 1, 2)
         for ps, nf in zip(point_sets, neighbour_funcs):
             for i in range(ceil(ps.shape[0] / batch_size)):
-                x = ps[batch_size * i:batch_size * (i + 1), 0]
-                y = ps[batch_size * i:batch_size * (i + 1), 1]
+                x = ps[batch_size * i : batch_size * (i + 1), 0]
+                y = ps[batch_size * i : batch_size * (i + 1), 1]
                 xn, yn = nf[0](x), nf[1](y)
 
                 bs = x.shape[0]  # may be different from batch size for the last batch
@@ -132,7 +132,7 @@ def fit(self, model, grid, use_diagonals=True, batch_size=4, interpolation_noise
                 self.__metric__ = M.view([*self.grid_size, d, d])  # e.g. (xsize)x(ysize)x(d)x(d)
 
             # Compute interpolation weights. We use the mean function of a GP regressor.
-            mesh = torch.meshgrid(*self.grid, indexing='ij')
+            mesh = torch.meshgrid(*self.grid, indexing="ij")
             grid_points = torch.cat(
                 [m.unsqueeze(-1) for m in mesh], dim=-1
             )  # e.g. 100x100x2 a 2D grid with 100 points in each dim
@@ -145,6 +145,7 @@ def fit(self, model, grid, use_diagonals=True, batch_size=4, interpolation_noise
             )  # (num_grid)x(d²) or (num_grid)x(d)
         except:
             import warnings
+
             warnings.warn("It appears that your model does not implement a metric.")
             # XXX: Down the road, we should be able to estimate the metric from the observed distances
 
@@ -191,9 +192,9 @@ def _grid_dist2(self, p):
         """
 
         dist2 = torch.zeros(p.shape[0], self.G.number_of_nodes())
-        mesh = torch.meshgrid(*self.grid, indexing='ij')
+        mesh = torch.meshgrid(*self.grid, indexing="ij")
         for mesh_dim, dim in zip(mesh, range(len(self.grid))):
-            dist2 += (p[:, dim].view(-1, 1) - mesh_dim.reshape(1, -1))**2
+            dist2 += (p[:, dim].view(-1, 1) - mesh_dim.reshape(1, -1)) ** 2
         return dist2
 
     def _kernel(self, p):
@@ -205,12 +206,12 @@ def _kernel(self, p):
         Output:
             val:    a torch Tensor with the kernel values.
         """
-        lengthscales = [(g[1] - g[0])**2 for g in self.grid]
+        lengthscales = [(g[1] - g[0]) ** 2 for g in self.grid]
 
         dist2 = torch.zeros(p.shape[0], self.G.number_of_nodes())
-        mesh = torch.meshgrid(*self.grid, indexing='ij')
+        mesh = torch.meshgrid(*self.grid, indexing="ij")
         for mesh_dim, dim in zip(mesh, range(len(self.grid))):
-            dist2 += (p[:, dim].view(-1, 1) - mesh_dim.reshape(1, -1))**2 / lengthscales[dim]
+            dist2 += (p[:, dim].view(-1, 1) - mesh_dim.reshape(1, -1)) ** 2 / lengthscales[dim]
 
         return torch.exp(-dist2)
 
@@ -241,9 +242,9 @@ def shortest_path(self, p1, p2):
         """
         idx1 = self._grid_point(p1)
         idx2 = self._grid_point(p2)
-        path = nx.shortest_path(self.G, source=idx1, target=idx2, weight='weight')  # list with N elements
+        path = nx.shortest_path(self.G, source=idx1, target=idx2, weight="weight")  # list with N elements
         # coordinates = self.grid.view(self.grid.shape[0], -1)[:, path] # (dim)xN
-        mesh = torch.meshgrid(*self.grid, indexing='ij')
+        mesh = torch.meshgrid(*self.grid, indexing="ij")
         raw_coordinates = [m.flatten()[path].view(1, -1) for m in mesh]
         coordinates = torch.cat(raw_coordinates, dim=0)  # (dim)xN
         N = len(path)
@@ -252,7 +253,7 @@ def shortest_path(self, p1, p2):
             curve.parameters[:, :] = coordinates[:, 1:-1].t()
         dist = 0
         for i in range(N - 1):
-            dist += self.G.edges[path[i], path[i + 1]]['weight']
+            dist += self.G.edges[path[i], path[i + 1]]["weight"]
         return curve, dist
 
     def connecting_geodesic(self, p1, p2, curve=None):
@@ -282,7 +283,7 @@ def connecting_geodesic(self, p1, p2, curve=None):
             p2 = p2.unsqueeze(0)  # 1xD
         B = p1.shape[0]
         if p1.shape != p2.shape:
-            raise NameError('shape mismatch')
+            raise NameError("shape mismatch")
 
         if curve is None:
             curve = CubicSpline(p1, p2)
@@ -295,10 +296,10 @@ def connecting_geodesic(self, p1, p2, curve=None):
                 idx1 = self._grid_point(p1[b].unsqueeze(0))
                 idx2 = self._grid_point(p2[b].unsqueeze(0))
                 path = nx.shortest_path(
-                    self.G, source=idx1, target=idx2, weight='weight'
+                    self.G, source=idx1, target=idx2, weight="weight"
                 )  # list with N elements
-                weights = [self.G.edges[path[k], path[k + 1]]['weight'] for k in range(len(path) - 1)]
-                mesh = torch.meshgrid(*self.grid, indexing='ij')
+                weights = [self.G.edges[path[k], path[k + 1]]["weight"] for k in range(len(path) - 1)]
+                mesh = torch.meshgrid(*self.grid, indexing="ij")
                 raw_coordinates = [m.flatten()[path[1:-1]].view(-1, 1) for m in mesh]
                 coordinates = torch.cat(raw_coordinates, dim=1)  # Nx(dim)
                 t = torch.tensor(weights[:-1], device=device).cumsum(dim=0) / sum(weights)
diff --git a/stochman/nnj.py b/stochman/nnj.py
@@ -319,6 +319,14 @@ def _jacobian(self, x: Tensor, val: Tensor) -> Tensor:
         return jac
 
 
+class PReLU(AbstractActivationJacobian, nn.PReLU):
+    def _jacobian(self, x: Tensor, val: Tensor) -> Tensor:
+        jac = (val >= 0.0).type(val.dtype) + (val < 0.0).type(val.dtype) * self.weight.reshape(
+            (1, self.num_parameters) + (1,) * (val.ndim - 2)
+        )
+        return jac
+
+
 class ELU(AbstractActivationJacobian, nn.ELU):
     def _jacobian(self, x: Tensor, val: Tensor) -> Tensor:
         jac = torch.ones_like(val)
@@ -340,13 +348,6 @@ def _jacobian(self, x: Tensor, val: Tensor) -> Tensor:
         return jac
 
 
-class PReLU(AbstractActivationJacobian, nn.PReLU):
-    def _jacobian(self, x: Tensor, val: Tensor) -> Tensor:
-        jac = torch.ones_like(val)
-        jac[x < 0.0] = self.weight
-        return jac
-
-
 class LeakyReLU(AbstractActivationJacobian, nn.LeakyReLU):
     def _jacobian(self, x: Tensor, val: Tensor) -> Tensor:
         jac = torch.ones_like(val)
diff --git a/tests/test_nnj.py b/tests/test_nnj.py
@@ -56,6 +56,7 @@ def _compare_jacobian(f: Callable, x: torch.Tensor) -> torch.Tensor:
         (nnj.Sequential(nnj.Linear(_features, 2), nnj.OneMinusX()), _linear_input_shape),
         (nnj.Sequential(nnj.Linear(_features, 2), nnj.PReLU()), _linear_input_shape),
         (nnj.Sequential(nnj.Linear(_features, 2), nnj.Softmax(dim=-1)), _linear_input_shape),
+        (nnj.Sequential(nnj.Linear(_features, 2), nnj.PReLU()), _linear_input_shape),
         (
             nnj.Sequential(nnj.Conv1d(_features, 2, 5), nnj.ConvTranspose1d(2, _features, 5)),
             _1d_conv_input_shape,
@@ -145,7 +146,7 @@ def test_jacobians(self, model, input_shape, device, dtype):
         input = torch.randn(*input_shape, device=device, dtype=dtype)
         _, jac = model(input, jacobian=True)
         jacnum = _compare_jacobian(model, input).to(device)
-        assert torch.isclose(jac, jacnum, atol=1e-4).all(), "jacobians did not match"
+        assert torch.isclose(jac, jacnum, atol=1e-3).all(), "jacobians did not match"
 
     @pytest.mark.parametrize("return_jac", [True, False])
     def test_jac_return(self, model, input_shape, device, return_jac):