
Commit 14b7a32

support softmax
1 parent fd02ab2 commit 14b7a32

2 files changed: +24 −3 lines changed


stochman/nnj.py

Lines changed: 20 additions & 0 deletions
@@ -266,6 +266,26 @@ def __call__(self, x: Tensor, jacobian: bool = False) -> Union[Tensor, Tuple[Tensor, Tensor]]:
         return val
 
 
+class Softmax(AbstractActivationJacobian, nn.Softmax):
+    def _jacobian(self, x: Tensor, val: Tensor) -> Tensor:
+        if self.dim == 0:
+            raise ValueError("Jacobian computation not supported for `dim=0`")
+        jac = torch.diag_embed(val) - torch.matmul(val.unsqueeze(-1), val.unsqueeze(-2))
+        return jac
+
+    def _jacobian_mult(self, x: Tensor, val: Tensor, jac_in: Tensor) -> Tensor:
+        jac = self._jacobian(x, val)
+        n = jac_in.ndim - jac.ndim
+        jac = jac.reshape((1,) * n + jac.shape)
+        if jac_in.ndim == 4:
+            return (jac @ jac_in.permute(3, 0, 1, 2)).permute(1, 2, 3, 0)
+        if jac_in.ndim == 5:
+            return (jac @ jac_in.permute(3, 4, 0, 1, 2)).permute(2, 3, 4, 0, 1)
+        if jac_in.ndim == 6:
+            return (jac @ jac_in.permute(3, 4, 5, 0, 1, 2)).permute(3, 4, 5, 0, 1, 2)
+        return jac @ jac_in
+
+
 class BatchNorm1d(AbstractActivationJacobian, nn.BatchNorm1d):
     # only implements jacobian during testing
     def _jacobian(self, x: Tensor, val: Tensor) -> Tensor:
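
The _jacobian added above uses the standard closed form of the softmax Jacobian, J = diag(s) − s sᵀ, where s is the softmax output. As a plausibility check, this can be compared against autograd on a single input vector; the snippet below is a minimal sketch in plain PyTorch and is not part of the commit.

# Sanity-check sketch (not from the commit): verify that diag(s) - s s^T
# matches the autograd Jacobian of softmax for a single input vector.
import torch

x = torch.randn(5, dtype=torch.float64)
s = torch.softmax(x, dim=-1)

# Closed-form Jacobian, mirroring Softmax._jacobian above.
jac_closed = torch.diag_embed(s) - s.unsqueeze(-1) @ s.unsqueeze(-2)

# Reference Jacobian from autograd.
jac_auto = torch.autograd.functional.jacobian(lambda t: torch.softmax(t, dim=-1), x)

assert torch.allclose(jac_closed, jac_auto)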

tests/test_nnj.py

Lines changed: 4 additions & 3 deletions
@@ -54,6 +54,7 @@ def _compare_jacobian(f: Callable, x: torch.Tensor) -> torch.Tensor:
     (nnj.Sequential(nnj.Linear(_features, 2), nnj.Tanh()), _linear_input_shape),
     (nnj.Sequential(nnj.Linear(_features, 2), nnj.OneMinusX()), _linear_input_shape),
     (nnj.Sequential(nnj.Linear(_features, 2), nnj.PReLU()), _linear_input_shape),
+    (nnj.Sequential(nnj.Linear(_features, 2), nnj.Softmax(dim=-1)), _linear_input_shape),
     (
         nnj.Sequential(nnj.Conv1d(_features, 2, 5), nnj.ConvTranspose1d(2, _features, 5)),
         _1d_conv_input_shape,
@@ -98,7 +99,7 @@ def _compare_jacobian(f: Callable, x: torch.Tensor) -> torch.Tensor:
             nnj.Conv1d(_features, 2, 3),
             nnj.Flatten(),
             nnj.Linear(4 * 2, 5),
-            nnj.ReLU(),
+            nnj.Softmax(dim=-1),
         ),
         _1d_conv_input_shape,
     ),
@@ -107,7 +108,7 @@ def _compare_jacobian(f: Callable, x: torch.Tensor) -> torch.Tensor:
             nnj.Conv2d(_features, 2, 3),
             nnj.Flatten(),
            nnj.Linear(4 * 4 * 2, 5),
-            nnj.ReLU(),
+            nnj.Softmax(dim=-1),
         ),
         _2d_conv_input_shape,
     ),
@@ -116,7 +117,7 @@ def _compare_jacobian(f: Callable, x: torch.Tensor) -> torch.Tensor:
            nnj.Conv3d(_features, 2, 3),
            nnj.Flatten(),
            nnj.Linear(4 * 4 * 4 * 2, 5),
-            nnj.ReLU(),
+            nnj.Softmax(dim=-1),
         ),
         _3d_conv_input_shape,
     ),
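
For context, the models parametrized above are checked by comparing their analytic Jacobian against autograd. The snippet below is a hypothetical usage sketch of the new layer: the jacobian=True return convention follows the __call__ signature shown in the stochman/nnj.py hunk, while the import path and the exact output shapes are assumptions rather than something stated in this commit.

# Hypothetical usage sketch (import path and shapes are assumptions).
import torch
from stochman import nnj

# A small head ending in the newly supported Softmax layer.
model = nnj.Sequential(nnj.Linear(5, 2), nnj.Softmax(dim=-1))

x = torch.randn(7, 5)  # batch of 7 inputs with 5 features
val, jac = model(x, jacobian=True)  # forward value and batched Jacobian

# Expected (assumed) shapes: val -> (7, 2), jac -> (7, 2, 5)
print(val.shape, jac.shape)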
