conv2d gradient v0

jessegrabowski · jessegrabowski · commit 880ef1da7bed · 2025-07-14T21:45:19.000+08:00
diff --git a/pytensor/tensor/signal/conv.py b/pytensor/tensor/signal/conv.py
@@ -6,6 +6,7 @@
 
 from pytensor.gradient import DisconnectedType
 from pytensor.graph import Apply, Constant
+from pytensor.graph.op import Op
 from pytensor.link.c.op import COp
 from pytensor.scalar import as_scalar
 from pytensor.scalar.basic import upcast
@@ -220,18 +221,22 @@ class Convolve2D(Op):
 
     def __init__(
         self,
-        mode: Literal["full", "valid", "same"] = "full",
+        mode: Literal["full", "valid"] = "full",
         boundary: Literal["fill", "wrap", "symm"] = "fill",
         fillvalue: float | int = 0,
     ):
-        if mode not in ("full", "valid", "same"):
+        # "same" is rejected by the Op itself for now; the TODO in convolve2d
+        # tracks handling it symbolically instead.
+        if mode not in ("full", "valid"):
             raise ValueError(f"Invalid mode: {mode}")
+        # Validate eagerly so a bad `boundary` fails at construction time
+        # rather than when `perform` hands it to scipy.
         if boundary not in ("fill", "wrap", "symm"):
             raise ValueError(f"Invalid boundary: {boundary}")
 
         self.mode = mode
         self.boundary = boundary
         self.fillvalue = fillvalue
 
     def make_node(self, in1, in2):
         in1, in2 = map(as_tensor_variable, (in1, in2))
@@ -262,8 +261,13 @@ def make_node(self, in1, in2):
 
     def perform(self, node, inputs, outputs):
         in1, in2 = inputs
+
+        # TODO: For float/complex inputs, consider an FFT path via
+        # scipy.signal.convolve(in1, in2, mode=self.mode, method="fft").
+        # That function takes no boundary/fillvalue arguments, so it can only
+        # stand in for boundary="fill" with fillvalue=0.
         outputs[0][0] = scipy_convolve2d(
             in1, in2, mode=self.mode, boundary=self.boundary, fillvalue=self.fillvalue
         )
 
     def infer_shape(self, fgraph, node, shapes):
@@ -284,7 +288,42 @@ def infer_shape(self, fgraph, node, shapes):
         return [shape]
 
     def L_op(self, inputs, outputs, output_grads):
-        raise NotImplementedError
+        """Return the gradients of the cost with respect to ``in1`` and ``in2``.
+
+        Only ``mode="full"`` is supported. For ``out = full_conv(in1, in2)`` the
+        gradient with respect to one input is the "valid" cross-correlation of
+        the output gradient with the other input, expressed here as a "valid"
+        convolution with that input flipped along both axes.
+
+        NOTE(review): the derivation assumes zero padding (``boundary="fill"``
+        with ``fillvalue=0``); other settings likely need extra terms -- confirm.
+
+        Raises
+        ------
+        NotImplementedError
+            If ``self.mode`` is not ``"full"``.
+        """
+        if self.mode != "full":
+            # Without this guard the gradients below would never be bound and
+            # the return statement would fail with an UnboundLocalError.
+            raise NotImplementedError(
+                f"Convolve2D gradient is not implemented for mode={self.mode!r}"
+            )
+
+        in1, in2 = inputs
+        incoming_grads = output_grads[0]
+
+        # Same Op configuration, switched to "valid" mode.
+        prop_dict = self._props_dict()
+        prop_dict["mode"] = "valid"
+        conv_valid = type(self)(**prop_dict)
+
+        # Flipping turns the convolution into the required cross-correlation;
+        # in1/in2 are assumed to be the 2-D core inputs of this Op.
+        in1_grad = conv_valid(incoming_grads, in2[::-1, ::-1])
+        in2_grad = conv_valid(incoming_grads, in1[::-1, ::-1])
+
+        return [in1_grad, in2_grad]
 
 
 def convolve2d(
@@ -325,6 +340,9 @@ def convolve2d(
     in1 = as_tensor_variable(in1)
     in2 = as_tensor_variable(in2)
 
+    # TODO: Handle the `boundary` options symbolically
+    # TODO: Handle mode="same" symbolically; Convolve2D itself rejects it
+
     blockwise_convolve = Blockwise(
         Convolve2D(mode=mode, boundary=boundary, fillvalue=fillvalue)
     )
diff --git a/tests/tensor/signal/test_conv.py b/tests/tensor/signal/test_conv.py
@@ -163,5 +163,36 @@ def test_convolve2d(kernel_shape, data_shape, mode, boundary):
         scipy_convolve2d(
             data_val, kernel_val, mode=mode, boundary=boundary, fillvalue=0
         ),
-        rtol=1e-6 if config.floatX == "float32" else 1e-15,
+        atol=1e-6 if config.floatX == "float32" else 1e-8,
     )
+
+    utt.verify_grad(lambda k: op(data_val, k).sum(), [kernel_val], eps=1e-4)
+
+
+# @pytest.mark.parametrize(
+#     "data_shape, kernel_shape", [[(10, 1, 8, 8), (3, 1, 3, 3)], # 8x8 grayscale
+#                                  [(1000, 1, 8, 8), (3, 1, 1, 3)], # same, but with 1000 images
+#                                  [(10, 3, 64, 64), (10, 3, 8, 8)], # 64x64 RGB
+#                                  [(1000, 3, 64, 64), (10, 3, 8, 8)], # same, but with 1000 images
+#                                  [(3, 100, 100, 100), (250, 100, 50, 50)]], # very large, e.g. a deep hidden layer
+#
+#     ids=lambda x: f"data_shape={x[0]}, kernel_shape={x[1]}"
+# )
+# @pytest.mark.parametrize('func', ['new', 'theano'], ids=['new-impl', 'theano-impl'])
+# def test_conv2d_nn_benchmark(data_shape, kernel_shape, func, benchmark):
+#     import pytensor.tensor as pt
+#     x = pt.tensor("x", shape=data_shape)
+#     y = pt.tensor("y", shape=kernel_shape)
+#
+#     if func == 'new':
+#         out = nn_conv2d(x, y)
+#     else:
+#         out = conv2d(input=x, filters=y, border_mode="valid")
+#
+#     rng = np.random.default_rng(38)
+#     x_test = rng.normal(size=data_shape).astype(x.dtype)
+#     y_test = rng.normal(size=kernel_shape).astype(y.dtype)
+#
+#     fn = function([x, y], out, trust_input=True)
+#
+#     benchmark(fn, x_test, y_test)