Commit 3dcc29d

Merge pull request #13 from aretor/master
Perform dilation with Kronecker product
2 parents c18e4b9 + 45eeae6 commit 3dcc29d
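The idea behind the change is that kernel dilation can be written as a Kronecker product: the kernel is multiplied (via torch.kron) with a small block that is zero everywhere except its first entry, which spaces the kernel taps `dilation` steps apart, and the trailing zeros are then trimmed. Below is a minimal, self-contained sketch of that trick; the toy 1-D kernel and the dilation value are illustrative only, while the variable names follow the diff further down.

import torch

# Toy 1-D kernel and dilation factor (illustrative values only).
kernel = torch.tensor([[[1., 2., 3.]]])   # shape (out_channels, in_channels, 3)
dilation_ = (2,)
n = len(dilation_)

# Offset block: zeros of shape (1, 1, *dilation_) with a single 1 in front.
offset = torch.zeros(1, 1, *dilation_)
offset[(slice(None), slice(None), *((0,) * n))] = 1.

# The Kronecker product interleaves zeros between kernel taps; the cutoff
# slice drops the trailing zeros, so the effective size is (k - 1) * d + 1.
cutoff = tuple(slice(None, -d + 1 if d != 1 else None) for d in dilation_)
dilated = torch.kron(kernel, offset)[(slice(None), slice(None)) + cutoff]

print(dilated)  # tensor([[[1., 0., 2., 0., 3.]]]) -- effective kernel size 5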

5 files changed: +286 −94 lines changed


fft_conv_pytorch/fft_conv.py

Lines changed: 21 additions & 18 deletions
@@ -57,6 +57,7 @@ def fft_conv(
     bias: Tensor = None,
     padding: Union[int, Iterable[int]] = 0,
     stride: Union[int, Iterable[int]] = 1,
+    dilation: Union[int, Iterable[int]] = 1,
     groups: int = 1,
 ) -> Tensor:
     """Performs N-d convolution of Tensors using a fast fourier transform, which
@@ -74,9 +75,23 @@ def fft_conv(
     Returns:
         (Tensor) Convolved tensor
     """
-    # Cast padding & stride to tuples.
-    padding_ = to_ntuple(padding, n=signal.ndim - 2)
-    stride_ = to_ntuple(stride, n=signal.ndim - 2)
+
+    # Cast padding, stride & dilation to tuples.
+    n = signal.ndim - 2
+    padding_ = to_ntuple(padding, n=n)
+    stride_ = to_ntuple(stride, n=n)
+    dilation_ = to_ntuple(dilation, n=n)
+
+    # internal dilation offsets
+    offset = torch.zeros(1, 1, *dilation_)
+    offset[(slice(None), slice(None), *((0,) * n))] = 1.
+
+    # correct the kernel by cutting off unwanted dilation trailing zeros
+    cutoff = tuple(
+        slice(None, -d + 1 if d != 1 else None) for d in dilation_)
+
+    # pad the kernel internally according to the dilation parameters
+    kernel = torch.kron(kernel, offset)[(slice(None), slice(None)) + cutoff]

     # Pad the input signal & kernel tensors
     signal_padding = [p for p in padding_[::-1] for _ in range(2)]
@@ -167,21 +182,8 @@ def __init__(
         )

         kernel_size = to_ntuple(kernel_size, ndim)
-        dilation = to_ntuple(dilation, ndim)
-        total_size = tuple(
-            ((ks - 1) * dil + 1)
-            for ks, dil in zip(kernel_size, dilation)
-        )
-        weight = torch.zeros(out_channels, in_channels // groups, *total_size)
-        fill = torch.randn(out_channels, in_channels // groups, *kernel_size)
-        ids = tuple(
-            torch.arange(0, tot_sz, dil)
-            for tot_sz, dil in zip(total_size, dilation)
-        )
-
-        # workaround bc PyTorch doesn't support [:, :, <tensor tuple>] indexing
-        weight[(slice(None), slice(None),) + torch.meshgrid(*ids)] = fill
-
+        weight = torch.randn(out_channels, in_channels // groups, *kernel_size)
+
         self.weight = nn.Parameter(weight)
         self.bias = nn.Parameter(torch.randn(out_channels)) if bias else None
@@ -192,6 +194,7 @@ def forward(self, signal):
             bias=self.bias,
             padding=self.padding,
             stride=self.stride,
+            dilation=self.dilation,
             groups=self.groups,
         )

tests/test_fft_conv.py

Lines changed: 0 additions & 76 deletions
This file was deleted.
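The deleted test file is superseded by the parametrized tests below. As a quick standalone check that the new `dilation` argument of fft_conv agrees with PyTorch's native dilated convolution, a sketch along these lines should work (tensor sizes and the tolerance are illustrative, not taken from the diff):

import torch
import torch.nn.functional as F

from fft_conv_pytorch.fft_conv import fft_conv

signal = torch.randn(2, 3, 16)   # (batch, in_channels, length)
kernel = torch.randn(4, 3, 3)    # (out_channels, in_channels, kernel_size)
bias = torch.randn(4)

# Same arguments for both implementations; dilation=2 exercises the new path.
y_fft = fft_conv(signal, kernel, bias=bias, padding=2, stride=1, dilation=2)
y_ref = F.conv1d(signal, kernel, bias=bias, padding=2, stride=1, dilation=2)

assert torch.allclose(y_fft, y_ref, atol=1e-5)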

tests/test_functional.py

Lines changed: 123 additions & 0 deletions
@@ -0,0 +1,123 @@
from typing import Iterable, Union

import pytest
import torch
import torch.nn.functional as f

from fft_conv_pytorch.fft_conv import _FFTConv, fft_conv, to_ntuple
from tests.utils import _assert_almost_equal, _gcd


@pytest.mark.parametrize("in_channels", [1, 2, 3])
@pytest.mark.parametrize("out_channels", [1, 2, 3])
@pytest.mark.parametrize("groups", [1, 2, 3])
@pytest.mark.parametrize("kernel_size", [1, 2, 3])
@pytest.mark.parametrize("padding", [0, 1])
@pytest.mark.parametrize("stride", [1, 2, 3])
@pytest.mark.parametrize("dilation", [1, 2, 3])
@pytest.mark.parametrize("bias", [True, False])
@pytest.mark.parametrize("ndim", [1, 2, 3])
@pytest.mark.parametrize("input_size", [7, 8])
def test_fft_conv_functional(
    in_channels: int,
    out_channels: int,
    kernel_size: Union[int, Iterable[int]],
    padding: Union[int, Iterable[int]],
    stride: Union[int, Iterable[int]],
    dilation: Union[int, Iterable[int]],
    groups: int,
    bias: bool,
    ndim: int,
    input_size: int,
):
    torch_conv = getattr(f, f"conv{ndim}d")
    groups = _gcd(in_channels, _gcd(out_channels, groups))

    batch_size = 2  # TODO: Make this non-constant?
    dims = ndim * [input_size]
    signal = torch.randn(batch_size, in_channels, *dims)
    kwargs = dict(
        bias=torch.randn(out_channels) if bias else None,
        padding=padding,
        stride=stride,
        dilation=dilation,
        groups=groups,
    )

    kernel_size = to_ntuple(kernel_size, n=signal.ndim - 2)
    w0 = torch.randn(out_channels, in_channels // groups, *kernel_size,
                     requires_grad=True)
    w1 = w0.detach().clone().requires_grad_()

    b0 = torch.randn(out_channels, requires_grad=True) if bias else None
    b1 = b0.detach().clone().requires_grad_() if bias else None

    kwargs = dict(
        padding=padding,
        stride=stride,
        dilation=dilation,
        groups=groups,
    )

    y0 = fft_conv(signal, w0, bias=b0, **kwargs)
    y1 = torch_conv(signal, w1, bias=b1, **kwargs)

    _assert_almost_equal(y0, y1)


@pytest.mark.parametrize("in_channels", [1, 2, 3])
@pytest.mark.parametrize("out_channels", [1, 2, 3])
@pytest.mark.parametrize("groups", [1, 2, 3])
@pytest.mark.parametrize("kernel_size", [1, 2, 3])
@pytest.mark.parametrize("padding", [0, 1])
@pytest.mark.parametrize("stride", [1, 2, 3])
@pytest.mark.parametrize("dilation", [1, 2, 3])
@pytest.mark.parametrize("bias", [True, False])
@pytest.mark.parametrize("ndim", [1, 2, 3])
@pytest.mark.parametrize("input_size", [7, 8])
def test_fft_conv_backward_functional(
    in_channels: int,
    out_channels: int,
    kernel_size: Union[int, Iterable[int]],
    padding: Union[int, Iterable[int]],
    stride: Union[int, Iterable[int]],
    dilation: Union[int, Iterable[int]],
    groups: int,
    bias: bool,
    ndim: int,
    input_size: int,
):
    torch_conv = getattr(f, f"conv{ndim}d")
    groups = _gcd(in_channels, _gcd(out_channels, groups))

    batch_size = 2  # TODO: Make this non-constant?
    dims = ndim * [input_size]
    signal = torch.randn(batch_size, in_channels, *dims)

    kernel_size = to_ntuple(kernel_size, n=signal.ndim - 2)
    w0 = torch.randn(out_channels, in_channels // groups, *kernel_size,
                     requires_grad=True)
    w1 = w0.detach().clone().requires_grad_()

    b0 = torch.randn(out_channels, requires_grad=True) if bias else None
    b1 = b0.detach().clone().requires_grad_() if bias else None

    kwargs = dict(
        padding=padding,
        stride=stride,
        dilation=dilation,
        groups=groups,
    )

    y0 = fft_conv(signal, w0, bias=b0, **kwargs)
    y1 = torch_conv(signal, w1, bias=b1, **kwargs)

    # Compute pseudo-loss and gradient
    y0.sum().backward()
    y1.sum().backward()

    _assert_almost_equal(w0.grad, w1.grad)

    if bias:
        _assert_almost_equal(b0.grad, b1.grad)
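Both test files import _gcd and _assert_almost_equal from tests.utils, which is not part of this diff. A plausible sketch of what those helpers might look like (hypothetical, the actual implementations may differ): _gcd shrinks groups to a divisor of both channel counts, and _assert_almost_equal compares tensors with a loose tolerance, since the FFT path accumulates small floating-point error.

# Hypothetical tests/utils.py -- not part of this commit, shown only to make
# the tests above self-contained.
import torch


def _gcd(a: int, b: int) -> int:
    # Greatest common divisor (Euclid), so `groups` divides both channel counts.
    while b:
        a, b = b, a % b
    return a


def _assert_almost_equal(x: torch.Tensor, y: torch.Tensor) -> None:
    # Loose element-wise comparison to absorb FFT round-off error.
    assert x.shape == y.shape
    assert torch.allclose(x, y, atol=1e-5)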

tests/test_module.py

Lines changed: 127 additions & 0 deletions
@@ -0,0 +1,127 @@
from typing import Iterable, Union

import pytest
import torch
import torch.nn.functional as f

from fft_conv_pytorch.fft_conv import _FFTConv, fft_conv
from tests.utils import _assert_almost_equal, _gcd


@pytest.mark.parametrize("in_channels", [1, 2, 3])
@pytest.mark.parametrize("out_channels", [1, 2, 3])
@pytest.mark.parametrize("groups", [1, 2, 3])
@pytest.mark.parametrize("kernel_size", [1, 2, 3])
@pytest.mark.parametrize("padding", [0, 1])
@pytest.mark.parametrize("stride", [1, 2, 3])
@pytest.mark.parametrize("dilation", [1, 2, 3])
@pytest.mark.parametrize("bias", [True, False])
@pytest.mark.parametrize("ndim", [1, 2, 3])
@pytest.mark.parametrize("input_size", [7, 8])
def test_fft_conv_module(
    in_channels: int,
    out_channels: int,
    kernel_size: Union[int, Iterable[int]],
    padding: Union[int, Iterable[int]],
    stride: Union[int, Iterable[int]],
    dilation: Union[int, Iterable[int]],
    groups: int,
    bias: bool,
    ndim: int,
    input_size: int,
):
    torch_conv = getattr(f, f"conv{ndim}d")
    groups = _gcd(in_channels, _gcd(out_channels, groups))
    fft_conv_layer = _FFTConv(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=kernel_size,
        padding=padding,
        stride=stride,
        dilation=dilation,
        groups=groups,
        bias=bias,
        ndim=ndim,
    )
    batch_size = 2  # TODO: Make this non-constant?
    dims = ndim * [input_size]
    signal = torch.randn(batch_size, in_channels, *dims)

    weight = fft_conv_layer.weight
    bias = fft_conv_layer.bias

    kwargs = dict(
        padding=padding,
        stride=stride,
        dilation=dilation,
        groups=groups,
    )

    y0 = fft_conv_layer(signal)
    y1 = torch_conv(signal, weight, bias=bias, **kwargs)

    _assert_almost_equal(y0, y1)


@pytest.mark.parametrize("in_channels", [1, 2, 3])
@pytest.mark.parametrize("out_channels", [1, 2, 3])
@pytest.mark.parametrize("groups", [1, 2, 3])
@pytest.mark.parametrize("kernel_size", [1, 2, 3])
@pytest.mark.parametrize("padding", [0, 1])
@pytest.mark.parametrize("stride", [1, 2, 3])
@pytest.mark.parametrize("dilation", [1, 2, 3])
@pytest.mark.parametrize("bias", [True, False])
@pytest.mark.parametrize("ndim", [1, 2, 3])
@pytest.mark.parametrize("input_size", [7, 8])
def test_fft_conv_backward_module(
    in_channels: int,
    out_channels: int,
    kernel_size: Union[int, Iterable[int]],
    padding: Union[int, Iterable[int]],
    stride: Union[int, Iterable[int]],
    dilation: Union[int, Iterable[int]],
    groups: int,
    bias: bool,
    ndim: int,
    input_size: int,
):
    torch_conv = getattr(f, f"conv{ndim}d")
    groups = _gcd(in_channels, _gcd(out_channels, groups))
    fft_conv_layer = _FFTConv(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=kernel_size,
        padding=padding,
        stride=stride,
        dilation=dilation,
        groups=groups,
        bias=bias,
        ndim=ndim,
    )
    batch_size = 2  # TODO: Make this non-constant?
    dims = ndim * [input_size]
    signal = torch.randn(batch_size, in_channels, *dims)

    w0 = fft_conv_layer.weight
    w1 = w0.detach().clone().requires_grad_()
    b0 = fft_conv_layer.bias
    b1 = b0.detach().clone().requires_grad_() if bias else None

    kwargs = dict(
        padding=padding,
        stride=stride,
        dilation=dilation,
        groups=groups,
    )

    y0 = fft_conv_layer(signal)
    y1 = torch_conv(signal, w1, bias=b1, **kwargs)

    # Compute pseudo-loss and gradient
    y0.sum().backward()
    y1.sum().backward()

    _assert_almost_equal(w0.grad, w1.grad)
    if bias:
        _assert_almost_equal(b0.grad, b1.grad)
