Merge pull request #15 from fkodom/add-benchmarks

fkodom · web-flow · commit 833f2710798e · 2022-01-25T17:11:39.000-06:00
Add Benchmark Plots
diff --git a/.gitignore b/.gitignore
@@ -1,5 +1,132 @@
-__pycache__
-.idea
-.vscode
-.mypy_cache
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+client_secrets.json
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
 .ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
diff --git a/README.md b/README.md
@@ -5,6 +5,7 @@ Implementation of 1D, 2D, and 3D FFT convolutions in PyTorch.
 * **Much slower** than direct convolution for small kernels.
 * In my local tests, FFT convolution is faster when the kernel has >100 or so elements.
     * Dependent on machine and PyTorch version.
+    * Also see benchmarks below.
 
 
 ## Install
@@ -21,7 +22,7 @@ cd fft-conv-pytorch
 pip install .
 ```
 
-### Example Usage
+## Example Usage
 
 ```python
 import torch
@@ -45,4 +46,18 @@ fft_conv = FFTConv1d(3, 2, 128, bias=True)
 fft_conv.weight = torch.nn.Parameter(kernel)
 fft_conv.bias = torch.nn.Parameter(bias)
 out = fft_conv(signal)
-```
+```
+
+## Benchmarks
+
+The plot below benchmarks FFT convolution against PyTorch's direct convolution in 1D,
+2D, and 3D. The exact times depend heavily on your local machine, but the relative
+scaling with kernel size should remain the same.
+
+Dimensions | Input Size   | Input Channels | Output Channels | Bias | Padding | Stride | Dilation
+-----------|--------------|----------------|-----------------|------|---------|--------|---------
+1          | (4096)       | 4              | 4               | True | 0       | 1      | 1
+2          | (512, 512)   | 4              | 4               | True | 0       | 1      | 1
+3          | (64, 64, 64) | 4              | 4               | True | 0       | 1      | 1
+
+![Benchmark Plot](doc/benchmark.png)
diff --git a/doc/benchmark.png b/doc/benchmark.png
diff --git a/doc/scripts/generate_benchmark_plot.py b/doc/scripts/generate_benchmark_plot.py
@@ -0,0 +1,145 @@
+from functools import lru_cache, partial
+from timeit import Timer
+from typing import Callable, Dict, Iterable, List, NamedTuple, Optional, Sequence, Union
+
+import matplotlib.pyplot as plt
+import numpy as np
+import torch
+import torch.nn.functional as f
+from tqdm import tqdm
+
+from fft_conv_pytorch.fft_conv import fft_conv, to_ntuple
+
+
+class Benchmark(NamedTuple):
+    """Summary statistics of repeated timings, in seconds."""
+
+    # Mean of the measured run times.
+    mean: float
+    # Standard deviation of the measured run times.
+    std: float
+
+    def __repr__(self):
+        # Derive the name from the type so the repr always matches the class.
+        return f"{type(self).__name__}(mean: {self.mean:.3e}, std: {self.std:.3e})"
+
+    def __str__(self):
+        # Human-readable "(mean ± std) s" form.
+        return f"({self.mean:.3e} \u00B1 {self.std:.3e}) s"
+
+
+def benchmark(fn: Callable, *args, num_iterations: int = 10, **kwargs) -> Benchmark:
+    """Time repeated calls of ``fn(*args, **kwargs)`` and summarize them.
+
+    The call is timed ``num_iterations + 1`` times, one call per timing. The
+    first timing is dropped before the statistics are computed (presumably a
+    warm-up run).
+
+    Returns:
+        A ``Benchmark`` with the mean and standard deviation of the kept
+        timings.
+    """
+    namespace = {"fn": fn, "args": args, "kwargs": kwargs}
+    durations = Timer("fn(*args, **kwargs)", globals=namespace).repeat(
+        repeat=num_iterations + 1, number=1
+    )
+    kept = durations[1:]
+    return Benchmark(np.mean(kept).item(), np.std(kept).item())
+
+
+@lru_cache(maxsize=1)
+def _get_conv_inputs(
+    ndim: int,
+    input_size: int,
+    kernel_size: Union[int, Iterable[int]],
+    batch_size: int = 2,
+    in_channels: int = 8,
+    out_channels: int = 8,
+):
+    """Create random signal, weight and bias tensors for one convolution.
+
+    Only the result for the most recent argument combination is cached, and
+    the cache requires every argument to be hashable.
+
+    Returns:
+        ``(signal, weight, bias)``. ``signal`` has ``batch_size`` and
+        ``in_channels`` leading axes followed by ``ndim`` axes of length
+        ``input_size``; ``weight`` and ``bias`` are created with
+        ``requires_grad=True``.
+    """
+    # NOTE(review): the README benchmark table lists 4 input/output channels,
+    # while these defaults are 8 -- confirm which one is intended.
+    spatial_shape = [input_size] * ndim
+    signal = torch.randn(batch_size, in_channels, *spatial_shape)
+
+    # One kernel extent per axis after the batch and channel axes.
+    kernel_shape = to_ntuple(kernel_size, n=signal.ndim - 2)
+    weight = torch.randn(out_channels, in_channels, *kernel_shape, requires_grad=True)
+    bias = torch.randn(out_channels, requires_grad=True)
+
+    return signal, weight, bias
+
+
+def benchmark_conv(
+    ndim: int,
+    input_size: int,
+    kernel_size: int,
+    fft: bool = True,
+    num_iterations: int = 10,
+):
+    """Benchmark one ``ndim``-dimensional convolution on random inputs.
+
+    Args:
+        ndim: Number of spatial dimensions of the convolution.
+        input_size: Length of every spatial axis of the input signal.
+        kernel_size: Kernel size forwarded to ``_get_conv_inputs``.
+        fft: Time ``fft_conv`` when true, otherwise the matching
+            ``torch.nn.functional.conv{ndim}d``.
+        num_iterations: Forwarded to ``benchmark``.
+
+    Returns:
+        Whatever ``benchmark`` returns for the selected convolution.
+    """
+    if fft:
+        conv_fn = fft_conv
+    else:
+        # Look up conv1d / conv2d / conv3d by name.
+        conv_fn = getattr(f, f"conv{ndim}d")
+
+    inputs = _get_conv_inputs(ndim=ndim, input_size=input_size, kernel_size=kernel_size)
+    signal, weight, bias = inputs
+    return benchmark(conv_fn, signal, weight, bias=bias, num_iterations=num_iterations)
+
+
+def benchmark_kernel_size(
+    kernel_sizes: Sequence[int],
+    ndim: int,
+    input_size: int,
+    fft: bool = True,
+    num_iterations: int = 10,
+    desc: str = "",
+) -> List[Benchmark]:
+    """Run ``benchmark_conv`` once per kernel size, showing a progress bar.
+
+    Args:
+        kernel_sizes: Kernel sizes to benchmark, in order.
+        ndim: Forwarded to ``benchmark_conv``.
+        input_size: Forwarded to ``benchmark_conv``.
+        fft: Forwarded to ``benchmark_conv``.
+        num_iterations: Forwarded to ``benchmark_conv``.
+        desc: Description shown by the ``tqdm`` progress bar.
+
+    Returns:
+        One result per entry of ``kernel_sizes``, in the same order.
+    """
+    results = []
+    for size in tqdm(kernel_sizes, desc=desc):
+        result = benchmark_conv(
+            ndim=ndim,
+            input_size=input_size,
+            kernel_size=size,
+            fft=fft,
+            num_iterations=num_iterations,
+        )
+        results.append(result)
+    return results
+
+
+def _plot_benchmarks(
+    benchmarks: List[Benchmark],
+    config: Dict,
+    ax: plt.Axes,
+    color: str,
+    label: Optional[str] = None,
+):
+    """Draw mean timings with a one-standard-deviation band on ``ax``.
+
+    Args:
+        benchmarks: Results to plot, one per entry of ``config["kernel_sizes"]``.
+        config: Benchmark configuration; ``"kernel_sizes"`` supplies the x
+            values and ``"ndim"`` the title and x-axis label.
+        ax: Axes to draw on.
+        color: Used as the line format and as the band's face color.
+        label: Legend label for the line.
+    """
+    ms_per_second = 1000
+    sizes = config["kernel_sizes"]
+    mean_ms = np.array([b.mean for b in benchmarks]) * ms_per_second
+    std_ms = np.array([b.std for b in benchmarks]) * ms_per_second
+
+    ax.plot(sizes, mean_ms, color, label=label)
+    # The band is labelled "_nolegend_" so it stays out of automatic legends.
+    ax.fill_between(
+        sizes,
+        mean_ms - std_ms,
+        mean_ms + std_ms,
+        facecolor=color,
+        alpha=0.25,
+        label="_nolegend_",
+    )
+
+    ndim = config["ndim"]
+    ax.set_title(f"{ndim}D")
+    # e.g. "(n x n x n)" for ndim == 3.
+    kernel_size_str = "(" + " x ".join("n" for _ in range(ndim)) + ")"
+    ax.set_xlabel(f"Kernel Size {kernel_size_str}")
+
+
+if __name__ == "__main__":
+    import os
+
+    # One entry per subplot. Every key is forwarded to benchmark_kernel_size
+    # as a keyword argument; "ndim" and "kernel_sizes" are also read when
+    # plotting.
+    configs = [
+        {
+            "ndim": 1,
+            "input_size": 4096,
+            "num_iterations": 256,
+            "kernel_sizes": np.arange(64, 513, 64),
+        },
+        {
+            "ndim": 2,
+            "input_size": 512,
+            "num_iterations": 16,
+            "kernel_sizes": np.arange(4, 49, 6),
+        },
+        {
+            "ndim": 3,
+            "input_size": 64,
+            "num_iterations": 16,
+            "kernel_sizes": np.arange(2, 17, 2),
+        },
+    ]
+
+    # The plot is written to the parent of this script's directory.
+    save_dir = os.path.join(os.path.dirname(__file__), os.path.pardir)
+    # squeeze=False keeps `ax` two-dimensional even for a single config.
+    fig, ax = plt.subplots(
+        1, len(configs), figsize=(4 * len(configs), 4), squeeze=False
+    )
+
+    for i, config in enumerate(configs):
+        fft = benchmark_kernel_size(fft=True, **config, desc=f"FFT {config['ndim']}D")
+        _plot_benchmarks(fft, config=config, ax=ax[0, i], color="r", label="FFT")
+
+        direct = benchmark_kernel_size(
+            fft=False, **config, desc=f"Direct {config['ndim']}D"
+        )
+        _plot_benchmarks(direct, config=config, ax=ax[0, i], color="b", label="Direct")
+
+    ax[0, 0].set_ylabel("Execution Time (ms)")
+    # The lines already carry their "FFT" / "Direct" labels, so let the legend
+    # pick them up. A bare label list is matched to artists purely by order
+    # and can end up labelling a shaded band instead of the "Direct" line.
+    plt.legend()
+    plt.savefig(os.path.join(save_dir, "benchmark.png"))