API to convolve arrays/tensors with continuous convolution kernels (#378)

wjakob · wjakob · commit 583fde5e0b6a · 2025-04-14T23:01:54.000+09:00
This PR adds a new function ``drjit.convolve()`` that repurposes the
``drjit.resample()`` infrastructure to convolve one or more axes of a
Dr.Jit array or tensor with a 1D filter. The user can choose one of
multiple presets or specify a custom function.
diff --git a/docs/reference.rst b/docs/reference.rst
@@ -116,6 +116,7 @@ Rearranging array contents
 .. autofunction:: tile
 .. autofunction:: repeat
 .. autofunction:: resample
+.. autofunction:: convolve
 
 Random number generation
 ------------------------
diff --git a/drjit/__init__.py b/drjit/__init__.py
@@ -1830,8 +1830,7 @@ def backward(self):
 def resample(
     source: ArrayT,
     shape: Sequence[int],
-    *,
-    filter: Union[Literal["box", "linear", "hamming", "cubic", "lanczos"], Callable[[float], float]] = "cubic",
+    filter: Union[Literal["box", "linear", "hamming", "cubic", "lanczos", "gaussian"], Callable[[float], float]] = "cubic",
     filter_radius: Optional[float] = None
 ) -> ArrayT:
     """
@@ -1872,16 +1871,21 @@ def resample(
     - ``"hamming"``: uses the same number of input samples as ``"linear"`` but
       better preserves sharpness when downscaling. Do not use for upscaling.
 
-    - ``"cubic"``: use cubic filter kernel that uses :math:`4^n`
+    - ``"cubic"``: use cubic filter kernel that queries :math:`4^n`
       neighbors to reconstruct each output sample when upsampling. Produces
       high-quality results. This is the default.
 
-    - ``"lanczos"``: use a windowed Lanczos filter that uses :math:`6^n`
+    - ``"lanczos"``: use a windowed Lanczos filter that queries :math:`6^n`
       neighbors to reconstruct each output sample when upsampling. This is the
       best filter for smooth signals, but also the costliest. The Lanczos
       filter is susceptible to ringing when the input array contains
       discontinuities.
 
+    - ``"gaussian"``: use a Gaussian filter that queries :math:`4^n` neighbors
+      to reconstruct each output sample when upsampling. The kernel has a
+      standard deviation of 0.5 and is truncated after 4 standard deviations.
+      This filter is mainly useful when intending to blur a signal.
+
     - Besides the above choices, it is also possible to specify a custom filter.
       To do so, use the ``filter`` argument to pass a Python callable with
       signature ``Callable[[float], float]``. In this case, you must also
@@ -1961,6 +1965,95 @@ def resample(
     else:
         return value
 
+def convolve(
+    source: ArrayT,
+    filter: Union[Literal["box", "linear", "hamming", "cubic", "lanczos", "gaussian"], Callable[[float], float]],
+    filter_radius: float,
+    axis: Union[int, Tuple[int, ...], None] = None
+) -> ArrayT:
+    """
+    Convolve one or more axes of an input array/tensor with a 1D filter
+
+    This function filters one or more axes of a Dr.Jit array or tensor, for
+    example to convolve an image with a 2D Gaussian filter to blur spatial
+    detail.
+
+    .. code-block:: python
+
+       image: TensorXf = ...  # an RGB image
+
+       blurred_image = dr.convolve(
+           image,
+           filter='gaussian',
+           filter_radius=10
+       )
+
+    The filter weights are renormalized to reduce edge effects near the
+    boundary of the array.
+
+    The function supports a set of provided filters, and custom filters
+    can also be specified. This works analogously to the :py:func:`resample`
+    function; please refer to its documentation for details.
+
+    Args:
+        source (dr.ArrayBase): The Dr.Jit tensor or 1D array to be convolved.
+
+        filter (str | Callable[[float], float])
+          The desired reconstruction filter, see the above text for an overview.
+          Alternatively, a custom reconstruction filter function can also be
+          specified.
+
+        filter_radius (float)
+          The radius of the continuous function to be used in the convolution.
+
+        axis (int | tuple[int, ...] | ... | None): The axis or set of axes
+          along which to convolve. The default argument ``axis=None`` causes all
+          axes to be convolved. Negative values count from the last dimension.
+
+    Returns:
+        drjit.ArrayBase: The convolved output array. Its type matches ``source``.
+    """
+
+    shape = source.shape
+    strides = _compute_strides(shape)
+    ndim = len(shape)
+    tp = type(source)
+    value = source.array
+
+    if axis is None:
+        axis = tuple(range(ndim))
+    elif isinstance(axis, int):
+        axis = (axis, )
+
+    for i in axis:
+        if i < 0:
+            i = ndim + i
+        res = shape[i]
+
+        # Cache resampler in case it can be reused
+        key = (res, res, filter, filter_radius)
+
+        resampler = _resample_cache.get(key, None)
+        if resampler is None:
+            resampler = detail.Resampler(
+                source_res=res,
+                target_res=res,
+                filter=filter,
+                filter_radius=filter_radius,
+                convolve=True
+            )
+            _resample_cache[key] = resampler
+
+        value = custom(_ResampleOp,
+            resampler=resampler,
+            source=value,
+            stride=strides[i])
+
+    if is_tensor_v(tp):
+        return tp(value, shape)
+    else:
+        return value
+
 
 def _normalize_axis_tuple(t: Union[int, Tuple[int, ...]], ndim: int, name: str) -> List[int]:
     if isinstance(t, int):
diff --git a/include/drjit/resample.h b/include/drjit/resample.h
@@ -34,7 +34,7 @@ class DRJIT_EXTRA_EXPORT Resampler {
      * Create a Resampler that uses a predefined reconstruction filter to
      * resample a signal from resolution ``source_res`` to ``target_res``.
      *
-     * The following options are available:
+     * The following ``filter`` presets are available:
      *
      * - ``"box"``: use nearest-neighbor interpolation/averaging. This is
      *   very efficient but generally produces sub-par output that is either
@@ -44,16 +44,29 @@ class DRJIT_EXTRA_EXPORT Resampler {
      *    reconstruct each output sample when upsampling. Tends to produce
      *    relatively blurry results.
      *
-     * - ``"cubic"``: use cubic filter kernel that uses 4 neighbors to
+     * - ``"hamming"``: uses the same number of input samples as ``"linear"``
+     *    but better preserves sharpness when downscaling. Do not use for
+     *    upscaling.
+     *
+     * - ``"cubic"``: use cubic filter kernel that queries 4 neighbors to
      *   reconstruct each output sample when upsampling. Produces high-quality
      *   results.
      *
-     * - ``"lanczos"``: use a windowed Lanczos filter that uses 6 neighbors to
-     *   reconstruct each output sample when upsampling. This is the best filter
-     *   for smooth signals, but also the costliest. The Lanczos filter is
-     *   susceptible to ringing when the input array contains discontinuities.
+     * - ``"lanczos"``: use a windowed Lanczos filter that queries 6 neighbors
+     *   to reconstruct each output sample when upsampling. This is the best
+     *   filter for smooth signals, but also the costliest. The Lanczos filter
+     *   is susceptible to ringing when the input array contains discontinuities.
+     *
+     * - ``"gaussian"``: use a Gaussian filter that queries 4 neighbors to
+     *    reconstruct each output sample when upsampling. The Gaussian has a
+     *    standard deviation of 0.5 and is truncated after 4 standard
+     *    deviations. This filter is mainly useful when intending to blur a signal.
+     *
+     * The optional ``radius_scale`` parameter can be used to scale the
+     * filter kernel radius.
      */
-    Resampler(uint32_t source_res, uint32_t target_res, const char *filter);
+    Resampler(uint32_t source_res, uint32_t target_res, const char *filter,
+              double radius_scale = 1.0);
 
     /**
      * \brief Construct a Resampler using a custom filter kernel.
diff --git a/src/extra/resample.cpp b/src/extra/resample.cpp
@@ -10,6 +10,7 @@
 
 #include <drjit/resample.h>
 #include <drjit/while_loop.h>
+#include <drjit/math.h>
 #include <nanothread/nanothread.h>
 #include <cmath>
 #include <algorithm>
@@ -28,7 +29,7 @@ struct Resampler::Impl {
     mutable std::any weights_cache;
 
     Impl(uint32_t source_res, uint32_t target_res, Resampler::Filter filter,
-         const void *payload, double radius)
+         const void *payload, double radius, double radius_scale)
         : source_res(source_res), target_res(target_res) {
         if (source_res == 0 || target_res == 0)
             throw std::runtime_error("drjit.Resampler(): source/target resolution cannot be zero!");
@@ -41,7 +42,14 @@ struct Resampler::Impl {
             radius *= scale;
         }
 
+        if (source_res == target_res) {
+            // Convolution mode, adapt to filter size scale factor
+            radius *= radius_scale;
+            filter_scale /= radius_scale;
+        }
+
         taps = (uint32_t) std::ceil(radius * 2);
+
         offset = unique_ptr<uint32_t[]>(new uint32_t[target_res]);
         weights = unique_ptr<double[]>(new double[taps * target_res]);
 
@@ -121,7 +129,7 @@ static inline double sinc(double x) {
     return std::sin(x) / x;
 }
 
-Resampler::Resampler(uint32_t source_res, uint32_t target_res, const char *filter) {
+Resampler::Resampler(uint32_t source_res, uint32_t target_res, const char *filter, double radius_scale) {
     Resampler::Filter filter_cb = nullptr;
     double radius = 0.0;
 
@@ -167,18 +175,29 @@ Resampler::Resampler(uint32_t source_res, uint32_t target_res, const char *filte
             return sinc(x) * sinc(x * (1.0 / 3.0));
         };
         radius = 3.f;
+    } else if (strcmp(filter, "gaussian") == 0) {
+        filter_cb = [](double x, const void *) -> double {
+            if (x < -2.0 || x >= 2.0)
+                return 0.0;
+            double stddev = .5,
+                   alpha = -1.0 / (2.0 * square(stddev));
+            return maximum(0.f, exp(alpha * square(x)) - exp(alpha * square(2.0)));
+
+
+        };
+        radius = 2.f;
     } else {
         throw std::runtime_error("'filter': unknown value ('box', 'linear', "
                                  "'hamming', 'cubic', and 'lanczos' are supported).");
     }
 
-    d = new Impl(source_res, target_res, filter_cb, nullptr, radius);
+    d = new Impl(source_res, target_res, filter_cb, nullptr, radius, radius_scale);
 }
 
 Resampler::Resampler(uint32_t source_res, uint32_t target_res,
                      Resampler::Filter filter, const void *payload,
                      double radius)
-    : d(new Impl(source_res, target_res, filter, payload, radius)) {
+    : d(new Impl(source_res, target_res, filter, payload, radius, 1.0)) {
 }
 
 Resampler::~Resampler() { }
diff --git a/src/python/resample.cpp b/src/python/resample.cpp
@@ -10,6 +10,7 @@
 
 #include <drjit/resample.h>
 #include <nanobind/stl/string.h>
+#include <nanobind/stl/optional.h>
 #include "common.h"
 
 void export_resample(nb::module_ &) {
@@ -18,19 +19,19 @@ void export_resample(nb::module_ &) {
 
     auto resampler = nb::class_<Resampler>(detail, "Resampler")
         .def("__init__", [](Resampler *self, uint32_t source_res, uint32_t target_res,
-                            const char *filter, nb::handle filter_radius) {
-                 if (!filter_radius.is_none())
+                            const char *filter, std::optional<double> filter_radius, bool convolve) {
+                 if (filter_radius.has_value() && !convolve)
                      nb::raise("drjit.Resampler(): 'filter_radius' must be None when using a filter preset.");
-                 new (self) Resampler(source_res, target_res, filter);
-             }, "source_res"_a, "target_res"_a, "filter"_a, "filter_radius"_a = nb::none())
+                 new (self) Resampler(source_res, target_res, filter, filter_radius.has_value() ? filter_radius.value() : 1.0);
+             }, "source_res"_a, "target_res"_a, "filter"_a, "filter_radius"_a = nb::none(), "convolve"_a = false)
         .def("__init__", [](Resampler *self, uint32_t source_res, uint32_t target_res,
-                            nb::typed<nb::callable, float, float> filter, double filter_radius) {
+                            nb::typed<nb::callable, float, float> filter, double filter_radius, bool) {
                  Resampler::Filter filter_cb = [](double v, const void *ptr) -> double {
                      return nb::cast<double>(nb::handle((PyObject *) ptr)(v));
                  };
                  new (self) Resampler(source_res, target_res, filter_cb,
                                       filter.ptr(), filter_radius);
-             }, "source_res"_a, "target_res"_a, "filter"_a, "filter_radius"_a)
+             }, "source_res"_a, "target_res"_a, "filter"_a, "filter_radius"_a, "convolve"_a = false)
 #if defined(DRJIT_ENABLE_CUDA)
          .def("resample_fwd",
               (dr::CUDAArray<dr::half>(Resampler::*)(const dr::CUDAArray<dr::half> &, uint32_t) const) &Resampler::resample_fwd,
diff --git a/tests/test_resample.py b/tests/test_resample.py
@@ -102,3 +102,14 @@ def filt(x):
     )
 
     assert dr.allclose(r1, r2)
+
+# Test filtering a signal without changing its resolution
+@pytest.test_arrays('float, -jit, shape=(*)')
+def test07_convolve(t):
+    x = t(1, 2, 10, 100)
+    y = dr.convolve(x, 'linear', 1)
+    assert dr.allclose(x, y)
+
+    y = dr.convolve(x, 'linear', 2)
+    z = t((1+2*.5)/1.5, (1*.5+2+10*.5)/2, (2*.5+10+100*.5)/2, (100+10*.5)/1.5)
+    assert dr.allclose(y, z)