Add input type validation to raven filter

neon60 · neon60 · commit baa56692604b · 2024-11-09T23:53:13.000+01:00
diff --git a/docs/source/examples/raven_filter_example.py b/docs/source/examples/raven_filter_example.py
@@ -23,7 +23,7 @@
 
 sino_shape = sinogram.shape
 
-sinogram_stack = cp.stack([sinogram] * 20, axis=1)
+sinogram_stack = cp.stack([sinogram] * 5, axis=1)
 
 print("The shape of the sinogram stack is {}".format(cp.shape(sinogram_stack)))
 
diff --git a/httomolibgpu/cuda_kernels/raven_filter.cu b/httomolibgpu/cuda_kernels/raven_filter.cu
@@ -1,9 +1,10 @@
 #include <cupy/complex.cuh>
 
-extern "C" __global__ void 
+template <typename Type>
+__global__ void 
 raven_filter(
-  complex<float> *input,
-  complex<float> *output,
+  complex<Type> *input,
+  complex<Type> *output,
   int width, int images, int height, 
   int u0, int n, int v0) {
 
@@ -17,17 +18,21 @@ raven_filter(
   int centerx = width / 2;
   int centerz = height / 2;
 
-  complex<float> value = input[pz * width * images + py * width + px];
+  long long index = static_cast<long long>(px) + 
+                    width * static_cast<long long>(py) + 
+                    width * images * static_cast<long long>(pz);
+
+  complex<Type> value = input[index];
   if( pz >= (centerz - v0) && pz < (centerz + v0 + 1) ) {
     
     // +1 needed to match with CPU implementation
-    float base = float(px - centerx + 1) / u0;
-    float power = base;
+    Type base = Type(px - centerx + 1) / u0;
+    Type power = base;
     for( int i = 1; i < 2 * n; i++ )
       power *= base;
 
-    float filtered_value = 1.f / (1.f + power);
-    value *= complex<float>(filtered_value, filtered_value);
+    Type filtered_value = 1.f / (1.f + power);
+    value *= complex<Type>(filtered_value, filtered_value);
   }
 
   // ifftshifting positions
@@ -36,5 +41,9 @@ raven_filter(
   int outX = (px + xshift) % width;
   int outZ = (pz + zshift) % height;
 
-  output[outZ * width * images + py * width + outX] = value;
+  long long outIndex = static_cast<long long>(outX) + 
+                       width * static_cast<long long>(py) + 
+                       width * images * static_cast<long long>(outZ);
+
+  output[outIndex] = value;
 }
diff --git a/httomolibgpu/prep/stripe.py b/httomolibgpu/prep/stripe.py
@@ -375,9 +375,47 @@ def raven_filter(
         pad_x: int = 20,
         pad_method: str = "edge"):
     """
-    Raven filter
+    Applies raven filter to a 3D CuPy array. For more detailed information, see :ref:`method_raven_filter`.
+
+    Parameters
+    ----------
+    sinogram : cp.ndarray
+        Input 3D CuPy array of either float32 or float64 data type.
+
+    pad_y : int, optional
+        Pad the top and bottom of projections.
+
+    pad_x : int, optional
+        Pad the left and right of projections.
+
+    pad_method : str, optional
+        Numpy pad method to use.
+
+    uvalue : int, optional
+        Cutoff scale u0 of the filter 1 / (1 + (x / u0)^(2n)), where x is the column offset from the centre column.
+
+    nvalue : int, optional
+        Order n of the filter 1 / (1 + (x / u0)^(2n)), where u0 is given by uvalue.
+
+    vvalue : int, optional
+        Number of rows on each side of the central row to which the filter is applied (2 * vvalue + 1 rows in total).
+
+    Returns
+    -------
+    ndarray
+        Raven filtered 3D CuPy array in float32 data type.
+
+    Raises
+    ------
+    ValueError
+        If the input array is not three dimensional, or if its data type is neither float32 nor float64.
     """
 
+    input_type = sinogram.dtype
+
+    if input_type not in ["float32", "float64"]:
+        raise ValueError("The input data should be either float32 or float64 data type")
+
     # Padding of the sinogram
     sinogram = cp.pad(sinogram, ((pad_y, pad_y), (0, 0), (pad_x, pad_x)), mode=pad_method)
 
@@ -388,6 +426,11 @@ def raven_filter(
     # Setup various values for the filter
     height, images, width = sinogram.shape
 
+    # Set the input type of the kernel
+    kernel_args = "raven_filter<{0}>".format(
+        "float" if input_type == "float32" else "double"
+    )
+
     # setting grid/block parameters
     block_x = 128
     block_dims = (block_x, 1, 1)
@@ -397,8 +440,8 @@ def raven_filter(
     grid_dims = (grid_x, grid_y, grid_z)
     params = (fft_data_shifted, fft_data, width, images, height, uvalue, nvalue, vvalue)
 
-    raven_module = load_cuda_module("raven_filter")
-    raven_filt = raven_module.get_function("raven_filter")
+    raven_module = load_cuda_module("raven_filter", name_expressions=[kernel_args])
+    raven_filt = raven_module.get_function(kernel_args)
     
     raven_filt(grid_dims, block_dims, params)
     
diff --git a/tests/test_prep/stripe_cpu_reference.py b/tests/test_prep/stripe_cpu_reference.py
@@ -0,0 +1,50 @@
+import numpy as np
+import pyfftw
+import pyfftw.interfaces.numpy_fft as fft
+
+def raven_filter_cpu(
+        sinogram,
+        uvalue: int = 20,
+        nvalue: int = 4,
+        vvalue: int = 2,
+        pad_y: int = 20,
+        pad_x: int = 20,
+        pad_method: str = "edge"):
+    
+    # Parameters
+    v0 = vvalue
+    n = nvalue
+    u0 = uvalue
+
+    # Make a padded copy
+    sinogram_padded = np.pad(sinogram, ((pad_y,pad_y), (0, 0), (pad_x,pad_x)), pad_method)
+    
+    # Size
+    height, images, width = sinogram_padded.shape
+    
+    # Generate filter function
+    centerx = np.ceil(width / 2.0) - 1.0
+    centery = np.int16(np.ceil(height / 2.0) - 1)
+    row1 = centery - v0
+    row2 = centery + v0 + 1
+    listx = np.arange(width) - centerx
+    filtershape = 1.0 / (1.0 + np.power(listx / u0, 2 * n))
+    filtershapepad2d = np.zeros((row2 - row1, filtershape.size))
+    filtershapepad2d[:] = np.float64(filtershape)
+    filtercomplex = filtershapepad2d + filtershapepad2d * 1j
+    
+    # Generate filter objects
+    a = pyfftw.empty_aligned((height, images, width), dtype='complex128', n=16)
+    b = pyfftw.empty_aligned((height, images, width), dtype='complex128', n=16)
+    c = pyfftw.empty_aligned((height, images, width), dtype='complex128', n=16)
+    d = pyfftw.empty_aligned((height, images, width), dtype='complex128', n=16)
+    fft_object  = pyfftw.FFTW(a, b, axes=(0, 2))
+    ifft_object = pyfftw.FFTW(c, d, axes=(0, 2), direction='FFTW_BACKWARD')
+    
+    sino = fft.fftshift(fft_object(sinogram_padded), axes=(0, 2))
+    for m in range(sino.shape[1]):
+        sino[row1:row2, m] = sino[row1:row2, m] * filtercomplex
+    sino = ifft_object(fft.ifftshift(sino, axes=(0, 2)))
+    sinogram = sino[pad_y:height-pad_y, :, pad_x:width-pad_x]
+
+    return sinogram.real
diff --git a/tests/test_prep/test_stripe.py b/tests/test_prep/test_stripe.py
@@ -3,8 +3,7 @@
 from cupy.cuda import nvtx
 import numpy as np
 import pytest
-import pyfftw
-import pyfftw.interfaces.numpy_fft as fft
+
 from httomolibgpu.prep.normalize import normalize
 from httomolibgpu.prep.stripe import (
     remove_stripe_based_sorting,
@@ -13,53 +12,7 @@
     raven_filter,
 )
 from numpy.testing import assert_allclose
-
-def raven_filter_cpu(
-        sinogram,
-        uvalue: int = 20,
-        nvalue: int = 4,
-        vvalue: int = 2,
-        pad_y: int = 20,
-        pad_x: int = 20,
-        pad_method: str = "edge"):
-    
-    # Parameters
-    v0 = vvalue
-    n = nvalue
-    u0 = uvalue
-
-    # Make a padded copy
-    sinogram_padded = cp.pad(sinogram, ((pad_y,pad_y), (0, 0), (pad_x,pad_x)), pad_method).get()
-    
-    # Size
-    height, images, width = sinogram_padded.shape
-    
-    # Generate filter function
-    centerx = np.ceil(width / 2.0) - 1.0
-    centery = np.int16(np.ceil(height / 2.0) - 1)
-    row1 = centery - v0
-    row2 = centery + v0 + 1
-    listx = np.arange(width) - centerx
-    filtershape = 1.0 / (1.0 + np.power(listx / u0, 2 * n))
-    filtershapepad2d = np.zeros((row2 - row1, filtershape.size))
-    filtershapepad2d[:] = np.float64(filtershape)
-    filtercomplex = filtershapepad2d + filtershapepad2d * 1j
-    
-    # Generate filter objects
-    a = pyfftw.empty_aligned((height, images, width), dtype='complex128', n=16)
-    b = pyfftw.empty_aligned((height, images, width), dtype='complex128', n=16)
-    c = pyfftw.empty_aligned((height, images, width), dtype='complex128', n=16)
-    d = pyfftw.empty_aligned((height, images, width), dtype='complex128', n=16)
-    fft_object  = pyfftw.FFTW(a, b, axes=(0, 2))
-    ifft_object = pyfftw.FFTW(c, d, axes=(0, 2), direction='FFTW_BACKWARD')
-    
-    sino = fft.fftshift(fft_object(sinogram_padded), axes=(0, 2))
-    for m in range(sino.shape[1]):
-        sino[row1:row2, m] = sino[row1:row2, m] * filtercomplex
-    sino = ifft_object(fft.ifftshift(sino, axes=(0, 2)))
-    sinogram = sino[pad_y:height-pad_y, :, pad_x:width-pad_x]
-
-    return sinogram.real
+from .stripe_cpu_reference import raven_filter_cpu
 
 def test_remove_stripe_ti_on_data(data, flats, darks):
     # --- testing the CuPy implementation from TomoCupy ---#
@@ -119,10 +72,10 @@ def test_stripe_raven_cupy(data, flats, darks):
 
     data = normalize(data, flats, darks, cutoff=10, minus_log=True)
 
-    data_after_raven_gpu = raven_filter(cp.copy(data)).get()
-    data_after_raven_cpu = raven_filter_cpu(cp.copy(data))
+    data_after_raven_gpu = raven_filter(cp.copy(data))
+    data_after_raven_cpu = cp.asarray(raven_filter_cpu(cp.copy(data).get()))
 
-    assert_allclose(data_after_raven_cpu, data_after_raven_gpu, 0, atol=4e-01)
+    cp.testing.assert_allclose(data_after_raven_cpu, data_after_raven_gpu, rtol=0, atol=4e-01)
 
     data = None  #: free up GPU memory
     # make sure the output is float32
@@ -210,11 +163,11 @@ def test_raven_filter_cpu_performance(ensure_clean_memory):
     data = cp.asarray(data_host, dtype=np.float32)
 
     # do a cold run first
-    raven_filter_cpu(cp.copy(data))
+    raven_filter_cpu(cp.copy(data).get())
 
     start = time.perf_counter_ns()
     for _ in range(10):
-        raven_filter_cpu(cp.copy(data))
+        raven_filter_cpu(cp.copy(data).get())
 
     duration_ms = float(time.perf_counter_ns() - start) * 1e-6 / 10