adding data validation modules, tests and correcting all functions in misc

dkazanc · dkazanc · commit 3302b0916823 · 2025-06-02T17:36:30.000+01:00
diff --git a/httomolibgpu/misc/corr.py b/httomolibgpu/misc/corr.py
@@ -18,9 +18,7 @@
 # Created By  : Tomography Team at DLS <scientificsoftware@diamond.ac.uk>
 # Created Date: 21/October/2022
 # ---------------------------------------------------------------------------
-""" Module for data correction. For more detailed information see :ref:`data_correction_module`.
-
-"""
+"""Module for data correction. For more detailed information see :ref:`data_correction_module`."""
 
 import numpy as np
 from typing import Union
@@ -38,6 +36,7 @@
 else:
     load_cuda_module = Mock()
 
+from httomolibgpu.misc.supp_func import _naninfs_check, _zeros_check
 
 __all__ = [
     "median_filter",
@@ -74,7 +73,6 @@ def median_filter(
         If the input array is not three dimensional.
     """
     input_type = data.dtype
-
     if input_type not in ["float32", "uint16"]:
         raise ValueError("The input data should be either float32 or uint16 data type")
 
@@ -84,6 +82,20 @@ def median_filter(
     else:
         raise ValueError("The input array must be a 3D array")
 
+    verbosity_enabled = True  # printing the data-related warnings
+    method_name = "median_filter"
+
+    data = _naninfs_check(
+        data, correction=True, verbosity=verbosity_enabled, method_name=method_name
+    )
+
+    _zeros_check(
+        data,
+        verbosity=verbosity_enabled,
+        percentage_threshold=50,
+        method_name=method_name,
+    )
+
     if kernel_size not in [3, 5, 7, 9, 11, 13]:
         raise ValueError("Please select a correct kernel size: 3, 5, 7, 9, 11, 13")
 
diff --git a/httomolibgpu/misc/denoise.py b/httomolibgpu/misc/denoise.py
@@ -18,8 +18,7 @@
 # Created By  : Tomography Team at DLS <scientificsoftware@diamond.ac.uk>
 # Created Date: 18/December/2024
 # ---------------------------------------------------------------------------
-""" Module for data denoising. For more detailed information see :ref:`data_denoising_module`.
-"""
+"""Module for data denoising. For more detailed information see :ref:`data_denoising_module`."""
 
 import numpy as np
 from typing import Union, Optional
@@ -32,6 +31,8 @@
 from numpy import float32
 from unittest.mock import Mock
 
+from httomolibgpu.misc.supp_func import _naninfs_check, _zeros_check
+
 if cupy_run:
     from ccpi.filters.regularisersCuPy import ROF_TV, PD_TV
 else:
@@ -81,6 +82,19 @@ def total_variation_ROF(
     ValueError
         If the input array is not float32 data type.
     """
+    verbosity_enabled = True  # printing the data-related warnings
+    method_name = "total_variation_ROF"
+
+    data = _naninfs_check(
+        data, correction=True, verbosity=verbosity_enabled, method_name=method_name
+    )
+
+    _zeros_check(
+        data,
+        verbosity=verbosity_enabled,
+        percentage_threshold=50,
+        method_name=method_name,
+    )
 
     return ROF_TV(
         data, regularisation_parameter, iterations, time_marching_parameter, gpu_id
@@ -126,6 +140,19 @@ def total_variation_PD(
     ValueError
         If the input array is not float32 data type.
     """
+    verbosity_enabled = True  # printing the data-related warnings
+    method_name = "total_variation_PD"
+
+    data = _naninfs_check(
+        data, correction=True, verbosity=verbosity_enabled, method_name=method_name
+    )
+
+    _zeros_check(
+        data,
+        verbosity=verbosity_enabled,
+        percentage_threshold=50,
+        method_name=method_name,
+    )
 
     methodTV = 0
     if not isotropic:
diff --git a/httomolibgpu/misc/morph.py b/httomolibgpu/misc/morph.py
@@ -35,6 +35,8 @@
 
 from typing import Literal
 
+from httomolibgpu.misc.supp_func import _naninfs_check, _zeros_check
+
 __all__ = [
     "sino_360_to_180",
     "data_resampler",
@@ -66,6 +68,20 @@ def sino_360_to_180(
     if data.ndim != 3:
         raise ValueError("only 3D data is supported")
 
+    verbosity_enabled = True  # printing the data-related warnings
+    method_name = "sino_360_to_180"
+
+    data = _naninfs_check(
+        data, correction=True, verbosity=verbosity_enabled, method_name=method_name
+    )
+
+    _zeros_check(
+        data,
+        verbosity=verbosity_enabled,
+        percentage_threshold=50,
+        method_name=method_name,
+    )
+
     dx, dy, dz = data.shape
 
     overlap = int(np.round(overlap))
@@ -136,6 +152,20 @@ def data_resampler(
         data = cp.expand_dims(data, 1)
         axis = 1
 
+    verbosity_enabled = True  # printing the data-related warnings
+    method_name = "data_resampler"
+
+    data = _naninfs_check(
+        data, correction=True, verbosity=verbosity_enabled, method_name=method_name
+    )
+
+    _zeros_check(
+        data,
+        verbosity=verbosity_enabled,
+        percentage_threshold=50,
+        method_name=method_name,
+    )
+
     N, M, Z = cp.shape(data)
 
     if axis == 0:
diff --git a/httomolibgpu/misc/rescale.py b/httomolibgpu/misc/rescale.py
@@ -18,9 +18,7 @@
 # Created By  : Tomography Team at DLS <scientificsoftware@diamond.ac.uk>
 # Created Date: 1 March 2024
 # ---------------------------------------------------------------------------
-""" Module for data rescaling. For more detailed information see :ref:`data_rescale_module`.
-
-"""
+"""Module for data rescaling. For more detailed information see :ref:`data_rescale_module`."""
 
 import numpy as np
 from httomolibgpu import cupywrapper
@@ -30,6 +28,8 @@
 
 from typing import Literal, Optional, Tuple, Union
 
+from httomolibgpu.misc.supp_func import _naninfs_check, _zeros_check
+
 __all__ = [
     "rescale_to_int",
 ]
@@ -80,6 +80,20 @@ def rescale_to_int(
     else:
         output_dtype = np.uint32
 
+    verbosity_enabled = True  # printing the data-related warnings
+    method_name = "rescale_to_int"
+
+    data = _naninfs_check(
+        data, correction=True, verbosity=verbosity_enabled, method_name=method_name
+    )
+
+    _zeros_check(
+        data,
+        verbosity=verbosity_enabled,
+        percentage_threshold=50,
+        method_name=method_name,
+    )
+
     if cupy_run:
         xp = cp.get_array_module(data)
     else:
@@ -109,7 +123,6 @@ def rescale_to_int(
     if xp.__name__ == "numpy":
         if input_max == pow(2, 32):
             input_max -= 1
-        data[np.logical_not(np.isfinite(data))] = 0
         res = np.copy(data.astype(float))
         res[data.astype(float) < input_min] = int(input_min)
         res[data.astype(float) > input_max] = int(input_max)
diff --git a/httomolibgpu/misc/supp_func.py b/httomolibgpu/misc/supp_func.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ---------------------------------------------------------------------------
+# Copyright 2022 Diamond Light Source Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ---------------------------------------------------------------------------
+# Created By  : Tomography Team at DLS <scientificsoftware@diamond.ac.uk>
+# Created Date: 02/June/2025
+# ---------------------------------------------------------------------------
+"""This is a collection of supplementary functions (utils) to perform various data checks"""
+
+from httomolibgpu import cupywrapper
+from typing import Optional
+
+cp = cupywrapper.cp
+cupy_run = cupywrapper.cupy_run
+
+
+def _naninfs_check(
+    data: cp.ndarray,
+    correction: bool = True,
+    verbosity: bool = True,
+    method_name: Optional[str] = None,
+) -> cp.ndarray:
+    """
+    Function finds NaN's, +-Inf's in the input data, prints the warnings and corrects the data in-place
+
+    Parameters
+    ----------
+    data : cp.ndarray
+        Input CuPy array either float32 or uint16 data type.
+    correction : bool
+        If correction is enabled then Inf's and NaN's will be replaced by zeros (the input array itself is modified).
+    verbosity : bool
+        If enabled, then the printing of the warnings happens when data contains infs or nans
+    method_name : str, optional.
+        Method's name for which input data is tested.
+
+    Returns
+    -------
+    ndarray
+        Corrected (or not) CuPy array.
+    """
+    if cupy_run:
+        xp = cp.get_array_module(data)
+    else:
+        import numpy as xp
+    if not xp.all(xp.isfinite(data)):
+        if verbosity:
+            print(
+                f"Warning!!! Input data to method: {method_name} contains Inf's or/and NaN's."
+            )
+        if correction:
+            if verbosity:  # the correction notice must honour the verbosity switch as well
+                print(
+                    "Inf's or/and NaN's will be corrected to finite integers (zeros). It is advisable to check the correctness of the input."
+                )
+            xp.nan_to_num(data, copy=False, nan=0.0, posinf=0.0, neginf=0.0)
+    return data
+
+
+def _zeros_check(
+    data: cp.ndarray,
+    verbosity: bool = True,
+    percentage_threshold: float = 50,
+    method_name: Optional[str] = None,
+) -> bool:
+    """
+    Function counts zeros in the input data and prints the warning if there are too many of them
+
+    Parameters
+    ----------
+    data : cp.ndarray
+        Input CuPy array either float32 or uint16 data type.
+    verbosity : bool
+        If enabled, then the printing of the warning happens when data contains too many zeros
+    percentage_threshold : float
+        If the percentage of zeros in input data is equal to or larger than this value, the data is flagged
+    method_name : str, optional.
+        Method's name for which input data is tested.
+
+    Returns
+    -------
+    bool
+        True if the data contains too many zeros
+    """
+    if cupy_run:
+        xp = cp.get_array_module(data)
+    else:
+        import numpy as xp
+
+    elements_total = data.size  # element count without building a flattened copy
+    if elements_total == 0:
+        return False  # empty input: nothing to count, also avoids division by zero
+    zero_elements_total = int(xp.count_nonzero(data == 0))
+    if not (zero_elements_total / elements_total) * 100 >= percentage_threshold:
+        return False
+    if verbosity:
+        print(
+            f"Warning!!! Input data to method: {method_name} contains more than {percentage_threshold} percent of zeros."
+        )
+    return True
diff --git a/tests/test_misc/test_supp_func.py b/tests/test_misc/test_supp_func.py