Skip to content

Commit 3302b09

Browse files
committed
adding data validation modules, tests and correcting all functions in misc
1 parent 0c2b5a2 commit 3302b09

File tree

6 files changed

+347
-10
lines changed

6 files changed

+347
-10
lines changed

httomolibgpu/misc/corr.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,7 @@
1818
# Created By : Tomography Team at DLS <[email protected]>
1919
# Created Date: 21/October/2022
2020
# ---------------------------------------------------------------------------
21-
""" Module for data correction. For more detailed information see :ref:`data_correction_module`.
22-
23-
"""
21+
"""Module for data correction. For more detailed information see :ref:`data_correction_module`."""
2422

2523
import numpy as np
2624
from typing import Union
@@ -38,6 +36,7 @@
3836
else:
3937
load_cuda_module = Mock()
4038

39+
from httomolibgpu.misc.supp_func import _naninfs_check, _zeros_check
4140

4241
__all__ = [
4342
"median_filter",
@@ -74,7 +73,6 @@ def median_filter(
7473
If the input array is not three dimensional.
7574
"""
7675
input_type = data.dtype
77-
7876
if input_type not in ["float32", "uint16"]:
7977
raise ValueError("The input data should be either float32 or uint16 data type")
8078

@@ -84,6 +82,20 @@ def median_filter(
8482
else:
8583
raise ValueError("The input array must be a 3D array")
8684

85+
verbosity_enabled = True # printing the data-related warnings
86+
method_name = "median_filter"
87+
88+
data = _naninfs_check(
89+
data, correction=True, verbosity=verbosity_enabled, method_name=method_name
90+
)
91+
92+
_zeros_check(
93+
data,
94+
verbosity=verbosity_enabled,
95+
percentage_threshold=50,
96+
method_name=method_name,
97+
)
98+
8799
if kernel_size not in [3, 5, 7, 9, 11, 13]:
88100
raise ValueError("Please select a correct kernel size: 3, 5, 7, 9, 11, 13")
89101

httomolibgpu/misc/denoise.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,7 @@
1818
# Created By : Tomography Team at DLS <[email protected]>
1919
# Created Date: 18/December/2024
2020
# ---------------------------------------------------------------------------
21-
""" Module for data denoising. For more detailed information see :ref:`data_denoising_module`.
22-
"""
21+
"""Module for data denoising. For more detailed information see :ref:`data_denoising_module`."""
2322

2423
import numpy as np
2524
from typing import Union, Optional
@@ -32,6 +31,8 @@
3231
from numpy import float32
3332
from unittest.mock import Mock
3433

34+
from httomolibgpu.misc.supp_func import _naninfs_check, _zeros_check
35+
3536
if cupy_run:
3637
from ccpi.filters.regularisersCuPy import ROF_TV, PD_TV
3738
else:
@@ -81,6 +82,19 @@ def total_variation_ROF(
8182
ValueError
8283
If the input array is not float32 data type.
8384
"""
85+
verbosity_enabled = True # printing the data-related warnings
86+
method_name = "total_variation_ROF"
87+
88+
data = _naninfs_check(
89+
data, correction=True, verbosity=verbosity_enabled, method_name=method_name
90+
)
91+
92+
_zeros_check(
93+
data,
94+
verbosity=verbosity_enabled,
95+
percentage_threshold=50,
96+
method_name=method_name,
97+
)
8498

8599
return ROF_TV(
86100
data, regularisation_parameter, iterations, time_marching_parameter, gpu_id
@@ -126,6 +140,19 @@ def total_variation_PD(
126140
ValueError
127141
If the input array is not float32 data type.
128142
"""
143+
verbosity_enabled = True # printing the data-related warnings
144+
method_name = "total_variation_PD"
145+
146+
data = _naninfs_check(
147+
data, correction=True, verbosity=verbosity_enabled, method_name=method_name
148+
)
149+
150+
_zeros_check(
151+
data,
152+
verbosity=verbosity_enabled,
153+
percentage_threshold=50,
154+
method_name=method_name,
155+
)
129156

130157
methodTV = 0
131158
if not isotropic:

httomolibgpu/misc/morph.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@
3535

3636
from typing import Literal
3737

38+
from httomolibgpu.misc.supp_func import _naninfs_check, _zeros_check
39+
3840
__all__ = [
3941
"sino_360_to_180",
4042
"data_resampler",
@@ -66,6 +68,20 @@ def sino_360_to_180(
6668
if data.ndim != 3:
6769
raise ValueError("only 3D data is supported")
6870

71+
verbosity_enabled = True # printing the data-related warnings
72+
method_name = "sino_360_to_180"
73+
74+
data = _naninfs_check(
75+
data, correction=True, verbosity=verbosity_enabled, method_name=method_name
76+
)
77+
78+
_zeros_check(
79+
data,
80+
verbosity=verbosity_enabled,
81+
percentage_threshold=50,
82+
method_name=method_name,
83+
)
84+
6985
dx, dy, dz = data.shape
7086

7187
overlap = int(np.round(overlap))
@@ -136,6 +152,20 @@ def data_resampler(
136152
data = cp.expand_dims(data, 1)
137153
axis = 1
138154

155+
verbosity_enabled = True # printing the data-related warnings
156+
method_name = "data_resampler"
157+
158+
data = _naninfs_check(
159+
data, correction=True, verbosity=verbosity_enabled, method_name=method_name
160+
)
161+
162+
_zeros_check(
163+
data,
164+
verbosity=verbosity_enabled,
165+
percentage_threshold=50,
166+
method_name=method_name,
167+
)
168+
139169
N, M, Z = cp.shape(data)
140170

141171
if axis == 0:

httomolibgpu/misc/rescale.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,7 @@
1818
# Created By : Tomography Team at DLS <[email protected]>
1919
# Created Date: 1 March 2024
2020
# ---------------------------------------------------------------------------
21-
""" Module for data rescaling. For more detailed information see :ref:`data_rescale_module`.
22-
23-
"""
21+
"""Module for data rescaling. For more detailed information see :ref:`data_rescale_module`."""
2422

2523
import numpy as np
2624
from httomolibgpu import cupywrapper
@@ -30,6 +28,8 @@
3028

3129
from typing import Literal, Optional, Tuple, Union
3230

31+
from httomolibgpu.misc.supp_func import _naninfs_check, _zeros_check
32+
3333
__all__ = [
3434
"rescale_to_int",
3535
]
@@ -80,6 +80,20 @@ def rescale_to_int(
8080
else:
8181
output_dtype = np.uint32
8282

83+
verbosity_enabled = True # printing the data-related warnings
84+
method_name = "rescale_to_int"
85+
86+
data = _naninfs_check(
87+
data, correction=True, verbosity=verbosity_enabled, method_name=method_name
88+
)
89+
90+
_zeros_check(
91+
data,
92+
verbosity=verbosity_enabled,
93+
percentage_threshold=50,
94+
method_name=method_name,
95+
)
96+
8397
if cupy_run:
8498
xp = cp.get_array_module(data)
8599
else:
@@ -109,7 +123,6 @@ def rescale_to_int(
109123
if xp.__name__ == "numpy":
110124
if input_max == pow(2, 32):
111125
input_max -= 1
112-
data[np.logical_not(np.isfinite(data))] = 0
113126
res = np.copy(data.astype(float))
114127
res[data.astype(float) < input_min] = int(input_min)
115128
res[data.astype(float) > input_max] = int(input_max)

httomolibgpu/misc/supp_func.py

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
# ---------------------------------------------------------------------------
4+
# Copyright 2022 Diamond Light Source Ltd.
5+
#
6+
# Licensed under the Apache License, Version 2.0 (the "License");
7+
# you may not use this file except in compliance with the License.
8+
# You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
# ---------------------------------------------------------------------------
18+
# Created By : Tomography Team at DLS <[email protected]>
19+
# Created Date: 02/June/2025
20+
# ---------------------------------------------------------------------------
21+
"""This is a collection of supplementary functions (utils) to perform various data checks"""
22+
23+
from httomolibgpu import cupywrapper
24+
from typing import Optional
25+
26+
cp = cupywrapper.cp
27+
cupy_run = cupywrapper.cupy_run
28+
29+
30+
def _naninfs_check(
    data: cp.ndarray,
    correction: bool = True,
    verbosity: bool = True,
    method_name: Optional[str] = None,
) -> cp.ndarray:
    """
    Detect NaN's and +-Inf's in the input data, optionally warn and replace them with zeros.

    Parameters
    ----------
    data : cp.ndarray
        Input CuPy array either float32 or uint16 data type.
    correction : bool
        If enabled, Inf's and NaN's are replaced by zeros. The replacement is
        requested in place (``copy=False``), so the caller's array is modified.
    verbosity : bool
        If enabled, warnings are printed when the data contains Inf's or NaN's.
        When disabled, nothing is printed (the correction, if enabled, is still applied).
    method_name : str, optional.
        Method's name for which input data is tested; only used in the printed warning.

    Returns
    -------
    ndarray
        Corrected (or not) CuPy array.
    """
    if cupy_run:
        # dispatch to numpy or cupy depending on where the array lives
        xp = cp.get_array_module(data)
    else:
        import numpy as xp

    # Fast path: nothing to report or correct when every element is finite.
    if xp.all(xp.isfinite(data)):
        return data

    if verbosity:
        print(
            f"Warning!!! Input data to method: {method_name} contains Inf's or/and NaN's."
        )

    if correction:
        # The notice about the correction is a warning as well, so it honours `verbosity`.
        if verbosity:
            print(
                "Inf's or/and NaN's will be corrected to finite integers (zeros). It is advisable to check the correctness of the input."
            )
        # Bind the result rather than relying solely on the in-place side effect.
        data = xp.nan_to_num(data, copy=False, nan=0.0, posinf=0.0, neginf=0.0)

    return data
71+
72+
73+
def _zeros_check(
    data: cp.ndarray,
    verbosity: bool = True,
    percentage_threshold: float = 50,
    method_name: Optional[str] = None,
) -> bool:
    """
    Check whether the share of zero-valued elements in the input data reaches a given percentage.

    Parameters
    ----------
    data : cp.ndarray
        Input CuPy array either float32 or uint16 data type.
    verbosity : bool
        If enabled, a warning is printed when the data contains too many zeros.
    percentage_threshold : float
        Percentage of all data points (0-100). The check triggers when the percentage
        of zeros is equal to or larger than this value.
    method_name : str, optional.
        Method's name for which input data is tested; only used in the printed warning.

    Returns
    -------
    bool
        True if the data contains too many zeros, False otherwise
        (an empty array is never flagged).
    """
    if cupy_run:
        # dispatch to numpy or cupy depending on where the array lives
        xp = cp.get_array_module(data)
    else:
        import numpy as xp

    # `size` gives the element count without materialising a flattened copy.
    elements_total = int(data.size)
    if elements_total == 0:
        # No elements means no zeros; also avoids a division by zero below.
        return False

    zero_elements_total = int(xp.count_nonzero(data == 0))
    zeros_percentage = (zero_elements_total / elements_total) * 100

    warning_zeros = zeros_percentage >= percentage_threshold
    if warning_zeros and verbosity:
        print(
            f"Warning!!! Input data to method: {method_name} contains more than {percentage_threshold} percent of zeros."
        )

    return warning_zeros

0 commit comments

Comments
 (0)