Skip to content

Commit 6e3cae8

Browse files
committed
Added sliding window and dvars(mask) approaches to outfind.py for finding outliers, modified dvars() and added a new function dvars_voxel() in metrics.py and modified mad_time_detector() in detectors.py
1 parent fc2421d commit 6e3cae8

File tree

3 files changed

+103
-20
lines changed

3 files changed

+103
-20
lines changed

findoutlie/detectors.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def mad_voxel_detector(img,threshold=3.5):
4545
outlier_tf = np.abs(img-med)>(threshold*mad)
4646
return outlier_tf
4747

48-
def mad_time_detector(measures, threshold=3.5):
48+
def mad_time_detector(measures, lower_bound, threshold=3.5):
4949
""" Detect outliers in 'measures' using median absolute deviation.
5050
Returns 1D vector of same length as 'measures', where True means the corresponsding
5151
value in 'measures' is an outlier.
@@ -71,7 +71,10 @@ def mad_time_detector(measures, threshold=3.5):
7171
# Calculate median absoulte deviation of measures
7272
mad = np.median(np.abs(measures-med))
7373
# Calculate the outliers
74-
outlier_tf = measures>med+threshold*mad
74+
if lower_bound:
75+
outlier_tf = np.abs(measures-med)>threshold*mad
76+
else:
77+
outlier_tf = measures>med+threshold*mad
7578
return outlier_tf
7679

7780
def iqr_detector(measures, iqr_proportion=1.5):

findoutlie/metrics.py

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,26 @@
55
# +++your code here+++
66
import numpy as np
77

8+
def dvars_voxel(voxels):
9+
""" Calculate dvars metric on 2D array with voxels in rows and time-points(volumes) in columns
10+
11+
The dvars calculation between two volumes is defined as the square root of
12+
(the mean of the (voxel differences square)).
13+
14+
Parameters
15+
----------
16+
voxels : 2D array
17+
18+
Returns
19+
-------
20+
dvals : 1D array
21+
One-dimensional array with n-1 elements, where n is the number of
22+
volumes in 'img'.
23+
"""
24+
vol_diff = voxels[..., 1:] - voxels[..., :-1]
25+
dvar_val = np.sqrt(np.mean(vol_diff ** 2, axis=0))
26+
return dvar_val
27+
828

929
def dvars(img):
1030
""" Calculate dvars metric on Nibabel image `img`
@@ -33,14 +53,8 @@ def dvars(img):
3353
data = img.get_fdata()
3454

3555
voxel_by_time = np.reshape(data, (-1, data.shape[-1]))
56+
dvar_val = dvars_voxel(voxel_by_time)
3657

37-
vol_diff = voxel_by_time[..., 1:] - voxel_by_time[..., :-1] # 2D array
38-
# print(vol_diff.shape())
39-
# print(vol_diff)
40-
# vol_diff_1D=vol_diff.flatten()
41-
dvar_val = np.sqrt(np.mean(vol_diff ** 2, axis=0))
42-
# print(dvar_val.shape())
43-
# print(dvar_val)
4458
return dvar_val
4559

4660
raise NotImplementedError("Code up this function")

findoutlie/outfind.py

Lines changed: 77 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from skimage.filters import threshold_otsu
1111

12-
from .metrics import dvars
12+
from .metrics import dvars,dvars_voxel
1313
from .detectors import iqr_detector,mad_voxel_detector,mad_time_detector
1414

1515
def segment_brain(img):
@@ -27,14 +27,12 @@ def segment_brain(img):
2727
mean_img = np.mean(img, axis=-1)
2828
# calculate the threshold for segmenting brain from background
2929
threshold = threshold_otsu(mean_img)
30-
mask = np.expand_dims(mean_img > threshold, axis=1)
31-
mask_2D = np.tile(mask, (1, img.shape[-1]))
32-
thresholded_img = np.where(mask_2D, img, np.nan)
30+
mask = mean_img > threshold
3331
# filter only brain voxels
34-
brain_voxels = thresholded_img[~np.isnan(thresholded_img).all(axis=1)]
32+
brain_voxels = img[mask]
3533
return brain_voxels
3634

37-
def detect_outliers_mean_absolute_deviation_mask(fname):
35+
def detect_outliers_mad_median_absolute_deviation_mask(fname):
3836
""" Detect outliers given image file path 'filename'
3937
4038
Parameters
@@ -47,7 +45,7 @@ def detect_outliers_mean_absolute_deviation_mask(fname):
4745
outliers : array
4846
Indices of outlier volumes.
4947
"""
50-
# A mask is used to first segment the brain regions from the background, then mean absolute deviation is used to detect outliers
48+
# A mask is used to first segment the brain regions from the background, then median absolute deviation is used to detect outliers
5149
img = nib.load(fname)
5250
img_data = img.get_fdata()
5351
# reshape from 4D to 2D
@@ -59,9 +57,71 @@ def detect_outliers_mean_absolute_deviation_mask(fname):
5957
# calculate the number of outlying voxels for each time point
6058
voxel_outliers_per_time = np.nansum(outliers_voxel,axis=0)
6159
# find the outliers in the time-series
62-
outliers_time = mad_time_detector(voxel_outliers_per_time)
60+
outliers_time = mad_time_detector(voxel_outliers_per_time, lower_bound=False)
6361
# Return indices of True values from Boolean array.
64-
return np.nonzero(outliers_time)[0]
62+
return np.nonzero(outliers_time)
63+
64+
def detect_outliers_mad_dvars_mask(fname):
65+
""" Detect outliers given image file path 'filename'
66+
67+
Parameters
68+
----------
69+
fname : str or Path
70+
Filename of 4D image, as string or Path object
71+
72+
Returns
73+
-------
74+
outliers : array
75+
Indices of outlier volumes.
76+
"""
77+
# A mask is used to first segment the brain regions from the background, dvars is calculated and then median absolute deviation is used to detect outliers
78+
img = nib.load(fname)
79+
img_data = img.get_fdata()
80+
# reshape from 4D to 2D
81+
img_data_2D = np.reshape(img_data, (-1,img_data.shape[-1]))
82+
# segment brain from background
83+
brain_voxels = segment_brain(img_data_2D)
84+
# calculate dvars
85+
dvs = dvars_voxel(brain_voxels)
86+
# detect outliers
87+
is_outlier = mad_time_detector(dvs, lower_bound=True)
88+
return np.nonzero(is_outlier)
89+
90+
def detect_outliers_mad_sliding_window_mask(fname):
91+
""" Detect outliers given image file path 'filename'
92+
93+
Parameters
94+
----------
95+
fname : str or Path
96+
Filename of 4D image, as string or Path object
97+
98+
Returns
99+
-------
100+
outliers : array
101+
Indices of outlier volumes.
102+
"""
103+
# A mask is used to first segment the brain regions from the background, sliding window approach is used to detect outliers in each window using median absolute deviation
104+
img = nib.load(fname)
105+
img_data = img.get_fdata()
106+
# reshape from 4D to 2D
107+
img_data_2D = np.reshape(img_data, (-1,img_data.shape[-1]))
108+
# segment brain from background
109+
brain_voxels = segment_brain(img_data_2D)
110+
# apply sliding window
111+
overlap = 10
112+
window_length = 20
113+
outliers = []
114+
for i in range(0, brain_voxels.shape[-1],overlap):
115+
if i+window_length>=brain_voxels.shape[-1]:
116+
elements = np.mean(brain_voxels[:,i:],axis=0)
117+
outliers_1 = np.nonzero(mad_time_detector(elements, lower_bound=True))[0] + i
118+
outliers.extend(outliers_1)
119+
break
120+
else:
121+
elements = np.mean(brain_voxels[:,i:i+window_length],axis=0)
122+
outliers_1 = np.nonzero(mad_time_detector(elements, lower_bound=True))[0] + i
123+
outliers.extend(outliers_1)
124+
return np.unique(outliers)
65125

66126

67127
def detect_outliers(fname):
@@ -82,7 +142,7 @@ def detect_outliers(fname):
82142
dvs = dvars(img)
83143
is_outlier = iqr_detector(dvs, iqr_proportion=2)
84144
# Return indices of True values from Boolean array.
85-
return np.nonzero(is_outlier)[0]
145+
return np.nonzero(is_outlier)
86146

87147

88148
def find_outliers(data_directory):
@@ -102,7 +162,13 @@ def find_outliers(data_directory):
102162
image_fnames = Path(data_directory).glob("**/sub-*.nii.gz")
103163
outlier_dict = {}
104164
for fname in image_fnames:
105-
outliers = detect_outliers_mean_absolute_deviation_mask(fname)
165+
# detect outliers using mad over voxels and mad over time points
166+
outliers_mad = detect_outliers_mad_median_absolute_deviation_mask(fname)
167+
# detect outliers using dvars and mad over time points
168+
outliers_dvars = detect_outliers_mad_dvars_mask(fname)
169+
# detect outliers using sliding window and mad over time points
170+
outliers_sliding_window = np.array(detect_outliers_mad_sliding_window_mask(fname))
171+
outliers = np.intersect1d(outliers_sliding_window,np.union1d(outliers_mad, outliers_dvars))
106172
#outliers = detect_outliers(fname)
107173
outlier_dict[fname] = outliers
108174
return outlier_dict

0 commit comments

Comments
 (0)