Skip to content

Commit 6653c80

Browse files
authored
Merge pull request #27 from jagruti8/add-detectors-metrics
Added new outlier detector methods, modified the README.md file and modified the algorithm.txt
2 parents f0129d5 + 6e7d954 commit 6653c80

File tree

10 files changed

+146
-41
lines changed

10 files changed

+146
-41
lines changed

README.md

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -114,24 +114,28 @@ identified as an outlier. 0 refers to the first volume. For example (these
114114
outlier IDs are completely random, for illustration):
115115

116116
```
117-
data/group-01/sub-08/func/sub-08_task-taskzero_run-01_bold.nii.gz, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 18, 19, 133, 134, 135, 136, 154, 155, 157
118-
data/group-01/sub-08/func/sub-08_task-taskzero_run-02_bold.nii.gz, 0, 1, 2, 3, 4, 5, 6, 9, 17, 53, 54, 63, 78, 79, 151, 152, 153
119-
data/group-01/sub-01/func/sub-01_task-taskzero_run-01_bold.nii.gz, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 20, 21, 157, 158
120-
data/group-01/sub-01/func/sub-01_task-taskzero_run-02_bold.nii.gz, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 17, 19, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160
121-
data/group-01/sub-06/func/sub-06_task-taskzero_run-02_bold.nii.gz, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 153, 154, 155, 156, 157, 158, 159, 160, 161
122-
data/group-01/sub-06/func/sub-06_task-taskzero_run-01_bold.nii.gz, 0, 1, 2, 3, 4, 5, 6, 7, 8, 19, 24, 25, 26, 27, 28, 29, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159
123-
data/group-01/sub-07/func/sub-07_task-taskzero_run-02_bold.nii.gz, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28
124-
data/group-01/sub-07/func/sub-07_task-taskzero_run-01_bold.nii.gz, 0, 1, 2, 3, 4, 5, 6, 7, 132, 136, 137, 138, 139, 140, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161
125-
data/group-01/sub-09/func/sub-09_task-taskzero_run-01_bold.nii.gz, 0, 134, 135, 136, 143, 144, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160
126-
data/group-01/sub-09/func/sub-09_task-taskzero_run-02_bold.nii.gz, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 36, 79, 80, 150, 151, 152, 153, 154, 155, 156, 157
127-
data/group-01/sub-10/func/sub-10_task-taskzero_run-02_bold.nii.gz, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 26, 104
128-
data/group-01/sub-10/func/sub-10_task-taskzero_run-01_bold.nii.gz, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159
129-
data/group-01/sub-05/func/sub-05_task-taskzero_run-01_bold.nii.gz, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 20, 25, 26, 27, 48, 49, 52, 76, 77, 150
130-
data/group-01/sub-05/func/sub-05_task-taskzero_run-02_bold.nii.gz, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 28, 50, 51, 52, 54, 157, 158, 159, 160, 161
131-
data/group-01/sub-02/func/sub-02_task-taskzero_run-02_bold.nii.gz, 34, 65, 105, 106, 107, 135, 140, 148
132-
data/group-01/sub-02/func/sub-02_task-taskzero_run-01_bold.nii.gz, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
133-
data/group-01/sub-03/func/sub-03_task-taskzero_run-02_bold.nii.gz, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 101, 102, 103, 160, 161
134-
data/group-01/sub-03/func/sub-03_task-taskzero_run-01_bold.nii.gz, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 136, 137, 138, 139, 140, 142, 156, 157, 158, 159
135-
data/group-01/sub-04/func/sub-04_task-taskzero_run-01_bold.nii.gz, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 59, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161
136-
data/group-01/sub-04/func/sub-04_task-taskzero_run-02_bold.nii.gz, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 32, 33, 34, 35, 36, 49, 50, 52, 53, 54, 55, 57, 58, 148, 149, 157
137-
```
117+
data/group-01/sub-08/func/sub-08_task-taskzero_run-01_bold.nii.gz, 11, 157
118+
data/group-01/sub-08/func/sub-08_task-taskzero_run-02_bold.nii.gz, 79, 153
119+
data/group-01/sub-01/func/sub-01_task-taskzero_run-01_bold.nii.gz, 0, 153
120+
data/group-01/sub-01/func/sub-01_task-taskzero_run-02_bold.nii.gz, 151
121+
data/group-01/sub-06/func/sub-06_task-taskzero_run-02_bold.nii.gz, 0, 1, 21, 22, 23, 24, 25, 26, 28, 29, 155
122+
data/group-01/sub-06/func/sub-06_task-taskzero_run-01_bold.nii.gz, 1
123+
data/group-01/sub-07/func/sub-07_task-taskzero_run-02_bold.nii.gz, 79, 80
124+
data/group-01/sub-07/func/sub-07_task-taskzero_run-01_bold.nii.gz, 85
125+
data/group-01/sub-09/func/sub-09_task-taskzero_run-02_bold.nii.gz, 23, 24, 25, 26, 27, 28, 30
126+
data/group-01/sub-10/func/sub-10_task-taskzero_run-02_bold.nii.gz, 104
127+
data/group-01/sub-05/func/sub-05_task-taskzero_run-01_bold.nii.gz, 0, 49, 77, 150
128+
data/group-01/sub-05/func/sub-05_task-taskzero_run-02_bold.nii.gz, 3, 4, 5, 6, 9, 20, 23, 28, 49, 54
129+
data/group-01/sub-03/func/sub-03_task-taskzero_run-02_bold.nii.gz, 160, 161
130+
data/group-01/sub-03/func/sub-03_task-taskzero_run-01_bold.nii.gz, 11, 14, 15, 132, 156, 157
131+
data/group-01/sub-04/func/sub-04_task-taskzero_run-01_bold.nii.gz, 1, 144, 154, 158, 159, 160, 161
132+
data/group-01/sub-04/func/sub-04_task-taskzero_run-02_bold.nii.gz, 0, 1, 28, 35, 49, 52, 53
133+
```
134+
135+
The plots below show the mean voxel intensity at each time point, plotted against the time points. The detected outliers are marked in orange:
136+
![](sub_01_02.png)
137+
![](sub_03_04.png)
138+
![](sub_05_06.png)
139+
![](sub_07_08.png)
140+
![](sub_09_10.png)
141+

algorithm.txt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,28 @@
11
Algorithm:
22

33
1. The 4D image is first segmented (using an Otsu threshold) to separate the brain voxels (x) from the background.
4+
5+
I. Median absolute deviation over voxels and median absolute deviation over time points
46
2. The median (med_voxel(x)) and median absolute deviation (mad_voxel(x)) are calculated for each of the brain voxels.
57
3. The brain voxels lying outside the interval [med_voxel(x)-a*mad_voxel(x), med_voxel(x)+a*mad_voxel(x)] are considered as outliers. a = 3.5
68
4. For each time t, the number of outlying voxels n(t) is counted.
79
5. The median (n_med) and MAD (n_mad) of n(t) are calculated. Any time t with n(t)>n_med+3.5*n_mad is considered an outlier.
810

11+
II. DVARS and median absolute deviation over time points
12+
6. The dvars(t) of the brain voxels are calculated.
13+
7. The median (dvars_med) and MAD (dvars_mad) of dvars(t) are calculated. Any time t with |dvars(t)-dvars_med|>3.5*dvars_mad is considered an outlier.
14+
15+
III. Sliding window and median absolute deviation over time points
16+
8. A subset of the time points is selected using a sliding window, and the mean over voxel intensities at each time point (m(t)) in this sliding window is calculated.
17+
9. The median (m_med) and MAD (m_mad) of m(t) in this sliding window are calculated. Any time t with |m(t)-m_med|>3.5*m_mad is considered an outlier.
18+
10. This is repeated till the sliding window covers all the time points.
19+
11. All the outliers in each sliding window are merged.
20+
21+
12. Approach I is a more global approach and flags a lot of time points as outliers.
22+
13. Approach II (DVARS) compares successive volumes and, if the difference is large, considers the preceding volume an outlier.
23+
14. Approach III (sliding window) compares volumes within a certain range and then detects outliers. This is mostly done to take care of drift.
24+
25+
15. Outliers from I and II are merged into o_total (global + local). The time points in o_total that are also detected by the sliding window approach (III) are classified as the final outliers (o_final).
26+
927
References:
1028
1. Cox, R.W. Outlier Detection in FMRI Time Series. ISMRM (2002).

findoutlie/detectors.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def mad_voxel_detector(img,threshold=3.5):
4545
outlier_tf = np.abs(img-med)>(threshold*mad)
4646
return outlier_tf
4747

48-
def mad_time_detector(measures, threshold=3.5):
48+
def mad_time_detector(measures, lower_bound, threshold=3.5):
4949
""" Detect outliers in 'measures' using median absolute deviation.
5050
Returns 1D vector of same length as 'measures', where True means the corresponsding
5151
value in 'measures' is an outlier.
@@ -71,7 +71,10 @@ def mad_time_detector(measures, threshold=3.5):
7171
# Calculate median absoulte deviation of measures
7272
mad = np.median(np.abs(measures-med))
7373
# Calculate the outliers
74-
outlier_tf = measures>med+threshold*mad
74+
if lower_bound:
75+
outlier_tf = np.abs(measures-med)>threshold*mad
76+
else:
77+
outlier_tf = measures>med+threshold*mad
7578
return outlier_tf
7679

7780
def iqr_detector(measures, iqr_proportion=1.5):

findoutlie/metrics.py

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,26 @@
55
# +++your code here+++
66
import numpy as np
77

8+
def dvars_voxel(voxels):
    """ Calculate dvars metric on 2D array with voxels in rows and time-points(volumes) in columns

    The dvars calculation between two volumes is defined as the square root of
    (the mean of the (voxel differences square)).

    Parameters
    ----------
    voxels : 2D array-like
        Voxel values with one row per voxel and one column per volume.
        Non-floating input (e.g. integer image data) is converted to float
        before the calculation.

    Returns
    -------
    dvals : 1D array
        One-dimensional array with n-1 elements, where n is the number of
        volumes (columns) in 'voxels'. Element i is the dvars value between
        volume i and volume i + 1.
    """
    voxels = np.asarray(voxels)
    # Integer data (unsigned in particular) wraps around when subtracted or
    # squared, which silently produces wrong dvars values; work in float.
    if not np.issubdtype(voxels.dtype, np.inexact):
        voxels = voxels.astype(np.float64)
    # Difference between each volume and the one before it.
    vol_diff = voxels[..., 1:] - voxels[..., :-1]
    # Root mean square over voxels (rows) for every pair of volumes.
    dvar_val = np.sqrt(np.mean(vol_diff ** 2, axis=0))
    return dvar_val
27+
828

929
def dvars(img):
1030
""" Calculate dvars metric on Nibabel image `img`
@@ -33,14 +53,8 @@ def dvars(img):
3353
data = img.get_fdata()
3454

3555
voxel_by_time = np.reshape(data, (-1, data.shape[-1]))
56+
dvar_val = dvars_voxel(voxel_by_time)
3657

37-
vol_diff = voxel_by_time[..., 1:] - voxel_by_time[..., :-1] # 2D array
38-
# print(vol_diff.shape())
39-
# print(vol_diff)
40-
# vol_diff_1D=vol_diff.flatten()
41-
dvar_val = np.sqrt(np.mean(vol_diff ** 2, axis=0))
42-
# print(dvar_val.shape())
43-
# print(dvar_val)
4458
return dvar_val
4559

4660
raise NotImplementedError("Code up this function")

findoutlie/outfind.py

Lines changed: 77 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from skimage.filters import threshold_otsu
1111

12-
from .metrics import dvars
12+
from .metrics import dvars,dvars_voxel
1313
from .detectors import iqr_detector,mad_voxel_detector,mad_time_detector
1414

1515
def segment_brain(img):
@@ -27,14 +27,12 @@ def segment_brain(img):
2727
mean_img = np.mean(img, axis=-1)
2828
# calculate the threshold for segmenting brain from background
2929
threshold = threshold_otsu(mean_img)
30-
mask = np.expand_dims(mean_img > threshold, axis=1)
31-
mask_2D = np.tile(mask, (1, img.shape[-1]))
32-
thresholded_img = np.where(mask_2D, img, np.nan)
30+
mask = mean_img > threshold
3331
# filter only brain voxels
34-
brain_voxels = thresholded_img[~np.isnan(thresholded_img).all(axis=1)]
32+
brain_voxels = img[mask]
3533
return brain_voxels
3634

37-
def detect_outliers_mean_absolute_deviation_mask(fname):
35+
def detect_outliers_mad_median_absolute_deviation_mask(fname):
3836
""" Detect outliers given image file path 'filename'
3937
4038
Parameters
@@ -47,7 +45,7 @@ def detect_outliers_mean_absolute_deviation_mask(fname):
4745
outliers : array
4846
Indices of outlier volumes.
4947
"""
50-
# A mask is used to first segment the brain regions from the background, then mean absolute deviation is used to detect outliers
48+
# A mask is used to first segment the brain regions from the background, then median absolute deviation is used to detect outliers
5149
img = nib.load(fname)
5250
img_data = img.get_fdata()
5351
# reshape from 4D to 2D
@@ -59,9 +57,71 @@ def detect_outliers_mean_absolute_deviation_mask(fname):
5957
# calculate the number of outlying voxels for each time point
6058
voxel_outliers_per_time = np.nansum(outliers_voxel,axis=0)
6159
# find the outliers in the time-series
62-
outliers_time = mad_time_detector(voxel_outliers_per_time)
60+
outliers_time = mad_time_detector(voxel_outliers_per_time, lower_bound=False)
6361
# Return indices of True values from Boolean array.
64-
return np.nonzero(outliers_time)[0]
62+
return np.nonzero(outliers_time)
63+
64+
def detect_outliers_mad_dvars_mask(fname, threshold=3.5):
    """ Detect outliers given image file path 'fname' using dvars and MAD

    A mask is used to first segment the brain regions from the background,
    dvars is calculated on the brain voxels and then median absolute
    deviation is used to detect outliers in the dvars values.

    Parameters
    ----------
    fname : str or Path
        Filename of 4D image, as string or Path object
    threshold : float, optional
        Number of median absolute deviations a dvars value must differ from
        the median dvars to be flagged. Passed to 'mad_time_detector'.
        Default is 3.5.

    Returns
    -------
    outliers : tuple of arrays
        The result of ``np.nonzero`` on the boolean outlier vector: a
        one-element tuple whose array holds the indices of the outlying
        dvars values. Index i refers to the difference between volume i and
        volume i + 1, so there are at most n-1 possible indices for n volumes.
    """
    img = nib.load(fname)
    img_data = img.get_fdata()
    # reshape from 4D to 2D (voxels in rows, volumes in columns)
    img_data_2D = np.reshape(img_data, (-1, img_data.shape[-1]))
    # segment brain from background
    brain_voxels = segment_brain(img_data_2D)
    # calculate dvars on the brain voxels only
    dvs = dvars_voxel(brain_voxels)
    # detect outliers on both sides of the median (unusually high or low dvars)
    is_outlier = mad_time_detector(dvs, lower_bound=True, threshold=threshold)
    return np.nonzero(is_outlier)
89+
90+
def detect_outliers_mad_sliding_window_mask(fname, window_length=20, step=10,
                                            threshold=3.5):
    """ Detect outliers given image file path 'fname' using a sliding window

    A mask is used to first segment the brain regions from the background.
    The mean brain-voxel intensity of every volume is then examined in
    overlapping windows, and median absolute deviation is used to detect
    outliers within each window. Outliers of all windows are merged.

    Parameters
    ----------
    fname : str or Path
        Filename of 4D image, as string or Path object
    window_length : int, optional
        Number of consecutive volumes in one window. Default is 20.
    step : int, optional
        Number of volumes the window start advances between windows.
        Default is 10.
    threshold : float, optional
        Number of median absolute deviations a value must differ from the
        window median to be flagged. Passed to 'mad_time_detector'.
        Default is 3.5.

    Returns
    -------
    outliers : array
        Sorted, unique indices of outlier volumes.

    Raises
    ------
    ValueError
        If 'window_length' or 'step' is smaller than 1.
    """
    if window_length < 1 or step < 1:
        raise ValueError("window_length and step must both be at least 1")
    img = nib.load(fname)
    img_data = img.get_fdata()
    # reshape from 4D to 2D (voxels in rows, volumes in columns)
    img_data_2D = np.reshape(img_data, (-1, img_data.shape[-1]))
    # segment brain from background
    brain_voxels = segment_brain(img_data_2D)
    n_volumes = brain_voxels.shape[-1]
    # The per-volume mean does not depend on the window, so compute it once
    # instead of once per (overlapping) window.
    mean_signal = np.mean(brain_voxels, axis=0)
    found = []
    for start in range(0, n_volumes, step):
        # The window that reaches the end of the series is the last one;
        # slicing clips it to the remaining volumes.
        is_last = start + window_length >= n_volumes
        window = mean_signal[start:start + window_length]
        flagged = mad_time_detector(window, lower_bound=True,
                                    threshold=threshold)
        # Shift window-relative indices back to positions in the full series.
        found.append(np.nonzero(flagged)[0] + start)
        if is_last:
            break
    if not found:
        # No volumes at all: return an empty integer index array.
        return np.array([], dtype=np.intp)
    # Overlapping windows can flag the same volume twice; keep each once.
    return np.unique(np.concatenate(found))
65125

66126

67127
def detect_outliers(fname):
@@ -82,7 +142,7 @@ def detect_outliers(fname):
82142
dvs = dvars(img)
83143
is_outlier = iqr_detector(dvs, iqr_proportion=2)
84144
# Return indices of True values from Boolean array.
85-
return np.nonzero(is_outlier)[0]
145+
return np.nonzero(is_outlier)
86146

87147

88148
def find_outliers(data_directory):
@@ -102,7 +162,13 @@ def find_outliers(data_directory):
102162
image_fnames = Path(data_directory).glob("**/sub-*.nii.gz")
103163
outlier_dict = {}
104164
for fname in image_fnames:
105-
outliers = detect_outliers_mean_absolute_deviation_mask(fname)
165+
# detect outliers using mad over voxels and mad over time points
166+
outliers_mad = detect_outliers_mad_median_absolute_deviation_mask(fname)
167+
# detect outliers using dvars and mad over time points
168+
outliers_dvars = detect_outliers_mad_dvars_mask(fname)
169+
# detect outliers using sliding window and mad over time points
170+
outliers_sliding_window = np.array(detect_outliers_mad_sliding_window_mask(fname))
171+
outliers = np.intersect1d(outliers_sliding_window,np.union1d(outliers_mad, outliers_dvars))
106172
#outliers = detect_outliers(fname)
107173
outlier_dict[fname] = outliers
108174
return outlier_dict

sub_01_02.png

787 KB
Loading

sub_03_04.png

668 KB
Loading

sub_05_06.png

720 KB
Loading

sub_07_08.png

818 KB
Loading

sub_09_10.png

693 KB
Loading

0 commit comments

Comments
 (0)