Skip to content

Commit cf075b8

Browse files
authored
Merge pull request #2 from nipraxis-fall-2022-forks/iqr-outliers
An exercise to complete diagnostics code
2 parents 8b7de78 + 3e31d65 commit cf075b8

15 files changed

+490
-26
lines changed

data/group-00/hash_list.txt

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
5eaa2e01f05cee170e576f5b1e3d4661c75af764 group-00/sub-01/func/sub-01_task-taskzero_run-01_bold.nii.gz
2+
7e39dbebcb9504b26dc90ab97da6925c7229ecdd group-00/sub-01/func/sub-01_task-taskzero_run-01_events.tsv
3+
164738abad431b2e251d32ae5df39ac32d492662 group-00/sub-01/func/sub-01_task-taskzero_run-02_bold.nii.gz
4+
ab264a6822196940f06044f2fcbb611fc5954441 group-00/sub-01/func/sub-01_task-taskzero_run-02_events.tsv
5+
25a80c818325e9b94c604d0cb7ad02fc143eba64 group-00/sub-02/func/sub-02_task-taskzero_run-01_bold.nii.gz
6+
60d3100d2f98cd467482bd0b0a86aab87f63250c group-00/sub-02/func/sub-02_task-taskzero_run-01_events.tsv
7+
20708ce8c3fceb23b70818504103cb54dc11cd45 group-00/sub-02/func/sub-02_task-taskzero_run-02_bold.nii.gz
8+
97c6e697ad5d2837bce14169eca4bf80ed074d70 group-00/sub-02/func/sub-02_task-taskzero_run-02_events.tsv
9+
a5b57e14242e1333cdd5aaf7b0feb6d612d5bd52 group-00/sub-03/func/sub-03_task-taskzero_run-01_bold.nii.gz
10+
13a9fcaefc9e2f3f8d0f962246f894ec3f4cd13c group-00/sub-03/func/sub-03_task-taskzero_run-01_events.tsv
11+
bef1f787f29a6d76d4779d60da3a1d9ce69cdc22 group-00/sub-03/func/sub-03_task-taskzero_run-02_bold.nii.gz
12+
2e432cde2492a32c98c85e393a387a8042cac39c group-00/sub-03/func/sub-03_task-taskzero_run-02_events.tsv
13+
2b43b2821ea004ddbb172085ab7d52e7e679d07e group-00/sub-04/func/sub-04_task-taskzero_run-01_bold.nii.gz
14+
ec65e4be6b734b11e6b7d78bbb6088a6f8b50b53 group-00/sub-04/func/sub-04_task-taskzero_run-01_events.tsv
15+
e0fc9db87118f0765f2fb82c3b3915d72a7f50aa group-00/sub-04/func/sub-04_task-taskzero_run-02_bold.nii.gz
16+
486096db175b3e5fd173fb05b91035a90ee63c9b group-00/sub-04/func/sub-04_task-taskzero_run-02_events.tsv
17+
a04d0d79245e0dc301789ec02a03e98ce1b7a9b5 group-00/sub-05/func/sub-05_task-taskzero_run-01_bold.nii.gz
18+
dc2ab8324999e0a7317e6551305c476d1c0160ae group-00/sub-05/func/sub-05_task-taskzero_run-01_events.tsv
19+
69c697238d7930f07b2339b2afd4a7bf11a52f18 group-00/sub-05/func/sub-05_task-taskzero_run-02_bold.nii.gz
20+
76692e6435353d8b7fd3885f69a10bc64614f55f group-00/sub-05/func/sub-05_task-taskzero_run-02_events.tsv
21+
aaa411bb54454d39a9c245d5c4446899c17a495a group-00/sub-06/func/sub-06_task-taskzero_run-01_bold.nii.gz
22+
fd5f7feed8ddd12ffe4467a4020cc5931cd49b26 group-00/sub-06/func/sub-06_task-taskzero_run-01_events.tsv
23+
9a18f7addc6f9a13b1a4d06b631e67cc20380ea8 group-00/sub-06/func/sub-06_task-taskzero_run-02_bold.nii.gz
24+
2fe32e9ca3338512db646eb3da8b54d3a918ed53 group-00/sub-06/func/sub-06_task-taskzero_run-02_events.tsv
25+
86ecf9bdf2b834233c855569a64cc3ef0698522a group-00/sub-07/func/sub-07_task-taskzero_run-01_bold.nii.gz
26+
e80fd4d390c5be3635a963c14fb5c0d59b740e13 group-00/sub-07/func/sub-07_task-taskzero_run-01_events.tsv
27+
8c2f0d942a7c99c370f9544bc323714708b8e195 group-00/sub-07/func/sub-07_task-taskzero_run-02_bold.nii.gz
28+
b7f85a9b85e834727cfefcf356b9f69cd6f1ad46 group-00/sub-07/func/sub-07_task-taskzero_run-02_events.tsv
29+
dfce82fbf7b3e02d658918fa1341db093ab6e3c4 group-00/sub-08/func/sub-08_task-taskzero_run-01_bold.nii.gz
30+
a4eb6744d5e50a6783d3f365bbd5398151a650f3 group-00/sub-08/func/sub-08_task-taskzero_run-01_events.tsv
31+
4b416251f92ecb10f8c084878049001a224860e2 group-00/sub-08/func/sub-08_task-taskzero_run-02_bold.nii.gz
32+
ce183e545542d7c82b885bb27cb4c4bdfdb1e4b6 group-00/sub-08/func/sub-08_task-taskzero_run-02_events.tsv
33+
7df011856082ac011ac8b191f28e57d12f84bf67 group-00/sub-09/func/sub-09_task-taskzero_run-01_bold.nii.gz
34+
d527ddaee0ea05fdcedb41ef54b2107b655b05d8 group-00/sub-09/func/sub-09_task-taskzero_run-01_events.tsv
35+
36daa50dd2cbd064d652ad519457ad914f2fde12 group-00/sub-09/func/sub-09_task-taskzero_run-02_bold.nii.gz
36+
0734244bb63b92c814fa348bff4eee177bd0ea80 group-00/sub-09/func/sub-09_task-taskzero_run-02_events.tsv
37+
cdfa850cb3b626158bb28aa798803c11a5cd0049 group-00/sub-10/func/sub-10_task-taskzero_run-01_bold.nii.gz
38+
2ef1140403852426ed68f5e93e8bf228cb975ff0 group-00/sub-10/func/sub-10_task-taskzero_run-01_events.tsv
39+
b500a60b6ee1372317a93ebdfc5ae111082cb4fc group-00/sub-10/func/sub-10_task-taskzero_run-02_bold.nii.gz
40+
ab58f5933ca0cb8234a8d6f3713d07692c689437 group-00/sub-10/func/sub-10_task-taskzero_run-02_events.tsv

findoutlie/detectors.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
""" Utilities for detecting outliers
2+
3+
These functions take a vector of values, and return a boolean vector of the
4+
same length as the input, where True indicates the corresponding value is an
5+
outlier.
6+
7+
The outlier detection routines will likely be adapted to the specific measure
8+
that is being worked on. So, some detector functions will work on values > 0,
9+
others on normally distributed values, etc. The routines should check that their
10+
requirements are met and raise an error otherwise.
11+
"""
12+
13+
# Any imports you need
14+
# +++your code here+++
15+
16+
17+
def iqr_detector(measures, iqr_proportion=1.5):
    """ Detect outliers in `measures` using interquartile range.

    Returns a boolean vector of same length as `measures`, where True means the
    corresponding value in `measures` is an outlier.

    Call Q1, Q2 and Q3 the 25th, 50th and 75th percentiles of `measures`.

    The interquartile range (IQR) is Q3 - Q1.

    An outlier is any value in `measures` that is either:

    * > Q3 + IQR * `iqr_proportion` or
    * < Q1 - IQR * `iqr_proportion`.

    See: https://en.wikipedia.org/wiki/Interquartile_range

    Parameters
    ----------
    measures : 1D array
        Values for which we will detect outliers
    iqr_proportion : float, optional
        Scalar to multiply the IQR to form upper and lower threshold (see
        above).  Default is 1.5.

    Returns
    -------
    outlier_tf : 1D boolean array
        A boolean vector of same length as `measures`, where True means the
        corresponding value in `measures` is an outlier.

    Raises
    ------
    ValueError
        If `measures` is not one-dimensional.
    """
    # Imported here so the function works even though this module has no
    # top-level imports yet.
    import numpy as np

    measures = np.asarray(measures)
    if measures.ndim != 1:
        raise ValueError('`measures` must be a 1D array')
    if measures.size == 0:
        # Percentiles of an empty vector are undefined; there is nothing to
        # flag, so return an empty boolean vector of matching length.
        return np.zeros(0, dtype=bool)
    q1, q3 = np.percentile(measures, [25, 75])
    iqr = q3 - q1
    lower_threshold = q1 - iqr * iqr_proportion
    upper_threshold = q3 + iqr * iqr_proportion
    return np.logical_or(measures < lower_threshold,
                         measures > upper_threshold)

findoutlie/spm_funcs.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
"""
2+
This module defines functions implementing algorithms in SPM
3+
4+
In the same directory as this file, you will find a 'tests' directory.
5+
6+
Test this module with:
7+
8+
python3 findoutlie/tests/test_spm_funcs.py
9+
10+
or better, in IPython::
11+
12+
%run findoutlie/tests/test_spm_funcs.py
13+
"""
14+
15+
import numpy as np
16+
17+
import nibabel as nib
18+
19+
20+
def spm_global(vol):
    """ Calculate SPM global metric for array `vol`

    The metric is the mean of the values in `vol` that are greater than one
    eighth of the mean over all of `vol`.

    Parameters
    ----------
    vol : array-like
        Array giving image data, usually 3D.  Plain sequences (such as
        nested lists) are accepted and converted to an array.

    Returns
    -------
    g : float
        SPM global metric for `vol`
    """
    # Convert up front: the boolean-mask indexing below needs an array, and
    # asanyarray passes existing arrays (and subclasses) through unchanged.
    vol = np.asanyarray(vol)
    threshold = np.mean(vol) / 8
    return np.mean(vol[vol > threshold])
35+
36+
37+
def get_spm_globals(fname):
    """ Compute the SPM global metric of each volume in image file `fname`

    Parameters
    ----------
    fname : str
        Filename of file containing 4D image

    Returns
    -------
    spm_vals : array
        One SPM global metric per 3D volume, in the order the volumes appear
        along the last axis of the image data.
    """
    image_data = nib.load(fname).get_fdata()
    n_vols = image_data.shape[-1]
    # Volumes are taken by slicing along the last axis of the loaded data.
    return np.array(
        [spm_global(image_data[..., vol_no]) for vol_no in range(n_vols)])
2.73 MB
Binary file not shown.
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
% Get SPM global signal estimate for all volumes in smaller bold 4D
file_volno = spm_select('ExtFPList', pwd, 'ds107_sub012_t1r2_small.nii', inf);
V = spm_vol(file_volno);
n_vols = length(V);
% Preallocate one entry per volume; every entry is overwritten in the loop.
global_signals = zeros([n_vols, 1]);
for i = 1:n_vols
    global_signals(i) = spm_global(V(i));
end
% Save signal values to a text file
[fid, msg] = fopen('global_signals.txt', 'w');
if fid == -1
    % Fail with the system message rather than an obscure fprintf error.
    error('Could not open global_signals.txt for writing: %s', msg);
end
fprintf(fid, '%6.2f\n', global_signals);
fclose(fid);
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
376.53
2+
375.75
3+
375.26
4+
376.01
5+
376.83
6+
374.15
7+
372.54
8+
373.49
9+
374.23
10+
374.46

findoutlie/tests/test_detectors.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
""" Test script for detector functions
2+
3+
Run these tests with::
4+
5+
python3 findoutlie/tests/test_detectors.py
6+
7+
or better, in IPython::
8+
9+
%run findoutlie/tests/test_detectors.py
10+
"""
11+
12+
from pathlib import Path
13+
import sys
14+
15+
MY_DIR = Path(__file__).parent
16+
17+
# Here you should add the directory containing the findoutlie
18+
# directory to the Python path.
19+
# Hint: sys.path
20+
# Hint: see the solutions if you are stuck.
21+
# +++your code here+++
22+
23+
import numpy as np
24+
25+
# This import needs the directory containing the findoutlie directory
26+
# on the Python path.
27+
from detectors import iqr_detector
28+
29+
30+
def test_iqr_detector():
    """ Check `iqr_detector` against a worked box-plot example. """
    # From: http://www.purplemath.com/modules/boxwhisk3.htm
    example_values = np.array(
        [10.2, 14.1, 14.4, 14.4, 14.4, 14.5, 14.5, 14.6, 14.7, 14.7, 14.7,
         14.9, 15.1, 15.9, 16.4])
    is_outlier = iqr_detector(example_values, 1.5)
    # array_equal compares shape as well as values, so a selection of the
    # wrong length fails the assertion instead of relying on broadcasting.
    assert np.array_equal(example_values[is_outlier], [10.2, 15.9, 16.4])
    # Test not-default value for outlier proportion
    is_outlier = iqr_detector(example_values, 0.5)
    assert np.array_equal(example_values[is_outlier],
                          [10.2, 14.1, 15.1, 15.9, 16.4])
40+
41+
42+
if __name__ == '__main__':
43+
# File being executed as a script
44+
test_iqr_detector()
45+
print('Tests passed')

findoutlie/tests/test_spm_funcs.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
""" Test script for SPM functions
2+
3+
Run these tests with::
4+
5+
python3 findoutlie/tests/test_spm_funcs.py
6+
7+
or better, in IPython::
8+
9+
%run findoutlie/tests/test_spm_funcs.py
10+
"""
11+
12+
from pathlib import Path
13+
import sys
14+
15+
MY_DIR = Path(__file__).parent
16+
EXAMPLE_FILENAME = 'ds107_sub012_t1r2_small.nii'
17+
18+
# Here you should add the directory containing the findoutlie
19+
# directory to the Python path.
20+
# Hint: sys.path
21+
# Hint: see the solutions if you are stuck.
22+
# +++your code here+++
23+
24+
import numpy as np
25+
26+
import nibabel as nib
27+
28+
# This import needs the directory containing the findoutlie directory
29+
# on the Python path.
30+
from spm_funcs import get_spm_globals, spm_global
31+
32+
33+
def test_spm_globals():
    """ Test get_spm_globals and spm_global against saved reference values. """
    example_path = MY_DIR / EXAMPLE_FILENAME
    expected_values = np.loadtxt(MY_DIR / 'global_signals.txt')
    # First check the whole-file helper.
    glob_vals = get_spm_globals(example_path)
    assert glob_vals is not None, 'Did you forget to return the values?'
    assert np.allclose(glob_vals, expected_values, rtol=1e-4)
    # Then check spm_global directly, one volume at a time.
    img = nib.load(example_path)
    data = img.get_fdata()
    # Named to avoid shadowing the builtin `globals`.
    per_volume_globals = [spm_global(data[..., vol_no])
                          for vol_no in range(data.shape[-1])]
    assert np.allclose(per_volume_globals, expected_values, rtol=1e-4)
47+
48+
49+
if __name__ == '__main__':
50+
# File being executed as a script
51+
test_spm_globals()
52+
print('Tests passed')

scripts/validate_data.py

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,13 @@
22
33
Run as:
44
5-
python3 scripts/validata_data.py data
5+
python3 scripts/validate_data.py
66
"""
77

88
from pathlib import Path
9-
import sys
109
import hashlib
1110

11+
1212
def file_hash(filename):
1313
""" Get byte contents of file `filename`, return SHA1 hash
1414
@@ -53,19 +53,22 @@ def validate_data(data_directory):
5353
# If hash for filename is not the same as the one in the file, raise
5454
# ValueError
5555
# This is a placeholder, replace it to write your solution.
56-
raise NotImplementedError('This is just a template -- you are expected to code this.')
56+
raise NotImplementedError(
57+
'This is just a template -- fill out the template with code.')
5758

5859

5960
def main():
6061
# This function (main) is called when this file is run as a script.
61-
#
62-
# Get the data directory from the command line arguments
63-
if len(sys.argv) < 2:
64-
raise RuntimeError("Please give data directory on "
65-
"command line")
66-
data_directory = sys.argv[1]
62+
group_directory = (Path(__file__).parent.parent / 'data')
63+
groups = list(group_directory.glob('group-??'))
64+
if len(groups) == 0:
65+
raise RuntimeError('No group directory in data directory: '
66+
'have you downloaded and unpacked the data?')
67+
68+
if len(groups) > 1:
69+
raise RuntimeError('Too many group directories in data directory')
6770
# Call function to validate data in data directory
68-
validate_data(data_directory)
71+
validate_data(groups[0])
6972

7073

7174
if __name__ == '__main__':

solutions/.solutions.toml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,12 @@ out_path = '{one_down}/findoutlie/metrics.py'
55

66
[solution.validate_data]
77
out_path = '{one_down}/scripts/validate_data.py'
8+
9+
[solution.test_spm_funcs]
10+
out_path = '{one_down}/findoutlie/tests/test_spm_funcs.py'
11+
12+
[solution.detectors]
13+
out_path = '{one_down}/findoutlie/detectors.py'
14+
15+
[solution.test_detectors]
16+
out_path = '{one_down}/findoutlie/tests/test_detectors.py'

0 commit comments

Comments
 (0)