Skip to content

Commit 19984f5

Browse files
committed
Initial commit
0 parents  commit 19984f5

File tree

7 files changed

+253
-0
lines changed

7 files changed

+253
-0
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
*.pyc
2+
__pycache__/

README.md

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Diagnostics project
2+
3+
Scripts go in the `scripts` directory.
4+
5+
Library code (such as Python modules or packages) goes in the `packages` directory.
6+
7+
You should put this `packages` directory on your Python module search path (for example, by adding it to the `PYTHONPATH` environment variable).
8+
9+
This file has instructions on how to get, validate and process the data.
10+
11+
## Get the data
12+
13+
cd data
14+
curl -LO http://nipy.bic.berkeley.edu/psych-214/group00.tar.gz
15+
tar zxvf group00.tar.gz
16+
cd ..
17+
18+
## Check the data
19+
20+
python3 scripts/validate_data.py data
21+
22+
## Find outliers
23+
24+
python3 scripts/find_outliers.py data
25+
26+
This should print output to the terminal of the form:
27+
28+
<filename> <outlier_index>, <outlier_index>, ...
29+
<filename> <outlier_index>, <outlier_index>, ...
30+
31+
Where `<filename>` is the name of the image that has outlier scans, and
32+
`<outlier_index>` is an index to the volume in the 4D image that you have
33+
identified as an outlier. 0 refers to the first volume. For example:
34+
35+
group00_sub01_run1.nii 3, 21, 22, 104
36+
group00_sub02_run2.nii 11, 33, 91
37+
group00_sub04_run2.nii 101, 102, 132
38+
group00_sub07_run2.nii 0, 1, 2, 166, 167
39+
group00_sub09_run2.nii 3

data/.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Tell git to ignore everything
2+
*
3+
# Except the data hash list
4+
!data_hashes.txt

data/data_hashes.txt

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
fd0654cf3a93e9e4dc6ab535cdc9239d4e0618ec group00_sub01_run1.nii
2+
d0f0590489efbc0e489f3098394cd3d5d6ca158e group00_sub01_run2.nii
3+
46162b0151127fd81934a5a13db5a8f272b3d5d7 group00_sub02_run1.nii
4+
92586788b1476507c2ce04acda37a1532e5b514d group00_sub02_run2.nii
5+
28895f3a338e941b5c206ddcb458094e009b3ee6 group00_sub03_run1.nii
6+
7b58b334e81d186595c0fc99d1b836ab0ca83aa4 group00_sub03_run2.nii
7+
79e5f243b1b0fd35d7f1635239459bcebc6bb21c group00_sub04_run1.nii
8+
b9b154f8cfa8f66e1ebd8ae9883e0b75793eea5e group00_sub04_run2.nii
9+
354950c19598e1c139cf2006e00d0a548e4108b3 group00_sub05_run1.nii
10+
672221403ed6b5986fbefdd937c4fb1c189b26c6 group00_sub05_run2.nii
11+
b265ffe2fc753f3afa55aab50cde75230a9378bc group00_sub06_run1.nii
12+
bc720346588b5cee301604a3fbd875067d6bdfe2 group00_sub06_run2.nii
13+
5183bbdc8f7705e948c4641a06659003dd4036d9 group00_sub07_run1.nii
14+
ac965ec6b7c76f95f0b43d4683fa23d9e34cf702 group00_sub07_run2.nii
15+
522d66c31244943137e8ef07607f404bb164d571 group00_sub08_run1.nii
16+
8e801bc75b7191d5c2e840f050e153d4e7333e40 group00_sub08_run2.nii
17+
69cec23b3066460f9e2d7ed54a0655248ac3256d group00_sub09_run1.nii
18+
78d71f77ed58621e24ebb648a06468d8849f3e0c group00_sub09_run2.nii
19+
ba6da5b248071651a687216a8f3fcfbcada3df11 group00_sub10_run1.nii
20+
8cd9670e6f2b2a70b6faf4c5aa4cb82c9bee9a84 group00_sub10_run2.nii

scripts/find_outliers.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
""" Python script to find outliers
2+
3+
Run as:
4+
5+
python3 scripts/find_outliers.py data
6+
"""
7+
8+
import sys
9+
10+
def find_outliers(data_directory):
    """ Print filenames and outlier indices for images in `data_directory`.

    Print filenames and detected outlier indices to the terminal.

    Parameters
    ----------
    data_directory : str
        Directory containing images.

    Returns
    -------
    None

    Raises
    ------
    NotImplementedError
        Always, until the outlier detection code is written.  This is a
        subclass of ``RuntimeError``, so code that catches ``RuntimeError``
        keeps working.
    """
    # Your code here
    raise NotImplementedError('No code yet')
26+
27+
28+
def main():
    """ Entry point used when this file is executed as a script.

    Takes the data directory from the first command line argument and
    passes it to `find_outliers`.

    Raises
    ------
    RuntimeError
        If no data directory was given on the command line.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        # Nothing follows the script name on the command line.
        raise RuntimeError("Please give data directory on "
                           "command line")
    # Find outliers for the directory named by the first argument.
    find_outliers(cli_args[0])


if __name__ == '__main__':
    # Only run main when executed as a script, not when imported.
    main()

scripts/validate_data.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
""" Python script to validate data
2+
3+
Run as:
4+
5+
python3 scripts/validate_data.py data
6+
"""
7+
8+
import os
9+
import sys
10+
import hashlib
11+
12+
def file_hash(filename):
    """ Get byte contents of file `filename`, return SHA1 hash

    Parameters
    ----------
    filename : str
        Name of file to read

    Returns
    -------
    hash : str
        SHA1 hexadecimal hash string for contents of `filename`.

    Raises
    ------
    NotImplementedError
        Always, until the hashing code is written.  This is a subclass of
        ``RuntimeError``, so code that catches ``RuntimeError`` keeps
        working.
    """
    # Open the file, read contents as bytes.
    # Calculate, return SHA1 hash on the bytes from the file.
    raise NotImplementedError('No code yet')
28+
29+
30+
def validate_data(data_directory):
    """ Read ``data_hashes.txt`` file in `data_directory`, check hashes

    Parameters
    ----------
    data_directory : str
        Directory containing data and ``data_hashes.txt`` file.

    Returns
    -------
    None

    Raises
    ------
    ValueError:
        Once implemented: if hash value for any file is different from hash
        value recorded in ``data_hashes.txt`` file.
    NotImplementedError:
        Always, until the validation code is written.  This is a subclass
        of ``RuntimeError``, so code that catches ``RuntimeError`` keeps
        working.
    """
    # Read lines from ``data_hashes.txt`` file.
    # Split into SHA1 hash and filename
    # Calculate actual hash for given filename.
    # If hash for filename is not the same as the one in the file, raise
    # ValueError
    raise NotImplementedError("No code yet")
54+
55+
56+
def main():
    """ Entry point used when this file is executed as a script.

    Takes the data directory from the first command line argument and
    passes it to `validate_data`.

    Raises
    ------
    RuntimeError
        If no data directory was given on the command line.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        # Nothing follows the script name on the command line.
        raise RuntimeError("Please give data directory on "
                           "command line")
    # Validate the data in the directory named by the first argument.
    validate_data(cli_args[0])


if __name__ == '__main__':
    # Only run main when executed as a script, not when imported.
    main()

solutions/validate_data.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
""" Python script to validate data
2+
3+
Run as:
4+
5+
python3 scripts/validate_data.py data
6+
"""
7+
8+
import os
9+
import sys
10+
import hashlib
11+
12+
def file_hash(filename):
    """ Get byte contents of file `filename`, return SHA1 hash

    The file is read in fixed-size chunks, so memory use stays bounded
    however large the file is.

    Parameters
    ----------
    filename : str
        Name of file to read

    Returns
    -------
    hash : str
        SHA1 hexadecimal hash string for contents of `filename`.
    """
    chunk_size = 1024 * 1024
    sha1 = hashlib.sha1()
    # Open the file in binary mode; the hash is defined over raw bytes.
    with open(filename, 'rb') as fobj:
        # ``read`` returns b'' at end of file, which stops the iteration.
        for chunk in iter(lambda: fobj.read(chunk_size), b''):
            sha1.update(chunk)
    # Return SHA1 hash of all the bytes from the file.
    return sha1.hexdigest()
30+
31+
32+
def validate_data(data_directory):
    """ Read ``data_hashes.txt`` file in `data_directory`, check hashes

    Each non-blank line of ``data_hashes.txt`` holds a SHA1 hexadecimal hash
    followed by whitespace and a filename relative to `data_directory`.

    Parameters
    ----------
    data_directory : str
        Directory containing data and ``data_hashes.txt`` file.

    Returns
    -------
    None

    Raises
    ------
    ValueError:
        If hash value for any file is different from hash value recorded in
        ``data_hashes.txt`` file, or if a non-blank line does not contain
        both a hash and a filename.
    """
    hashes_path = os.path.join(data_directory, 'data_hashes.txt')
    # Use a context manager so the hash list is closed even when a mismatch
    # raises part way through.
    with open(hashes_path, 'rt') as fobj:
        # Read lines from ``data_hashes.txt`` file.
        for line in fobj:
            line = line.strip()
            if not line:
                # Tolerate blank lines, such as a trailing empty line.
                continue
            # Split into SHA1 hash and filename.  Split once only, so a
            # filename containing spaces stays in one piece.
            parts = line.split(None, 1)
            if len(parts) != 2:
                raise ValueError(
                    "Malformed line in data_hashes.txt: {!r}".format(line))
            expected_hash, filename = parts
            # Calculate actual hash for given filename.
            actual_hash = file_hash(os.path.join(data_directory, filename))
            # If hash for filename is not the same as the one in the file,
            # raise ValueError
            if expected_hash != actual_hash:
                raise ValueError(
                    "Hash for {} does not match".format(filename))
60+
61+
62+
def main():
    """ Entry point used when this file is executed as a script.

    Takes the data directory from the first command line argument and
    passes it to `validate_data`.

    Raises
    ------
    RuntimeError
        If no data directory was given on the command line.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        # Nothing follows the script name on the command line.
        raise RuntimeError("Please give data directory on "
                           "command line")
    # Validate the data in the directory named by the first argument.
    validate_data(cli_args[0])


if __name__ == '__main__':
    # Only run main when executed as a script, not when imported.
    main()

0 commit comments

Comments
 (0)