Skip to content

Commit fcb81bb

Browse files
authored
Merge pull request nimh-dsst#35 from nimh-dsst/feature/post-processing
final polish before defaced dataset is ready to be shared
2 parents 8b2c66f + 885e9de commit fcb81bb

File tree

2 files changed

+157
-0
lines changed

2 files changed

+157
-0
lines changed

src/prepare_shareable.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
#!/usr/local/bin/python3
2+
3+
import argparse
4+
import subprocess
5+
from pathlib import Path
6+
import time
7+
import shutil
8+
import json
9+
import os
10+
import filecmp
11+
12+
13+
def get_args():
    """Parse the command-line arguments of the sharing-preparation script.

    Two positional paths are expected, each converted to ``pathlib.Path``:
    the original BIDS dataset and the defaced BIDS dataset.

    Returns:
        argparse.Namespace: carries ``original_bids_dir`` and
        ``defaced_bids_dir``.
    """
    cli = argparse.ArgumentParser(
        description='Prepare the defaced BIDS formatted output dataset to be shared publicly.')

    # (argument name, help text) pairs; both arguments are required positionals.
    positionals = (
        ('original_bids_dir',
         'The directory with the input dataset '
         'formatted according to the BIDS standard containing non-defaced anatomical images.'),
        ('defaced_bids_dir',
         'The directory with the output dataset '
         'formatted according to the BIDS standard containing defaced anatomical images.'),
    )
    for arg_name, help_text in positionals:
        cli.add_argument(arg_name, type=Path, help=help_text)

    return cli.parse_args()
25+
26+
27+
def run_command(cmdstr, logfile):
    """Run a shell command, routing its combined stdout/stderr to ``logfile``.

    Args:
        cmdstr: Command line to execute. It is passed to the shell
            (``shell=True``), so only trusted strings should be supplied.
        logfile: Writable file object that receives the command output.
            When falsy, the output is captured through a pipe instead.

    Returns:
        subprocess.CompletedProcess: the finished process. ``returncode`` is
        always available; ``stdout`` holds the decoded output only when no
        ``logfile`` was given (otherwise it is ``None``).
    """
    if not logfile:
        logfile = subprocess.PIPE
    # stderr is merged into stdout so the log keeps the original interleaving.
    # The result is returned so callers can inspect the exit status and any
    # captured output instead of having it silently discarded.
    return subprocess.run(cmdstr, stdout=logfile, stderr=subprocess.STDOUT, encoding='utf8', shell=True)
31+
32+
33+
def scrub_identifiers(bids_defaced_dir):
    """Remove acquisition date/time fields from every JSON file in a tree.

    Every ``*.json`` file found recursively under ``bids_defaced_dir`` is
    loaded, the top-level keys ``AcquisitionDateTime`` and ``AcquisitionTime``
    are dropped when present, and the file is rewritten in place with a
    4-space indent. Files whose top-level JSON value is not an object are
    left untouched.

    Args:
        bids_defaced_dir: ``pathlib.Path`` of the directory tree to scrub.

    Raises:
        json.JSONDecodeError: if a matched file does not contain valid JSON.
    """
    sidecar_fields_to_rm = ['AcquisitionDateTime', 'AcquisitionTime']
    for sidecar in bids_defaced_dir.rglob('*.json'):
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(sidecar, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Only JSON objects can carry these keys; arrays or scalars would
        # otherwise raise AttributeError on the key lookup.
        if not isinstance(data, dict):
            continue

        for field in sidecar_fields_to_rm:
            data.pop(field, None)

        with open(sidecar, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=4)
46+
47+
48+
def main():
    """Finalize the defaced BIDS tree so it can be shared.

    Steps, in order:
      1. Copy every directory that exists in the original tree but is missing
         from the defaced tree.
      2. Copy top-level (modality agnostic) files that exist only in the
         original tree.
      3. Remove identifying JSON fields from the defaced tree.
      4. Delete every ``defacing_pipeline.log`` file in the defaced tree.
    """
    args = get_args()
    # get_args() registers the positionals as `original_bids_dir` and
    # `defaced_bids_dir`; those are the attribute names on the namespace.
    bids_dir = args.original_bids_dir
    bids_defaced_dir = args.defaced_bids_dir

    # copy over all subdirectories of the original BIDS tree that are missing
    # from the defaced tree
    bids_subdirs = {Path(dirpath).relative_to(bids_dir) for dirpath, _, _ in os.walk(bids_dir)}
    bids_defaced_subdirs = {Path(dirpath).relative_to(bids_defaced_dir)
                            for dirpath, _, _ in os.walk(bids_defaced_dir)}
    missing_subdirs = bids_subdirs - bids_defaced_subdirs
    # copytree() is recursive and refuses to overwrite an existing target, so
    # only the topmost missing directories are copied; anything nested below
    # them comes along with its parent. (`Path('.').parent` is itself, hence
    # the first condition.)
    topmost_missing = sorted(subdir for subdir in missing_subdirs
                             if subdir.parent == subdir or subdir.parent not in missing_subdirs)
    for subdir in topmost_missing:
        shutil.copytree(bids_dir / subdir, bids_defaced_dir / subdir)

    # copy over top-level (modality agnostic) files from original BIDS tree
    dcmp = filecmp.dircmp(bids_dir, bids_defaced_dir)
    for toplevel_file in dcmp.left_only:
        source = bids_dir / toplevel_file
        # left_only can also list non-file entries; copy2() handles files only.
        if source.is_file():
            shutil.copy2(source, bids_defaced_dir / toplevel_file)

    # remove JSON sidecar fields with identifying information; done after all
    # copying so that files brought over from the original tree are scrubbed too
    scrub_identifiers(bids_defaced_dir)

    # remove defacing pipeline log files; the matches are collected first so
    # the tree is not modified while it is still being traversed
    for logfile in list(bids_defaced_dir.rglob('defacing_pipeline.log')):
        logfile.unlink(missing_ok=True)
73+
74+
75+
if __name__ == "__main__":
    # Time the whole run and report the wall-clock duration once it finishes.
    began = time.time()
    main()
    elapsed = time.time() - began
    print("\n--- %s seconds ---" % elapsed)
79+

src/prepare_to_share.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
#!/usr/local/bin/python3
2+
3+
import argparse
4+
import subprocess
5+
from pathlib import Path
6+
import time
7+
import shutil
8+
import json
9+
import os
10+
import filecmp
11+
12+
13+
def get_args():
    """Parse the command-line arguments of the sharing-preparation script.

    Two positional paths are expected, each converted to ``pathlib.Path``:
    the original BIDS dataset and the defaced BIDS dataset.

    Returns:
        argparse.Namespace: carries ``original_bids_dir`` and
        ``defaced_bids_dir``.
    """
    cli = argparse.ArgumentParser(
        description='Prepare the defaced BIDS formatted output dataset to be shared publicly.')

    # (argument name, help text) pairs; both arguments are required positionals.
    positionals = (
        ('original_bids_dir',
         'The directory with the input dataset '
         'formatted according to the BIDS standard containing non-defaced anatomical images.'),
        ('defaced_bids_dir',
         'The directory with the output dataset '
         'formatted according to the BIDS standard containing defaced anatomical images.'),
    )
    for arg_name, help_text in positionals:
        cli.add_argument(arg_name, type=Path, help=help_text)

    return cli.parse_args()
25+
26+
27+
def run_command(cmdstr, logfile):
    """Run a shell command, routing its combined stdout/stderr to ``logfile``.

    Args:
        cmdstr: Command line to execute. It is passed to the shell
            (``shell=True``), so only trusted strings should be supplied.
        logfile: Writable file object that receives the command output.
            When falsy, the output is captured through a pipe instead.

    Returns:
        subprocess.CompletedProcess: the finished process. ``returncode`` is
        always available; ``stdout`` holds the decoded output only when no
        ``logfile`` was given (otherwise it is ``None``).
    """
    if not logfile:
        logfile = subprocess.PIPE
    # stderr is merged into stdout so the log keeps the original interleaving.
    # The result is returned so callers can inspect the exit status and any
    # captured output instead of having it silently discarded.
    return subprocess.run(cmdstr, stdout=logfile, stderr=subprocess.STDOUT, encoding='utf8', shell=True)
31+
32+
33+
def scrub_identifiers(bids_defaced_dir):
    """Remove acquisition date/time fields from every JSON file in a tree.

    Every ``*.json`` file found recursively under ``bids_defaced_dir`` is
    loaded, the top-level keys ``AcquisitionDateTime`` and ``AcquisitionTime``
    are dropped when present, and the file is rewritten in place with a
    4-space indent. Files whose top-level JSON value is not an object are
    left untouched.

    Args:
        bids_defaced_dir: ``pathlib.Path`` of the directory tree to scrub.

    Raises:
        json.JSONDecodeError: if a matched file does not contain valid JSON.
    """
    sidecar_fields_to_rm = ['AcquisitionDateTime', 'AcquisitionTime']
    for sidecar in bids_defaced_dir.rglob('*.json'):
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(sidecar, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Only JSON objects can carry these keys; arrays or scalars would
        # otherwise raise AttributeError on the key lookup.
        if not isinstance(data, dict):
            continue

        for field in sidecar_fields_to_rm:
            data.pop(field, None)

        with open(sidecar, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=4)
46+
47+
48+
def main():
    """Finalize the defaced BIDS tree so it can be shared.

    Steps, in order:
      1. Copy every directory that exists in the original tree but is missing
         from the defaced tree.
      2. Copy top-level (modality agnostic) files that exist only in the
         original tree.
      3. Remove identifying JSON fields from the defaced tree.
      4. Delete every ``defacing_pipeline.log`` file in the defaced tree.
    """
    args = get_args()
    # get_args() registers the positionals as `original_bids_dir` and
    # `defaced_bids_dir`; those are the attribute names on the namespace.
    bids_dir = args.original_bids_dir
    bids_defaced_dir = args.defaced_bids_dir

    # copy over all subdirectories of the original BIDS tree that are missing
    # from the defaced tree
    bids_subdirs = {Path(dirpath).relative_to(bids_dir) for dirpath, _, _ in os.walk(bids_dir)}
    bids_defaced_subdirs = {Path(dirpath).relative_to(bids_defaced_dir)
                            for dirpath, _, _ in os.walk(bids_defaced_dir)}
    missing_subdirs = bids_subdirs - bids_defaced_subdirs
    # copytree() is recursive and refuses to overwrite an existing target, so
    # only the topmost missing directories are copied; anything nested below
    # them comes along with its parent. (`Path('.').parent` is itself, hence
    # the first condition.)
    topmost_missing = sorted(subdir for subdir in missing_subdirs
                             if subdir.parent == subdir or subdir.parent not in missing_subdirs)
    for subdir in topmost_missing:
        shutil.copytree(bids_dir / subdir, bids_defaced_dir / subdir)

    # copy over top-level (modality agnostic) files from original BIDS tree
    dcmp = filecmp.dircmp(bids_dir, bids_defaced_dir)
    for toplevel_file in dcmp.left_only:
        source = bids_dir / toplevel_file
        # left_only can also list non-file entries; copy2() handles files only.
        if source.is_file():
            shutil.copy2(source, bids_defaced_dir / toplevel_file)

    # remove JSON sidecar fields with identifying information; done after all
    # copying so that files brought over from the original tree are scrubbed too
    scrub_identifiers(bids_defaced_dir)

    # remove defacing pipeline log files; the matches are collected first so
    # the tree is not modified while it is still being traversed
    for logfile in list(bids_defaced_dir.rglob('defacing_pipeline.log')):
        logfile.unlink(missing_ok=True)
72+
73+
74+
if __name__ == "__main__":
    # Time the whole run and report the wall-clock duration once it finishes.
    began = time.time()
    main()
    elapsed = time.time() - began
    print("\n--- %s seconds ---" % elapsed)
78+

0 commit comments

Comments
 (0)