Add unsigned s3 client. Allow caching of downloaded files. Fix typo in mri_convert.

akeshavan · akeshavan · commit 08fa933a349b · 2018-09-07T13:26:47.000-07:00
diff --git a/preafq/base.py b/preafq/base.py
@@ -4,6 +4,11 @@
 
 from collections import namedtuple
 
+import boto3
+from botocore import UNSIGNED
+from botocore.client import Config
+
+
 # Define the different namedtuple return types
 InputFiles = namedtuple(
     'InputFiles',
@@ -15,3 +20,6 @@
     'InputFilesWithSession',
     ['subject', 'site', 'session', 'files', 'file_type']
 )
+
+# Global s3 client to preserve anonymous config
+s3_client = boto3.client('s3', config=Config(signature_version=UNSIGNED))
diff --git a/preafq/fetch_bids_s3.py b/preafq/fetch_bids_s3.py
@@ -11,7 +11,7 @@
 
 import boto3
 
-from .base import InputFiles, InputFilesWithSession
+from .base import InputFiles, InputFilesWithSession, s3_client
 
 
 mod_logger = logging.getLogger(__name__)
@@ -108,12 +108,10 @@ def get_s3_keys(prefix, site, bucket='fcp-indi'):
     list
         All the keys matching the prefix and site in the S3 bucket
     """
-    s3 = boto3.client('s3')
-
     # Avoid duplicate trailing slash in prefix
     prefix = prefix.rstrip('/')
 
-    response = s3.list_objects_v2(
+    response = s3_client.list_objects_v2(
         Bucket=bucket,
         Prefix=prefix + '/' + site + '/',
     )
@@ -127,7 +125,7 @@ def get_s3_keys(prefix, site, bucket='fcp-indi'):
         )
 
     while response['IsTruncated']:
-        response = s3.list_objects_v2(
+        response = s3_client.list_objects_v2(
             Bucket=bucket,
             Prefix=prefix + '/' + site + '/',
             ContinuationToken=response['NextContinuationToken']
@@ -230,7 +228,8 @@ def get_subject_id(key):
     return s3_registers
 
 
-def download_register(subject_keys, bucket='fcp-indi', directory='./input'):
+def download_register(subject_keys, bucket='fcp-indi', directory='./input',
+                      overwrite=False):
     """
     Parameters
     ----------
@@ -246,6 +245,9 @@ def download_register(subject_keys, bucket='fcp-indi', directory='./input'):
     directory : string
         Local directory to which to save files
 
+    overwrite : bool
+        Flag to overwrite existing files
+
     Returns
     -------
     files : InputFiles namedtuple
@@ -256,7 +258,6 @@ def download_register(subject_keys, bucket='fcp-indi', directory='./input'):
         'files' : local file paths,
         'file_type' : 'local',
     """
-    s3 = boto3.client('s3')
     subject = subject_keys.subject
     site = subject_keys.site
 
@@ -275,10 +276,13 @@ def download_register(subject_keys, bucket='fcp-indi', directory='./input'):
     def download_from_s3(fname_, bucket_, key_):
         # Create the directory and file if necessary
         Path(op.dirname(fname_)).mkdir(parents=True, exist_ok=True)
-        Path(fname_).touch(exist_ok=True)
+        try:
+            Path(fname_).touch(exist_ok=overwrite)

-        # Download the file
-        s3.download_file(Bucket=bucket_, Key=key_, Filename=fname_)
+            # Download the file
+            s3_client.download_file(Bucket=bucket_, Key=key_, Filename=fname_)
+        except FileExistsError:
+            mod_logger.info('File %s already exists. Continuing...', fname_)
 
     s3keys = subject_keys.files
     files = input_files.files
@@ -366,14 +370,12 @@ def determine_directions(input_files,
                 ''.format(files=required_json - set(json_files))
             )
 
-        s3 = boto3.client('s3')
-
         def get_json(json_file):
             if input_type == 'local':
                 with open(json_file, 'r') as fp:
                     meta = json.load(fp)
             else:
-                response = s3.get_object(
+                response = s3_client.get_object(
                     Bucket=bucket,
                     Key=json_file,
                 )
diff --git a/preafq/preafq.py b/preafq/preafq.py
@@ -30,7 +30,7 @@ def move_t1_to_freesurfer(t1_file):
     """
     freesurfer_path = op.join(op.dirname(t1_file), 'freesurfer')
 
-    convert_cmd = 'mriconvert {in_:s} {out_:s}'.format(
+    convert_cmd = 'mri_convert {in_:s} {out_:s}'.format(
         in_=t1_file, out_=op.join(freesurfer_path, 'mri', 'orig.mgz')
     )
 
@@ -104,12 +104,12 @@ def pre_afq_individual(input_s3_keys, s3_prefix, out_bucket,
     out_dir = op.join(workdir, 'output')
 
     run_preAFQ(
-        dwi_file=input_files.files['dwi'],
-        dwi_file_AP=input_files.files['epi_ap'],
-        dwi_file_PA=input_files.files['epi_pa'],
-        bvec_file=input_files.files['bvec'],
-        bval_file=input_files.files['bval'],
-        subjects_dir=op.dirname(input_files.files['t1w']),
+        dwi_file=input_files.files['dwi'][0],
+        dwi_file_AP=input_files.files['epi_ap'][0],
+        dwi_file_PA=input_files.files['epi_pa'][0],
+        bvec_file=input_files.files['bvec'][0],
+        bval_file=input_files.files['bval'][0],
+        subjects_dir=op.dirname(input_files.files['t1w'][0]),
         working_dir=scratch_dir,
         out_dir=out_dir,
     )