Commit a9dd168

Removed S3DataSink and changed dummy file writing to be Python2/3 compatible
1 parent 49c14f8 commit a9dd168
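
The "dummy file writing" half of the commit message refers to a test file not shown in this diff view. As a hedged illustration only, a write pattern that behaves identically under Python 2 and 3 typically looks like this (the filename and content are hypothetical):

    import io

    # io.open with an explicit encoding plus a unicode literal behaves the
    # same under Python 2 and Python 3, avoiding bytes/str mismatches.
    with io.open('dummy.txt', 'w', encoding='utf-8') as fhandle:
        fhandle.write(u'dummy test content')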

File tree

3 files changed: +87 −268 lines

nipype/interfaces/io.py

Lines changed: 51 additions & 131 deletions
@@ -205,7 +205,9 @@ class DataSinkInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec):

     # AWS S3 data attributes
     creds_path = traits.Str(desc='Filepath to AWS credentials file for S3 bucket '\
-                                 'access')
+                                 'access; if not specified, the credentials will '\
+                                 'be taken from the AWS_ACCESS_KEY_ID and '\
+                                 'AWS_SECRET_ACCESS_KEY environment variables')
     encrypt_bucket_keys = traits.Bool(desc='Flag indicating whether to use S3 '\
                                            'server-side AES-256 encryption')
     # Set this if user wishes to override the bucket with their own
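
For context, the credentials file named by creds_path is parsed by _return_aws_keys (see the hunks below), which recognizes the two layouts downloadable from AWS; the values here are hypothetical:

    # Root credentials: 'key=value' lines, detected via 'AWSAccessKeyId' in row 1
    AWSAccessKeyId=AKIAXXXXXXXXXXXXXXXX
    AWSSecretKey=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

    # User credentials: CSV with a 'User Name' header; keys are columns 2 and 3
    User Name,Access Key Id,Secret Access Key
    "someuser",AKIAXXXXXXXXXXXXXXXX,xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx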
@@ -426,16 +428,15 @@ def _check_s3_base_dir(self):
         return s3_flag

     # Function to return AWS secure environment variables
-    def _return_aws_keys(self, creds_path):
+    def _return_aws_keys(self):
         '''
         Method to return AWS access key id and secret access key using
         credentials found in a local file.

         Parameters
         ----------
-        creds_path : string (filepath)
-            path to the csv file downloaded from AWS; can either be root
-            or user credentials
+        self : nipype.interfaces.io.DataSink
+            self for instance method

         Returns
         -------
@@ -445,28 +446,38 @@ def _return_aws_keys(self, creds_path):
             string of the AWS secret access key
         '''

+        # Import packages
+        import os
+
         # Init variables
-        with open(creds_path, 'r') as creds_in:
-            # Grab csv rows
-            row1 = creds_in.readline()
-            row2 = creds_in.readline()
-
-        # Are they root or user keys
-        if 'User Name' in row1:
-            # And split out for keys
-            aws_access_key_id = row2.split(',')[1]
-            aws_secret_access_key = row2.split(',')[2]
-        elif 'AWSAccessKeyId' in row1:
-            # And split out for keys
-            aws_access_key_id = row1.split('=')[1]
-            aws_secret_access_key = row2.split('=')[1]
-        else:
-            err_msg = 'Credentials file not recognized, check file is correct'
-            raise Exception(err_msg)
+        creds_path = self.inputs.creds_path
+
+        # Check if creds exist
+        if creds_path and os.path.exists(creds_path):
+            with open(creds_path, 'r') as creds_in:
+                # Grab csv rows
+                row1 = creds_in.readline()
+                row2 = creds_in.readline()
+
+            # Are they root or user keys
+            if 'User Name' in row1:
+                # And split out for keys
+                aws_access_key_id = row2.split(',')[1]
+                aws_secret_access_key = row2.split(',')[2]
+            elif 'AWSAccessKeyId' in row1:
+                # And split out for keys
+                aws_access_key_id = row1.split('=')[1]
+                aws_secret_access_key = row2.split('=')[1]
+            else:
+                err_msg = 'Credentials file not recognized, check file is correct'
+                raise Exception(err_msg)

-        # Strip any carriage return/line feeds
-        aws_access_key_id = aws_access_key_id.replace('\r', '').replace('\n', '')
-        aws_secret_access_key = aws_secret_access_key.replace('\r', '').replace('\n', '')
+            # Strip any carriage return/line feeds
+            aws_access_key_id = aws_access_key_id.replace('\r', '').replace('\n', '')
+            aws_secret_access_key = aws_secret_access_key.replace('\r', '').replace('\n', '')
+        else:
+            aws_access_key_id = os.getenv('AWS_ACCESS_KEY_ID')
+            aws_secret_access_key = os.getenv('AWS_SECRET_ACCESS_KEY')

         # Return keys
         return aws_access_key_id, aws_secret_access_key
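
With this change, credentials can be supplied entirely through the environment. A minimal usage sketch under that assumption (the key values and bucket path are hypothetical):

    import os
    from nipype.interfaces.io import DataSink

    # Hypothetical values; with creds_path left unset, _return_aws_keys()
    # falls back to these standard AWS environment variables.
    os.environ['AWS_ACCESS_KEY_ID'] = 'AKIAXXXXXXXXXXXXXXXX'
    os.environ['AWS_SECRET_ACCESS_KEY'] = 'xxxxxxxxxxxxxxxxxxxxxxxx'

    ds = DataSink()
    ds.inputs.base_directory = 's3://my-bucket/outputs'  # hypothetical S3 target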
@@ -479,6 +490,8 @@ def _fetch_bucket(self, bucket_name):

         Parameters
         ----------
+        self : nipype.interfaces.io.DataSink
+            self for instance method
         bucket_name : string
             string corresponding to the name of the bucket on S3

@@ -504,19 +517,21 @@ def _fetch_bucket(self, bucket_name):
         creds_path = self.inputs.creds_path
         iflogger = logging.getLogger('interface')

+        # Get AWS credentials
+        try:
+            aws_access_key_id, aws_secret_access_key = \
+                self._return_aws_keys()
+        except Exception as exc:
+            err_msg = 'There was a problem extracting the AWS credentials '\
+                      'from the credentials file provided: %s. Error:\n%s'\
+                      % (creds_path, exc)
+            raise Exception(err_msg)
+
         # Try and get AWS credentials if a creds_path is specified
-        if creds_path:
-            try:
-                aws_access_key_id, aws_secret_access_key = \
-                    self._return_aws_keys(creds_path)
-            except Exception as exc:
-                err_msg = 'There was a problem extracting the AWS credentials '\
-                          'from the credentials file provided: %s. Error:\n%s'\
-                          % (creds_path, exc)
-                raise Exception(err_msg)
+        if aws_access_key_id and aws_secret_access_key:
             # Init connection
-            iflogger.info('Connecting to S3 bucket: %s with credentials from '\
-                          '%s ...' % (bucket_name, creds_path))
+            iflogger.info('Connecting to S3 bucket: %s with credentials...'\
+                          % bucket_name)
             # Use individual session for each instance of DataSink
             # Better when datasinks are being used in multi-threading, see:
             # http://boto3.readthedocs.org/en/latest/guide/resources.html#multithreading
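
For background on the multithreading comment above, the per-instance session pattern it refers to looks roughly like this sketch (credentials and bucket name are hypothetical; this is not the exact _fetch_bucket code):

    import boto3

    # A dedicated Session per DataSink instance keeps boto3 resource objects
    # thread-safe; the implicit module-level session is shared across threads.
    session = boto3.session.Session(
        aws_access_key_id='AKIAXXXXXXXXXXXXXXXX',
        aws_secret_access_key='xxxxxxxxxxxxxxxxxxxxxxxx')
    s3_resource = session.resource('s3')
    bucket = s3_resource.Bucket('my-bucket')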
@@ -762,101 +777,6 @@ def _list_outputs(self):
         return outputs


-class S3DataSinkInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec):
-    testing = traits.Bool(False, usedefault=True,
-                          desc='Flag for using local fakes3 server.'
-                               ' (for testing purposes only)')
-    anon = traits.Bool(False, usedefault=True,
-                       desc='Use anonymous connection to s3')
-    bucket = traits.Str(mandatory=True,
-                        desc='Amazon S3 bucket where your data is stored')
-    bucket_path = traits.Str('', usedefault=True,
-                             desc='Location within your bucket to store '
-                                  'data.')
-    base_directory = Directory(
-        desc='Path to the base directory for storing data.')
-    container = traits.Str(
-        desc='Folder within base directory in which to store output')
-    parameterization = traits.Bool(True, usedefault=True,
-                                   desc='store output in parametrized structure')
-    strip_dir = Directory(desc='path to strip out of filename')
-    substitutions = InputMultiPath(traits.Tuple(traits.Str, traits.Str),
-                                   desc=('List of 2-tuples reflecting string '
-                                         'to substitute and string to replace '
-                                         'it with'))
-    regexp_substitutions = InputMultiPath(traits.Tuple(traits.Str, traits.Str),
-                                          desc=('List of 2-tuples reflecting a pair '
-                                                'of a Python regexp pattern and a '
-                                                'replacement string. Invoked after '
-                                                'string `substitutions`'))
-
-    _outputs = traits.Dict(traits.Str, value={}, usedefault=True)
-    remove_dest_dir = traits.Bool(False, usedefault=True,
-                                  desc='remove dest directory when copying dirs')
-    # Set this if user wishes to have local copy of files as well
-    local_copy = traits.Str(desc='Copy files locally as well as to S3 bucket')
-
-    def __setattr__(self, key, value):
-        if key not in self.copyable_trait_names():
-            if not isdefined(value):
-                super(S3DataSinkInputSpec, self).__setattr__(key, value)
-            self._outputs[key] = value
-        else:
-            if key in self._outputs:
-                self._outputs[key] = value
-            super(S3DataSinkInputSpec, self).__setattr__(key, value)
-
-
-class S3DataSink(DataSink):
-    """ Works exactly like DataSink, except the specified files will
-        also be uploaded to Amazon S3 storage in the specified bucket
-        and location. 'bucket_path' is the s3 analog for
-        'base_directory'.
-
-    """
-    input_spec = S3DataSinkInputSpec
-
-    def _list_outputs(self):
-        """Execute this module.
-        """
-        outputs = super(S3DataSink, self)._list_outputs()
-
-        self.localtos3(outputs['out_file'])
-
-        return outputs
-
-    def localtos3(self, paths):
-        if self.inputs.testing:
-            conn = S3Connection(anon=True, is_secure=False, port=4567,
-                                host='localhost',
-                                calling_format=OrdinaryCallingFormat())
-
-        else:
-            conn = S3Connection(anon=self.inputs.anon)
-        bkt = conn.get_bucket(self.inputs.bucket)
-        s3paths = []
-
-        for path in paths:
-            # convert local path to s3 path
-            bd_index = path.find(self.inputs.base_directory)
-            if bd_index != -1:  # base_directory is in path, maintain directory structure
-                s3path = path[bd_index + len(self.inputs.base_directory):]  # cut out base directory
-                if s3path[0] == os.path.sep:
-                    s3path = s3path[1:]
-            else:  # base_directory isn't in path, simply place all files in bucket_path folder
-                s3path = os.path.split(path)[1]  # take filename from path
-            s3path = os.path.join(self.inputs.bucket_path, s3path)
-            if s3path[-1] == os.path.sep:
-                s3path = s3path[:-1]
-            s3paths.append(s3path)
-
-            k = boto.s3.key.Key(bkt)
-            k.key = s3path
-            k.set_contents_from_filename(path)
-
-        return s3paths
-
-
 class S3DataGrabberInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec):
     anon = traits.Bool(False, usedefault=True,
                        desc='Use anonymous connection to s3. If this is set to True, boto may print' +
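
For pipelines that used the removed S3DataSink, the plain DataSink now covers the same ground. A hedged migration sketch (bucket and paths are hypothetical; base_directory values starting with 's3://' are detected by _check_s3_base_dir, shown earlier):

    from nipype.interfaces.io import DataSink

    ds = DataSink()
    ds.inputs.base_directory = 's3://my-bucket/derivatives'  # S3 analog of the old bucket/bucket_path
    ds.inputs.creds_path = '/home/user/credentials.csv'      # optional; env vars used otherwise
    ds.inputs.encrypt_bucket_keys = True                     # server-side AES-256 encryption
    ds.inputs.local_copy = '/tmp/derivatives'                # keep a local copy as well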

nipype/interfaces/tests/test_auto_S3DataSink.py

Lines changed: 0 additions & 44 deletions
This file was deleted.
