Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -76,14 +76,16 @@ class AwsClientFactory {

private String profile

private boolean anonymous

/**
* Initialise the Amazon cloud driver with default (empty) parameters
*/
AwsClientFactory() {
this(new AwsConfig(Collections.emptyMap()))
}

AwsClientFactory(AwsConfig config, String region=null) {
AwsClientFactory(AwsConfig config, String region=null, Boolean anonymous=false) {
this.config = config

if( config.accessKey && config.secretKey ) {
Expand All @@ -104,7 +106,8 @@ class AwsClientFactory {
?: SysEnv.get('AWS_REGION')
?: SysEnv.get('AWS_DEFAULT_REGION')
?: fetchRegion()

this.anonymous = anonymous ?:
config.s3Config.anonymous
if( !this.region )
throw new AbortOperationException('Missing AWS region -- Make sure to define in your system environment the variable `AWS_DEFAULT_REGION`')
}
Expand All @@ -117,6 +120,8 @@ class AwsClientFactory {

String profile() { profile }

AwsConfig config() { config }

/**
 * Retrieve the current IAM role eventually defined for an EC2 instance.
* See http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html#instance-metadata-security-credentials
Expand Down Expand Up @@ -288,7 +293,7 @@ class AwsClientFactory {
* @return an AwsCredentialsProvider instance, falling back to anonymous if needed.
*/
private AwsCredentialsProvider getS3CredentialsProvider() {
if ( config.s3Config.anonymous )
if ( this.anonymous )
return AnonymousCredentialsProvider.create()
def provider = getCredentialsProvider0()
try {
Expand Down
66 changes: 65 additions & 1 deletion plugins/nf-amazon/src/main/nextflow/cloud/aws/nio/S3Client.java
Original file line number Diff line number Diff line change
Expand Up @@ -84,16 +84,80 @@ public class S3Client {

private boolean global;

public S3Client(AwsClientFactory factory, Properties props, boolean global) {
/**
 * Create the S3 client wrapper from the given factory and properties.
 *
 * @param factory the {@link AwsClientFactory} used to build the underlying SDK client
 * @param props client configuration properties (max connections, acl, etc.)
 * @param global whether the client operates in global (cross-region) mode
 * @param bucketName the target bucket; when non-empty its accessibility is probed
 *        and the client may transparently fall back to anonymous credentials
 *        if the bucket turns out to be public — NOTE(review): this performs a
 *        network call (HeadBucket) inside the constructor
 */
public S3Client(AwsClientFactory factory, Properties props, boolean global, String bucketName) {
S3SyncClientConfiguration clientConfig = S3SyncClientConfiguration.create(props);
this.factory = factory;
this.props = props;
this.global = global;
this.client = factory.getS3Client(clientConfig, global);
// semaphore bounds concurrent S3 operations only when virtual threads are in use
this.semaphore = Threads.useVirtual() ? new Semaphore(clientConfig.getMaxConnections()) : null;
this.callerAccount = fetchCallerAccount();

// Check if bucket is accessible with the current credentials; if access is
// denied but the bucket is public, swap in an anonymous client instead
if (bucketName != null && !bucketName.isEmpty()) {
checkAndFallbackToAnonymousIfNeeded(bucketName, clientConfig);
}
}

/**
 * Verify that the given bucket is reachable with the credentials currently
 * configured on this client. A 401/403 response may simply mean the bucket is
 * publicly readable, so in that case probe it anonymously and, when the probe
 * succeeds, switch this client over to unsigned requests. Any other failure
 * (e.g. a missing bucket) is logged and left for the caller to surface.
 *
 * @param bucketName the name of the bucket to check
 * @param clientConfig the S3 client configuration
 */
private void checkAndFallbackToAnonymousIfNeeded(String bucketName, S3SyncClientConfiguration clientConfig) {
    try {
        // probe the bucket with the currently configured credentials
        client.headBucket(HeadBucketRequest.builder().bucket(bucketName).build());
        log.trace("Bucket {} is accessible with current credentials", bucketName);
        return;
    }
    catch (S3Exception e) {
        final int status = e.statusCode();
        if (status != 401 && status != 403) {
            // not an authorisation problem (e.g. bucket does not exist) -- caller handles it
            log.trace("Error checking bucket {} accessibility: {}", bucketName, e.getMessage());
            return;
        }
        log.debug("Access denied to bucket {} with current credentials (status: {}), checking if bucket is public", bucketName, status);
        checkAndManagePublicBucket(bucketName, clientConfig);
    }
    catch (Exception e) {
        // anything unexpected: keep the signed client and carry on
        log.trace("Unexpected error checking bucket {} accessibility: {}", bucketName, e.getMessage());
    }
}

/**
 * Tests whether a bucket is publicly accessible using anonymous credentials and,
 * if it is, replaces this object's client and factory with the anonymous ones.
 *
 * On any failure the freshly created anonymous client is closed and the current
 * (signed) client is kept — the statement order here is deliberate and matters
 * for correct resource cleanup.
 *
 * @param bucketName the bucket name to test
 * @param clientConfig the S3 client configuration
 */
private void checkAndManagePublicBucket(String bucketName, S3SyncClientConfiguration clientConfig) {

software.amazon.awssdk.services.s3.S3Client anonymousClient = null;
try {
// Build a second factory/client pair with anonymous credentials enabled
AwsClientFactory anonymousFactory = new AwsClientFactory(factory.config(), factory.region(), true);
anonymousClient = anonymousFactory.getS3Client(clientConfig, global);

// An unauthenticated HeadBucket succeeding means the bucket is public
anonymousClient.headBucket(HeadBucketRequest.builder().bucket(bucketName).build());

// Probe succeeded: install the anonymous client/factory in place of the signed ones
updateClient(anonymousFactory, anonymousClient);
} catch (Exception e) {
log.trace("Bucket {} is not publicly accessible: {}", bucketName, e.getMessage());
if(anonymousClient != null)
anonymousClient.close();
}
}

/**
 * Replace the current S3 client and factory with the given pair.
 *
 * The previous client is closed on a best-effort basis: a failure while
 * closing it must not abort the swap. In the original code an exception from
 * {@code close()} propagated to the caller's catch block, which then closed
 * {@code newClient} while this object kept referencing the old, half-closed
 * client — leaving it unusable.
 *
 * @param newFactory the factory that created {@code newClient}
 * @param newClient the client that replaces the current one
 */
private void updateClient(AwsClientFactory newFactory, software.amazon.awssdk.services.s3.S3Client newClient){
    try {
        this.client.close();
    }
    catch (Exception e) {
        // best-effort close -- the swap below must still happen
        log.debug("Unexpected error closing previous S3 client: {}", e.getMessage());
    }
    this.client = newClient;
    this.factory = newFactory;
}

/**
* Perform an action that requires the S3 semaphore to limit concurrent connections.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -741,7 +741,7 @@ protected S3FileSystem createFileSystem(URI uri, AwsConfig awsConfig) {
// see https://github.com/nextflow-io/nextflow/pull/5779
final boolean global = bucketName!=null && !awsConfig.getS3Config().isCustomEndpoint();
final AwsClientFactory factory = new AwsClientFactory(awsConfig, awsConfig.resolveS3Region());
final S3Client client = new S3Client(factory, props, global);
final S3Client client = new S3Client(factory, props, global, bucketName);

// set the client acl
client.setCannedAcl(getProp(props, "s_3_acl", "s3_acl", "s3acl", "s3Acl"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,11 +124,28 @@ class S3BashLib extends BashFunLib<S3BashLib> {
local source=\$1
local target=\$2
local file_name=\$(basename \$1)
local is_dir=\$($cli s3 ls \$source | grep -F "PRE \${file_name}/" -c)
local is_dir=\$(nxf_s3_fallback s3 ls \$source | grep -F "PRE \${file_name}/" -c)
local opts=(--only-show-errors)
if [[ \$is_dir == 1 ]]; then
$cli s3 cp --only-show-errors --recursive "\$source" "\$target"
else
$cli s3 cp --only-show-errors "\$source" "\$target"
opts+=(--recursive)
fi
nxf_s3_fallback s3 cp "\${opts[@]}" "\$source" "\$target"
}

nxf_s3_fallback() {
local args=("\$@")
local output

if ! output=\$($cli "\${args[@]}" 2>&1); then
if echo "\$output" | grep -Eq "(AccessDenied|Forbidden|403)"; then
echo "Access denied, retrying unsigned request..."
$cli --no-sign-request "\${args[@]}"
else
echo "\$output"
return 1
fi
else
echo "\$output"
fi
}
""".stripIndent(true)
Expand Down Expand Up @@ -162,11 +179,28 @@ class S3BashLib extends BashFunLib<S3BashLib> {
local source=\$1
local target=\$2
local file_name=\$(basename \$1)
local is_dir=\$($cli ls \$source | grep -F "DIR \${file_name}/" -c)
local is_dir=\$(nxf_s3_fallback ls \$source | grep -F "DIR \${file_name}/" -c)
if [[ \$is_dir == 1 ]]; then
$cli cp "\$source/*" "\$target"
nxf_s3_fallback cp "\$source/*" "\$target"
else
$cli cp "\$source" "\$target"
nxf_s3_fallback cp "\$source" "\$target"
fi
}

nxf_s3_fallback() {
local args=("\$@")
local output

if ! output=\$($cli "\${args[@]}" 2>&1); then
if echo "\$output" | grep -Eq "(AccessDenied|Forbidden|403)"; then
echo "Access denied, retrying unsigned request..."
$cli --no-sign-request "\${args[@]}"
else
echo "\$output"
return 1
fi
else
echo "\$output"
fi
}
""".stripIndent()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,11 +202,28 @@ class AwsBatchFileCopyStrategyTest extends Specification {
local source=$1
local target=$2
local file_name=$(basename $1)
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
local is_dir=$(nxf_s3_fallback s3 ls $source | grep -F "PRE ${file_name}/" -c)
local opts=(--only-show-errors)
if [[ $is_dir == 1 ]]; then
aws s3 cp --only-show-errors --recursive "$source" "$target"
else
aws s3 cp --only-show-errors "$source" "$target"
opts+=(--recursive)
fi
nxf_s3_fallback s3 cp "${opts[@]}" "$source" "$target"
}

nxf_s3_fallback() {
local args=("$@")
local output

if ! output=$(aws "${args[@]}" 2>&1); then
if echo "$output" | grep -Eq "(AccessDenied|Forbidden|403)"; then
echo "Access denied, retrying unsigned request..."
aws --no-sign-request "${args[@]}"
else
echo "$output"
return 1
fi
else
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's no way to avoid the fallback logic and make this predictable?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is tricky, the only way to check if it is public in this condition is just try to access. We could reduce the number of fallbacks by using the try we do in the ls for the cp, so if the ls has worked with --no-sign-request, we will use it in the cp. It will be a complex code but I will just fallback once per file.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could be the alternative only falling back for ls

      nxf_s3_download() {
            local source=\$1
            local target=\$2
            local file_name=\$(basename \$1)
            local opts=(--only-show-errors)
            local ls_output
            if ! ls_output=\$($cli s3 ls \$source 2>&1); then
                if echo "\$ls_output" | grep -Eq "(AccessDenied|Forbidden|403)"; then
                    echo "Access denied, retrying unsigned request..."
                    if ! ls_output=\$($cli s3 ls --no-sign-request \$source 2>&1); then
                        echo \$ls_output
                        return 1
                    else
                        opts+=(--no-sign-request)
                    fi
                else
                    echo \$ls_output
                    return 1
                fi
            fi   
                    
            local is_dir=\$(echo \$ls_output | grep -F "PRE \${file_name}/" -c)
            
            if [[ \$is_dir == 1 ]]; then
                opts+=(--recursive)
            fi
            $cli s3 cp "\${opts[@]}" "\$source" "\$target"
        }

echo "$output"
fi
}
'''.stripIndent(true)
Expand Down Expand Up @@ -293,11 +310,28 @@ class AwsBatchFileCopyStrategyTest extends Specification {
local source=$1
local target=$2
local file_name=$(basename $1)
local is_dir=$(/foo/aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
local is_dir=$(nxf_s3_fallback s3 ls $source | grep -F "PRE ${file_name}/" -c)
local opts=(--only-show-errors)
if [[ $is_dir == 1 ]]; then
/foo/aws s3 cp --only-show-errors --recursive "$source" "$target"
else
/foo/aws s3 cp --only-show-errors "$source" "$target"
opts+=(--recursive)
fi
nxf_s3_fallback s3 cp "${opts[@]}" "$source" "$target"
}

nxf_s3_fallback() {
local args=("$@")
local output

if ! output=$(/foo/aws "${args[@]}" 2>&1); then
if echo "$output" | grep -Eq "(AccessDenied|Forbidden|403)"; then
echo "Access denied, retrying unsigned request..."
/foo/aws --no-sign-request "${args[@]}"
else
echo "$output"
return 1
fi
else
echo "$output"
fi
}
'''.stripIndent(true)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,11 +136,28 @@ class AwsBatchScriptLauncherTest extends Specification {
local source=$1
local target=$2
local file_name=$(basename $1)
local is_dir=$(/conda/bin/aws --region eu-west-1 s3 ls $source | grep -F "PRE ${file_name}/" -c)
local is_dir=$(nxf_s3_fallback s3 ls $source | grep -F "PRE ${file_name}/" -c)
local opts=(--only-show-errors)
if [[ $is_dir == 1 ]]; then
/conda/bin/aws --region eu-west-1 s3 cp --only-show-errors --recursive "$source" "$target"
else
/conda/bin/aws --region eu-west-1 s3 cp --only-show-errors "$source" "$target"
opts+=(--recursive)
fi
nxf_s3_fallback s3 cp "${opts[@]}" "$source" "$target"
}

nxf_s3_fallback() {
local args=("$@")
local output

if ! output=$(/conda/bin/aws --region eu-west-1 "${args[@]}" 2>&1); then
if echo "$output" | grep -Eq "(AccessDenied|Forbidden|403)"; then
echo "Access denied, retrying unsigned request..."
/conda/bin/aws --region eu-west-1 --no-sign-request "${args[@]}"
else
echo "$output"
return 1
fi
else
echo "$output"
fi
}

Expand Down Expand Up @@ -315,11 +332,28 @@ class AwsBatchScriptLauncherTest extends Specification {
local source=$1
local target=$2
local file_name=$(basename $1)
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
local is_dir=$(nxf_s3_fallback s3 ls $source | grep -F "PRE ${file_name}/" -c)
local opts=(--only-show-errors)
if [[ $is_dir == 1 ]]; then
aws s3 cp --only-show-errors --recursive "$source" "$target"
else
aws s3 cp --only-show-errors "$source" "$target"
opts+=(--recursive)
fi
nxf_s3_fallback s3 cp "${opts[@]}" "$source" "$target"
}

nxf_s3_fallback() {
local args=("$@")
local output

if ! output=$(aws "${args[@]}" 2>&1); then
if echo "$output" | grep -Eq "(AccessDenied|Forbidden|403)"; then
echo "Access denied, retrying unsigned request..."
aws --no-sign-request "${args[@]}"
else
echo "$output"
return 1
fi
else
echo "$output"
fi
}

Expand Down Expand Up @@ -487,11 +521,28 @@ class AwsBatchScriptLauncherTest extends Specification {
local source=$1
local target=$2
local file_name=$(basename $1)
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
local is_dir=$(nxf_s3_fallback s3 ls $source | grep -F "PRE ${file_name}/" -c)
local opts=(--only-show-errors)
if [[ $is_dir == 1 ]]; then
aws s3 cp --only-show-errors --recursive "$source" "$target"
else
aws s3 cp --only-show-errors "$source" "$target"
opts+=(--recursive)
fi
nxf_s3_fallback s3 cp "${opts[@]}" "$source" "$target"
}

nxf_s3_fallback() {
local args=("$@")
local output

if ! output=$(aws "${args[@]}" 2>&1); then
if echo "$output" | grep -Eq "(AccessDenied|Forbidden|403)"; then
echo "Access denied, retrying unsigned request..."
aws --no-sign-request "${args[@]}"
else
echo "$output"
return 1
fi
else
echo "$output"
fi
}

Expand Down Expand Up @@ -603,11 +654,28 @@ class AwsBatchScriptLauncherTest extends Specification {
local source=$1
local target=$2
local file_name=$(basename $1)
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
local is_dir=$(nxf_s3_fallback s3 ls $source | grep -F "PRE ${file_name}/" -c)
local opts=(--only-show-errors)
if [[ $is_dir == 1 ]]; then
aws s3 cp --only-show-errors --recursive "$source" "$target"
else
aws s3 cp --only-show-errors "$source" "$target"
opts+=(--recursive)
fi
nxf_s3_fallback s3 cp "${opts[@]}" "$source" "$target"
}

nxf_s3_fallback() {
local args=("$@")
local output

if ! output=$(aws "${args[@]}" 2>&1); then
if echo "$output" | grep -Eq "(AccessDenied|Forbidden|403)"; then
echo "Access denied, retrying unsigned request..."
aws --no-sign-request "${args[@]}"
else
echo "$output"
return 1
fi
else
echo "$output"
fi
}

Expand Down
Loading
Loading