Skip to content

Commit 85b4e6d

Browse files
authored
GH-46746: [C++] Assume AWS SDK >= 1.11.0 (#46742)
### Rationale for this change

We're currently carrying compatibility code for AWS SDK before 1.9.0. However, we almost always bundled our own build of AWS SDK, so we should be able to rely on a more recent version. Version 1.11.0 was [released in January 2023](https://github.com/aws/aws-sdk-cpp/releases/tag/1.11.0), which is already 2.5 years ago, so this should be a reasonable target.

This will make the S3 filesystem code more maintainable.

### Are these changes tested?

Yes, on existing CI builds.

### Are there any user-facing changes?

Users won't be able to build Arrow C++ if they have a very old version of the AWS SDK installed, unless they pass `-DAWSSDK_SOURCE=BUNDLED`.

* GitHub Issue: #46746

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
1 parent e6e50de commit 85b4e6d

File tree

3 files changed

+22
-106
lines changed

3 files changed

+22
-106
lines changed

cpp/cmake_modules/ThirdpartyToolchain.cmake

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5278,7 +5278,12 @@ function(build_awssdk)
52785278
endfunction()
52795279

52805280
if(ARROW_S3)
5281-
resolve_dependency(AWSSDK HAVE_ALT TRUE)
5281+
# Keep this in sync with s3fs.cc
5282+
resolve_dependency(AWSSDK
5283+
HAVE_ALT
5284+
TRUE
5285+
REQUIRED_VERSION
5286+
1.11.0)
52825287

52835288
message(STATUS "Found AWS SDK headers: ${AWSSDK_INCLUDE_DIR}")
52845289
message(STATUS "Found AWS SDK libraries: ${AWSSDK_LINK_LIBRARIES}")

cpp/src/arrow/filesystem/s3_internal.h

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -40,27 +40,6 @@
4040
#include "arrow/util/print_internal.h"
4141
#include "arrow/util/string.h"
4242

43-
#ifndef ARROW_AWS_SDK_VERSION_CHECK
44-
// AWS_SDK_VERSION_{MAJOR,MINOR,PATCH} are available since 1.9.7.
45-
# if defined(AWS_SDK_VERSION_MAJOR) && defined(AWS_SDK_VERSION_MINOR) && \
46-
defined(AWS_SDK_VERSION_PATCH)
47-
// Redundant "(...)" are for suppressing "Weird number of spaces at
48-
// line-start. Are you using a 2-space indent? [whitespace/indent]
49-
// [3]" errors...
50-
# define ARROW_AWS_SDK_VERSION_CHECK(major, minor, patch) \
51-
((AWS_SDK_VERSION_MAJOR > (major) || \
52-
(AWS_SDK_VERSION_MAJOR == (major) && AWS_SDK_VERSION_MINOR > (minor)) || \
53-
((AWS_SDK_VERSION_MAJOR == (major) && AWS_SDK_VERSION_MINOR == (minor) && \
54-
AWS_SDK_VERSION_PATCH >= (patch)))))
55-
# else
56-
# define ARROW_AWS_SDK_VERSION_CHECK(major, minor, patch) 0
57-
# endif
58-
#endif // !ARROW_AWS_SDK_VERSION_CHECK
59-
60-
#if ARROW_AWS_SDK_VERSION_CHECK(1, 9, 201)
61-
# define ARROW_S3_HAS_SSE_CUSTOMER_KEY
62-
#endif
63-
6443
namespace arrow {
6544
namespace fs {
6645
namespace internal {
@@ -350,14 +329,9 @@ inline Result<std::optional<SSECustomerKeyHeaders>> GetSSECustomerKeyHeaders(
350329
if (sse_customer_key.empty()) {
351330
return std::nullopt;
352331
}
353-
#ifdef ARROW_S3_HAS_SSE_CUSTOMER_KEY
354332
ARROW_ASSIGN_OR_RAISE(auto md5, internal::CalculateSSECustomerKeyMD5(sse_customer_key));
355333
return SSECustomerKeyHeaders{arrow::util::base64_encode(sse_customer_key), md5,
356334
"AES256"};
357-
#else
358-
return Status::NotImplemented(
359-
"SSE customer key not supported by this version of the AWS SDK");
360-
#endif
361335
}
362336

363337
template <typename S3RequestType>
@@ -366,16 +340,11 @@ Status SetSSECustomerKey(S3RequestType* request, const std::string& sse_customer
366340
if (!maybe_headers.has_value()) {
367341
return Status::OK();
368342
}
369-
#ifdef ARROW_S3_HAS_SSE_CUSTOMER_KEY
370343
auto headers = std::move(maybe_headers).value();
371344
request->SetSSECustomerKey(headers.sse_customer_key);
372345
request->SetSSECustomerKeyMD5(headers.sse_customer_key_md5);
373346
request->SetSSECustomerAlgorithm(headers.sse_customer_algorithm);
374347
return Status::OK();
375-
#else
376-
return Status::NotImplemented(
377-
"SSE customer key not supported by this version of the AWS SDK");
378-
#endif
379348
}
380349

381350
} // namespace internal

cpp/src/arrow/filesystem/s3fs.cc

Lines changed: 16 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,13 @@
5555
#include <aws/core/utils/logging/ConsoleLogSystem.h>
5656
#include <aws/core/utils/stream/PreallocatedStreamBuf.h>
5757
#include <aws/core/utils/xml/XmlSerializer.h>
58+
#include <aws/crt/io/Bootstrap.h>
59+
#include <aws/crt/io/EventLoopGroup.h>
60+
#include <aws/crt/io/HostResolver.h>
5861
#include <aws/identity-management/auth/STSAssumeRoleCredentialsProvider.h>
5962
#include <aws/s3/S3Client.h>
63+
#include <aws/s3/S3ClientConfiguration.h>
64+
#include <aws/s3/S3EndpointProvider.h>
6065
#include <aws/s3/S3Errors.h>
6166
#include <aws/s3/model/AbortMultipartUploadRequest.h>
6267
#include <aws/s3/model/CompleteMultipartUploadRequest.h>
@@ -78,42 +83,18 @@
7883
#include <aws/s3/model/PutObjectResult.h>
7984
#include <aws/s3/model/UploadPartRequest.h>
8085

81-
// AWS_SDK_VERSION_{MAJOR,MINOR,PATCH} are available since 1.9.7.
82-
#if defined(AWS_SDK_VERSION_MAJOR) && defined(AWS_SDK_VERSION_MINOR) && \
83-
defined(AWS_SDK_VERSION_PATCH)
8486
// Redundant "(...)" are for suppressing "Weird number of spaces at
8587
// line-start. Are you using a 2-space indent? [whitespace/indent]
8688
// [3]" errors...
87-
# define ARROW_AWS_SDK_VERSION_CHECK(major, minor, patch) \
88-
((AWS_SDK_VERSION_MAJOR > (major) || \
89-
(AWS_SDK_VERSION_MAJOR == (major) && AWS_SDK_VERSION_MINOR > (minor)) || \
90-
((AWS_SDK_VERSION_MAJOR == (major) && AWS_SDK_VERSION_MINOR == (minor) && \
91-
AWS_SDK_VERSION_PATCH >= (patch)))))
92-
#else
93-
# define ARROW_AWS_SDK_VERSION_CHECK(major, minor, patch) 0
94-
#endif
95-
96-
// This feature is available since 1.9.0 but
97-
// AWS_SDK_VERSION_{MAJOR,MINOR,PATCH} are available since 1.9.7. So
98-
// we can't use this feature for [1.9.0,1.9.6]. If it's a problem,
99-
// please report it to our issue tracker.
100-
#if ARROW_AWS_SDK_VERSION_CHECK(1, 9, 0)
101-
# define ARROW_S3_HAS_CRT
102-
#endif
103-
104-
#if ARROW_AWS_SDK_VERSION_CHECK(1, 10, 0)
105-
# define ARROW_S3_HAS_S3CLIENT_CONFIGURATION
106-
#endif
107-
108-
#ifdef ARROW_S3_HAS_CRT
109-
# include <aws/crt/io/Bootstrap.h>
110-
# include <aws/crt/io/EventLoopGroup.h>
111-
# include <aws/crt/io/HostResolver.h>
112-
#endif
113-
114-
#ifdef ARROW_S3_HAS_S3CLIENT_CONFIGURATION
115-
# include <aws/s3/S3ClientConfiguration.h>
116-
# include <aws/s3/S3EndpointProvider.h>
89+
#define ARROW_AWS_SDK_VERSION_CHECK(major, minor, patch) \
90+
((AWS_SDK_VERSION_MAJOR > (major) || \
91+
(AWS_SDK_VERSION_MAJOR == (major) && AWS_SDK_VERSION_MINOR > (minor)) || \
92+
((AWS_SDK_VERSION_MAJOR == (major) && AWS_SDK_VERSION_MINOR == (minor) && \
93+
AWS_SDK_VERSION_PATCH >= (patch)))))
94+
95+
// Keep this in sync with ThirdpartyToolchain.cmake
96+
#if !defined(AWS_SDK_VERSION_MAJOR) || !ARROW_AWS_SDK_VERSION_CHECK(1, 11, 0)
97+
# error "AWS SDK version 1.11.0 or later is required"
11798
#endif
11899

119100
#include "arrow/util/windows_fixup.h"
@@ -792,22 +773,6 @@ class S3Client : public Aws::S3::S3Client {
792773
std::shared_ptr<S3RetryStrategy> s3_retry_strategy_;
793774
};
794775

795-
// In AWS SDK < 1.8, Aws::Client::ClientConfiguration::followRedirects is a bool.
796-
template <bool Never = false>
797-
void DisableRedirectsImpl(bool* followRedirects) {
798-
*followRedirects = false;
799-
}
800-
801-
// In AWS SDK >= 1.8, it's a Aws::Client::FollowRedirectsPolicy scoped enum.
802-
template <typename PolicyEnum, PolicyEnum Never = PolicyEnum::NEVER>
803-
void DisableRedirectsImpl(PolicyEnum* followRedirects) {
804-
*followRedirects = Never;
805-
}
806-
807-
void DisableRedirects(Aws::Client::ClientConfiguration* c) {
808-
DisableRedirectsImpl(&c->followRedirects);
809-
}
810-
811776
// -----------------------------------------------------------------------
812777
// S3 client protection against use after finalization
813778
//
@@ -978,8 +943,6 @@ Result<std::shared_ptr<S3ClientHolder>> GetClientHolder(
978943
// -----------------------------------------------------------------------
979944
// S3 client factory: build S3Client from S3Options
980945

981-
#ifdef ARROW_S3_HAS_S3CLIENT_CONFIGURATION
982-
983946
// GH-40279: standard initialization of S3Client creates a new `S3EndpointProvider`
984947
// every time. Its construction takes 1ms, which makes instantiating every S3Client
985948
// very costly (see upstream bug report
@@ -1104,8 +1067,6 @@ class EndpointProviderCache {
11041067
std::unordered_map<EndpointConfigKey, CacheValue> cache_;
11051068
};
11061069

1107-
#endif // ARROW_S3_HAS_S3CLIENT_CONFIGURATION
1108-
11091070
class ClientBuilder {
11101071
public:
11111072
explicit ClientBuilder(S3Options options) : options_(std::move(options)) {}
@@ -1188,17 +1149,10 @@ class ClientBuilder {
11881149
const bool use_virtual_addressing =
11891150
options_.endpoint_override.empty() || options_.force_virtual_addressing;
11901151

1191-
#ifdef ARROW_S3_HAS_S3CLIENT_CONFIGURATION
11921152
client_config_.useVirtualAddressing = use_virtual_addressing;
11931153
auto endpoint_provider = EndpointProviderCache::Instance()->Lookup(client_config_);
11941154
auto client = std::make_shared<S3Client>(credentials_provider_, endpoint_provider,
11951155
client_config_);
1196-
#else
1197-
auto client = std::make_shared<S3Client>(
1198-
credentials_provider_, client_config_,
1199-
Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never,
1200-
use_virtual_addressing);
1201-
#endif
12021156
client->s3_retry_strategy_ = options_.retry_strategy;
12031157
return GetClientHolder(std::move(client));
12041158
}
@@ -1207,11 +1161,7 @@ class ClientBuilder {
12071161

12081162
protected:
12091163
S3Options options_;
1210-
#ifdef ARROW_S3_HAS_S3CLIENT_CONFIGURATION
12111164
Aws::S3::S3ClientConfiguration client_config_;
1212-
#else
1213-
Aws::Client::ClientConfiguration client_config_;
1214-
#endif
12151165
std::shared_ptr<Aws::Auth::AWSCredentialsProvider> credentials_provider_;
12161166
};
12171167

@@ -1275,7 +1225,8 @@ class RegionResolver {
12751225
Status Init() {
12761226
DCHECK(builder_.options().endpoint_override.empty());
12771227
// On Windows with AWS SDK >= 1.8, it is necessary to disable redirects (ARROW-10085).
1278-
DisableRedirects(builder_.mutable_config());
1228+
builder_.mutable_config()->followRedirects =
1229+
Aws::Client::FollowRedirectsPolicy::NEVER;
12791230
return builder_.BuildClient().Value(&holder_);
12801231
}
12811232

@@ -2391,8 +2342,6 @@ class S3FileSystem::Impl : public std::enable_shared_from_this<S3FileSystem::Imp
23912342
req.SetCopySourceSSECustomerKeyMD5(sse_headers.sse_customer_key_md5);
23922343
req.SetCopySourceSSECustomerAlgorithm(sse_headers.sse_customer_algorithm);
23932344
}
2394-
// ARROW-13048: Copy source "Must be URL-encoded" according to AWS SDK docs.
2395-
// However at least in 1.8 and 1.9 the SDK URL-encodes the path for you
23962345
req.SetCopySource(src_path.ToAwsString());
23972346
return OutcomeToStatus(
23982347
std::forward_as_tuple("When copying key '", src_path.key, "' in bucket '",
@@ -3500,9 +3449,7 @@ struct AwsInstance {
35003449
return;
35013450
}
35023451
GetClientFinalizer()->Finalize();
3503-
#ifdef ARROW_S3_HAS_S3CLIENT_CONFIGURATION
35043452
EndpointProviderCache::Instance()->Reset();
3505-
#endif
35063453
Aws::ShutdownAPI(aws_options_);
35073454
}
35083455
}
@@ -3529,7 +3476,6 @@ struct AwsInstance {
35293476

35303477
#undef LOG_LEVEL_CASE
35313478

3532-
#ifdef ARROW_S3_HAS_CRT
35333479
aws_options_.ioOptions.clientBootstrap_create_fn =
35343480
[ev_threads = options.num_event_loop_threads]() {
35353481
// https://github.com/aws/aws-sdk-cpp/blob/1.11.15/src/aws-cpp-sdk-core/source/Aws.cpp#L65
@@ -3541,18 +3487,14 @@ struct AwsInstance {
35413487
client_bootstrap->EnableBlockingShutdown();
35423488
return client_bootstrap;
35433489
};
3544-
#endif
35453490
aws_options_.loggingOptions.logLevel = aws_log_level;
35463491
// By default the AWS SDK logs to files, log to console instead
35473492
aws_options_.loggingOptions.logger_create_fn = [this] {
35483493
return std::make_shared<Aws::Utils::Logging::ConsoleLogSystem>(
35493494
aws_options_.loggingOptions.logLevel);
35503495
};
3551-
#if ARROW_AWS_SDK_VERSION_CHECK(1, 9, 272)
35523496
// ARROW-18290: escape all special chars for compatibility with non-AWS S3 backends.
3553-
// This configuration options is only available with AWS SDK 1.9.272 and later.
35543497
aws_options_.httpOptions.compliantRfc3986Encoding = true;
3555-
#endif
35563498
aws_options_.httpOptions.installSigPipeHandler = options.install_sigpipe_handler;
35573499
Aws::InitAPI(aws_options_);
35583500
}

0 commit comments

Comments
 (0)