Skip to content

Commit 51bc2a6

Browse files
authored
GH-41797: [C++][S3] Remove GetBucketRegion hack for newer AWS SDK versions (#41798)
### Rationale for this change To get the region a S3 bucket resides on, it is required to issue a HeadBucket request and parse the response headers for a certain header value. Unfortunately, the AWS SDK doesn't let us access arbitrary headers on successful responses for S3 model requests, which had us implement a workaround by calling lower-level SDK APIs. However, the SDK recently added a `GetBucketRegion` method on `HeadBucketRequest`, which obsoletes the need for this workaround. We now use this method if the available AWS SDK version is recent enough. ### Are these changes tested? By existing tests on the various CI configurations. ### Are there any user-facing changes? No. * GitHub Issue: #41797 Authored-by: Antoine Pitrou <antoine@python.org> Signed-off-by: Antoine Pitrou <antoine@python.org>
1 parent cd7ebc0 commit 51bc2a6

File tree

1 file changed

+59
-22
lines changed

1 file changed

+59
-22
lines changed

cpp/src/arrow/filesystem/s3fs.cc

Lines changed: 59 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -601,44 +601,81 @@ class S3Client : public Aws::S3::S3Client {
601601
public:
602602
using Aws::S3::S3Client::S3Client;
603603

604+
static inline constexpr auto kBucketRegionHeaderName = "x-amz-bucket-region";
605+
606+
std::string GetBucketRegionFromHeaders(
607+
const Aws::Http::HeaderValueCollection& headers) {
608+
const auto it = headers.find(ToAwsString(kBucketRegionHeaderName));
609+
if (it != headers.end()) {
610+
return std::string(FromAwsString(it->second));
611+
}
612+
return std::string();
613+
}
614+
615+
template <typename ErrorType>
616+
Result<std::string> GetBucketRegionFromError(
617+
const std::string& bucket, const Aws::Client::AWSError<ErrorType>& error) {
618+
std::string region = GetBucketRegionFromHeaders(error.GetResponseHeaders());
619+
if (!region.empty()) {
620+
return region;
621+
} else if (error.GetResponseCode() == Aws::Http::HttpResponseCode::NOT_FOUND) {
622+
return Status::IOError("Bucket '", bucket, "' not found");
623+
} else {
624+
return ErrorToStatus(
625+
std::forward_as_tuple("When resolving region for bucket '", bucket, "': "),
626+
"HeadBucket", error);
627+
}
628+
}
629+
630+
#if ARROW_AWS_SDK_VERSION_CHECK(1, 11, 212)
631+
// HeadBucketResult::GetBucketRegion appeared in AWS SDK 1.11.212
632+
Result<std::string> GetBucketRegion(const std::string& bucket,
633+
const S3Model::HeadBucketRequest& request) {
634+
auto outcome = this->HeadBucket(request);
635+
if (!outcome.IsSuccess()) {
636+
return GetBucketRegionFromError(bucket, outcome.GetError());
637+
}
638+
auto&& region = std::move(outcome).GetResult().GetBucketRegion();
639+
if (region.empty()) {
640+
return Status::IOError("When resolving region for bucket '", request.GetBucket(),
641+
"': missing 'x-amz-bucket-region' header in response");
642+
}
643+
return region;
644+
}
645+
#else
604646
// To get a bucket's region, we must extract the "x-amz-bucket-region" header
605647
// from the response to a HEAD bucket request.
606648
// Unfortunately, the S3Client APIs don't let us access the headers of successful
607649
// responses. So we have to cook a AWS request and issue it ourselves.
608-
609-
Result<std::string> GetBucketRegion(const S3Model::HeadBucketRequest& request) {
650+
Result<std::string> GetBucketRegion(const std::string& bucket,
651+
const S3Model::HeadBucketRequest& request) {
610652
auto uri = GeneratePresignedUrl(request.GetBucket(),
611653
/*key=*/"", Aws::Http::HttpMethod::HTTP_HEAD);
612654
// NOTE: The signer region argument isn't passed here, as there's no easy
613655
// way of computing it (the relevant method is private).
614656
auto outcome = MakeRequest(uri, request, Aws::Http::HttpMethod::HTTP_HEAD,
615657
Aws::Auth::SIGV4_SIGNER);
616-
const auto code = outcome.IsSuccess() ? outcome.GetResult().GetResponseCode()
617-
: outcome.GetError().GetResponseCode();
618-
const auto& headers = outcome.IsSuccess()
619-
? outcome.GetResult().GetHeaderValueCollection()
620-
: outcome.GetError().GetResponseHeaders();
621-
622-
const auto it = headers.find(ToAwsString("x-amz-bucket-region"));
623-
if (it == headers.end()) {
624-
if (code == Aws::Http::HttpResponseCode::NOT_FOUND) {
625-
return Status::IOError("Bucket '", request.GetBucket(), "' not found");
626-
} else if (!outcome.IsSuccess()) {
627-
return ErrorToStatus(std::forward_as_tuple("When resolving region for bucket '",
628-
request.GetBucket(), "': "),
629-
"HeadBucket", outcome.GetError());
630-
} else {
631-
return Status::IOError("When resolving region for bucket '", request.GetBucket(),
632-
"': missing 'x-amz-bucket-region' header in response");
633-
}
658+
if (!outcome.IsSuccess()) {
659+
return GetBucketRegionFromError(bucket, outcome.GetError());
660+
}
661+
std::string region =
662+
GetBucketRegionFromHeaders(outcome.GetResult().GetHeaderValueCollection());
663+
if (!region.empty()) {
664+
return region;
665+
} else if (outcome.GetResult().GetResponseCode() ==
666+
Aws::Http::HttpResponseCode::NOT_FOUND) {
667+
return Status::IOError("Bucket '", request.GetBucket(), "' not found");
668+
} else {
669+
return Status::IOError("When resolving region for bucket '", request.GetBucket(),
670+
"': missing 'x-amz-bucket-region' header in response");
634671
}
635-
return std::string(FromAwsString(it->second));
636672
}
673+
#endif
637674

638675
Result<std::string> GetBucketRegion(const std::string& bucket) {
639676
S3Model::HeadBucketRequest req;
640677
req.SetBucket(ToAwsString(bucket));
641-
return GetBucketRegion(req);
678+
return GetBucketRegion(bucket, req);
642679
}
643680

644681
S3Model::CompleteMultipartUploadOutcome CompleteMultipartUploadWithErrorFixup(

0 commit comments

Comments
 (0)