Skip to content

Commit f8ec2a2

Browse files
authored
Merge pull request #220 from Mytherin/regionhandling
S3: Improve handling around regions, and improve S3 error display
2 parents e722111 + 7a79e91 commit f8ec2a2

File tree

7 files changed

+302
-126
lines changed

7 files changed

+302
-126
lines changed

src/httpfs.cpp

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -286,13 +286,7 @@ unique_ptr<HTTPResponse> HTTPFileSystem::GetRangeRequest(FileHandle &handle, str
286286
url, header_map, hfh.http_params,
287287
[&](const HTTPResponse &response) {
288288
if (static_cast<int>(response.status) >= 400) {
289-
string error =
290-
"HTTP GET error on '" + url + "' (HTTP " + to_string(static_cast<int>(response.status)) + ")";
291-
if (response.status == HTTPStatusCode::RangeNotSatisfiable_416) {
292-
error += " This could mean the file was changed. Try disabling the duckdb http metadata cache "
293-
"if enabled, and confirm the server supports range requests.";
294-
}
295-
throw HTTPException(response, error);
289+
throw GetHTTPError(handle, response, url);
296290
}
297291
if (static_cast<int>(response.status) < 300) { // done redirecting
298292
out_offset = 0;
@@ -863,6 +857,20 @@ void HTTPFileHandle::AllocateReadBuffer(optional_ptr<FileOpener> opener) {
863857
read_buffer = allocator.Allocate(INITIAL_READ_BUFFER_LEN);
864858
}
865859

860+
void HTTPFileHandle::InitializeFromCacheEntry(const HTTPMetadataCacheEntry &cache_entry) {
861+
last_modified = cache_entry.last_modified;
862+
length = cache_entry.length;
863+
etag = cache_entry.etag;
864+
}
865+
866+
HTTPMetadataCacheEntry HTTPFileHandle::GetCacheEntry() const {
867+
HTTPMetadataCacheEntry result;
868+
result.length = length;
869+
result.last_modified = last_modified;
870+
result.etag = etag;
871+
return result;
872+
}
873+
866874
void HTTPFileHandle::Initialize(optional_ptr<FileOpener> opener) {
867875
auto &hfs = file_system.Cast<HTTPFileSystem>();
868876
http_params.state = HTTPState::TryGetState(opener);
@@ -888,9 +896,7 @@ void HTTPFileHandle::Initialize(optional_ptr<FileOpener> opener) {
888896
bool found = current_cache->Find(path, value);
889897

890898
if (found) {
891-
last_modified = value.last_modified;
892-
length = value.length;
893-
etag = value.etag;
899+
InitializeFromCacheEntry(value);
894900

895901
if (flags.OpenForReading() && !SkipBuffer()) {
896902
AllocateReadBuffer(opener);
@@ -908,7 +914,7 @@ void HTTPFileHandle::Initialize(optional_ptr<FileOpener> opener) {
908914
FullDownload(hfs, should_write_cache);
909915
}
910916
if (should_write_cache) {
911-
current_cache->Insert(path, {length, last_modified, etag});
917+
current_cache->Insert(path, GetCacheEntry());
912918
}
913919

914920
if (!SkipBuffer()) {

src/include/http_metadata_cache.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ struct HTTPMetadataCacheEntry {
1919
idx_t length;
2020
timestamp_t last_modified;
2121
string etag;
22+
unordered_map<string, string> properties;
2223
};
2324

2425
// Simple cache with a max age for an entry to be valid

src/include/httpfs.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,9 @@ class HTTPFileHandle : public FileHandle {
129129
//! TODO: make base function virtual?
130130
void TryAddLogger(FileOpener &opener);
131131

132+
virtual void InitializeFromCacheEntry(const HTTPMetadataCacheEntry &cache_entry);
133+
virtual HTTPMetadataCacheEntry GetCacheEntry() const;
134+
132135
public:
133136
//! Fully downloads a file
134137
void FullDownload(HTTPFileSystem &hfs, bool &should_write_cache);

src/include/s3fs.hpp

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,10 @@ struct S3AuthParams {
6262

6363
static S3AuthParams ReadFrom(optional_ptr<FileOpener> opener, FileOpenerInfo &info);
6464
static S3AuthParams ReadFrom(S3KeyValueReader &secret_reader, const std::string &file_path);
65+
void SetRegion(string region_p);
66+
67+
private:
68+
void InitializeEndpoint();
6569
};
6670

6771
struct AWSEnvironmentCredentialsProvider {
@@ -140,17 +144,7 @@ class S3FileHandle : public HTTPFileHandle {
140144

141145
public:
142146
S3FileHandle(FileSystem &fs, const OpenFileInfo &file, FileOpenFlags flags, unique_ptr<HTTPParams> http_params_p,
143-
const S3AuthParams &auth_params_p, const S3ConfigParams &config_params_p)
144-
: HTTPFileHandle(fs, file, flags, std::move(http_params_p)), auth_params(auth_params_p),
145-
config_params(config_params_p), uploads_in_progress(0), parts_uploaded(0), upload_finalized(false),
146-
uploader_has_error(false), upload_exception(nullptr) {
147-
auto_fallback_to_full_file_download = false;
148-
if (flags.OpenForReading() && flags.OpenForWriting()) {
149-
throw NotImplementedException("Cannot open an HTTP file for both reading and writing");
150-
} else if (flags.OpenForAppending()) {
151-
throw NotImplementedException("Cannot open an HTTP file for appending");
152-
}
153-
}
147+
const S3AuthParams &auth_params_p, const S3ConfigParams &config_params_p);
154148
~S3FileHandle() override;
155149

156150
S3AuthParams auth_params;
@@ -163,6 +157,10 @@ class S3FileHandle : public HTTPFileHandle {
163157

164158
shared_ptr<S3WriteBuffer> GetBuffer(uint16_t write_buffer_idx);
165159

160+
protected:
161+
void InitializeFromCacheEntry(const HTTPMetadataCacheEntry &cache_entry) override;
162+
HTTPMetadataCacheEntry GetCacheEntry() const override;
163+
166164
protected:
167165
string multipart_upload_id;
168166
size_t part_size;
@@ -258,6 +256,7 @@ class S3FileSystem : public HTTPFileSystem {
258256
}
259257

260258
static string GetS3BadRequestError(const S3AuthParams &s3_auth_params, string correct_region = "");
259+
static string ParseS3Error(const string &error);
261260
static string GetS3AuthError(const S3AuthParams &s3_auth_params);
262261
static string GetGCSAuthError(const S3AuthParams &s3_auth_params);
263262
static HTTPException GetS3Error(const S3AuthParams &s3_auth_params, const HTTPResponse &response,
@@ -289,7 +288,7 @@ class S3FileSystem : public HTTPFileSystem {
289288

290289
// Helper class to do s3 ListObjectV2 api call https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html
291290
struct AWSListObjectV2 {
292-
static string Request(const string &path, HTTPParams &http_params, const S3AuthParams &s3_auth_params,
291+
static string Request(const string &path, HTTPParams &http_params, S3AuthParams &s3_auth_params,
293292
string &continuation_token, optional_idx max_keys = optional_idx());
294293
static void ParseFileList(string &aws_response, vector<OpenFileInfo> &result);
295294
static vector<string> ParseCommonPrefix(string &aws_response);

0 commit comments

Comments
 (0)