Skip to content

Commit 04a0e53

Browse files
committed
# Conflicts: # extension/httpfs/s3fs.cpp
2 parents eba5b61 + dc87eee commit 04a0e53

13 files changed

+97
-67
lines changed

extension/httpfs/create_secret_functions.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ unique_ptr<BaseSecret> CreateS3SecretFunctions::CreateSecretFunctionInternal(Cli
111111
} else if (lower_name == "requester_pays") {
112112
if (named_param.second.type() != LogicalType::BOOLEAN) {
113113
throw InvalidInputException("Invalid type past to secret option: '%s', found '%s', expected: 'BOOLEAN'",
114-
lower_name, named_param.second.type().ToString());
114+
lower_name, named_param.second.type().ToString());
115115
}
116116
secret->secret_map["requester_pays"] = Value::BOOLEAN(named_param.second.GetValue<bool>());
117117
} else if (lower_name == "bearer_token" && input.type == "gcs") {
@@ -195,7 +195,7 @@ void CreateS3SecretFunctions::SetBaseNamedParams(CreateSecretFunction &function,
195195
function.named_parameters["use_ssl"] = LogicalType::BOOLEAN;
196196
function.named_parameters["kms_key_id"] = LogicalType::VARCHAR;
197197
function.named_parameters["url_compatibility_mode"] = LogicalType::BOOLEAN;
198-
function.named_parameters["requester_pays"] = LogicalType::BOOLEAN;
198+
function.named_parameters["requester_pays"] = LogicalType::BOOLEAN;
199199

200200
// Whether a secret refresh attempt should be made when the secret appears to be incorrect
201201
function.named_parameters["refresh"] = LogicalType::VARCHAR;
@@ -214,7 +214,7 @@ void CreateS3SecretFunctions::SetBaseNamedParams(CreateSecretFunction &function,
214214
if (type == "r2") {
215215
function.named_parameters["account_id"] = LogicalType::VARCHAR;
216216
}
217-
217+
218218
if (type == "gcs") {
219219
function.named_parameters["bearer_token"] = LogicalType::VARCHAR;
220220
}

extension/httpfs/httpfs.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ unique_ptr<HTTPParams> HTTPFSUtil::InitializeParameters(optional_ptr<FileOpener>
5151
info);
5252
FileOpener::TryGetCurrentSetting(opener, "ca_cert_file", result->ca_cert_file, info);
5353
FileOpener::TryGetCurrentSetting(opener, "hf_max_per_page", result->hf_max_per_page, info);
54+
FileOpener::TryGetCurrentSetting(opener, "unsafe_disable_etag_checks", result->unsafe_disable_etag_checks, info);
5455

5556
// HTTP Secret lookups
5657
KeyValueSecretReader settings_reader(*opener, info, "http");
@@ -226,6 +227,17 @@ unique_ptr<HTTPResponse> HTTPFileSystem::GetRangeRequest(FileHandle &handle, str
226227
}
227228
if (static_cast<int>(response.status) < 300) { // done redirecting
228229
out_offset = 0;
230+
231+
// Guard against the remote file changing between requests on the same handle: compare the ETag
// stored on the handle with the ETag returned by this range response.
// The comparison is only meaningful when the handle already holds an ETag. With an empty stored
// ETag every non-empty response ETag compares unequal, so the check would reject every response
// that carries an ETag and never run once an ETag is known.
// NOTE(review): hfh.etag is presumably filled in when the handle is initialized (e.g. from a HEAD
// request or extended file info) - confirm against the handle setup code.
// The check is skipped entirely when the `unsafe_disable_etag_checks` setting is enabled.
if (!hfh.http_params.unsafe_disable_etag_checks && !hfh.etag.empty() && response.HasHeader("ETag")) {
	string responseEtag = response.GetHeaderValue("ETag");

	// An empty ETag header value is not treated as a change; only a differing, non-empty one is.
	if (!responseEtag.empty() && responseEtag != hfh.etag) {
		throw HTTPException(response,
		                    "ETag was initially %s and now it returned %s, this likely means the remote file has "
		                    "changed.\nTry to restart the read or close the file-handle and read the file again "
		                    "(e.g. `DETACH` in the file is a database file).\nYou can disable checking etags via "
		                    "`SET unsafe_disable_etag_checks = true;`",
		                    hfh.etag, responseEtag);
	}
}
238+
239+
240+
229241
if (response.HasHeader("Content-Length")) {
230242
auto content_length = stoll(response.GetHeaderValue("Content-Length"));
231243
if ((idx_t)content_length != buffer_out_len) {
@@ -275,7 +287,8 @@ void TimestampToTimeT(timestamp_t timestamp, time_t &result) {
275287
HTTPFileHandle::HTTPFileHandle(FileSystem &fs, const OpenFileInfo &file, FileOpenFlags flags,
276288
unique_ptr<HTTPParams> params_p)
277289
: FileHandle(fs, file.path, flags), params(std::move(params_p)), http_params(params->Cast<HTTPFSParams>()),
278-
flags(flags), length(0), force_full_download(false), buffer_available(0), buffer_idx(0), file_offset(0), buffer_start(0), buffer_end(0) {
290+
flags(flags), length(0), force_full_download(false), buffer_available(0), buffer_idx(0), file_offset(0),
291+
buffer_start(0), buffer_end(0) {
279292
// check if the handle has extended properties that can be set directly in the handle
280293
// if we have these properties we don't need to do a head request to obtain them later
281294
if (file.extended_info) {

extension/httpfs/httpfs_client_wasm.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ unique_ptr<HTTPClient> HTTPFSUtil::InitializeClient(HTTPParams &http_params, con
99

1010
unordered_map<string, string> HTTPFSUtil::ParseGetParameters(const string &text) {
1111
unordered_map<string, string> result;
12-
//TODO: HTTPFSUtil::ParseGetParameters is currently not implemented
12+
// TODO: HTTPFSUtil::ParseGetParameters is currently not implemented
1313
return result;
1414
}
1515

extension/httpfs/httpfs_extension.cpp

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,23 +13,23 @@
1313
namespace duckdb {
1414

1515
static void SetHttpfsClientImplementation(DBConfig &config, const string &value) {
16-
if (config.http_util && config.http_util->GetName() == "WasmHTTPUtils") {
17-
if (value == "wasm" || value == "default") {
18-
// Already handled, do not override
19-
return;
20-
}
21-
throw InvalidInputException("Unsupported option for httpfs_client_implementation, only `wasm` and "
22-
"`default` are currently supported for duckdb-wasm");
23-
}
24-
if (value == "httplib" || value == "default") {
25-
if (!config.http_util || config.http_util->GetName() != "HTTPFSUtil") {
26-
config.http_util = make_shared_ptr<HTTPFSUtil>();
27-
}
16+
if (config.http_util && config.http_util->GetName() == "WasmHTTPUtils") {
17+
if (value == "wasm" || value == "default") {
18+
// Already handled, do not override
2819
return;
2920
}
30-
throw InvalidInputException("Unsupported option for httpfs_client_implementation, only `curl`, `httplib` and "
31-
"`default` are currently supported");
21+
throw InvalidInputException("Unsupported option for httpfs_client_implementation, only `wasm` and "
22+
"`default` are currently supported for duckdb-wasm");
23+
}
24+
if (value == "httplib" || value == "default") {
25+
if (!config.http_util || config.http_util->GetName() != "HTTPFSUtil") {
26+
config.http_util = make_shared_ptr<HTTPFSUtil>();
27+
}
28+
return;
3229
}
30+
throw InvalidInputException("Unsupported option for httpfs_client_implementation, only `curl`, `httplib` and "
31+
"`default` are currently supported");
32+
}
3333

3434
static void LoadInternal(DatabaseInstance &instance) {
3535
auto &fs = instance.GetFileSystem();
@@ -79,6 +79,8 @@ static void LoadInternal(DatabaseInstance &instance) {
7979
LogicalType::UBIGINT, Value(10000));
8080
config.AddExtensionOption("s3_uploader_thread_limit", "S3 Uploader global thread limit", LogicalType::UBIGINT,
8181
Value(50));
82+
config.AddExtensionOption("unsafe_disable_etag_checks", "Disable checks on ETag consistency",
83+
LogicalType::BOOLEAN, Value(false));
8284

8385
// HuggingFace options
8486
config.AddExtensionOption("hf_max_per_page", "Debug option to limit number of items returned in list requests",

extension/httpfs/include/httpfs_client.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ struct HTTPFSParams : public HTTPParams {
2121
idx_t hf_max_per_page = DEFAULT_HF_MAX_PER_PAGE;
2222
string ca_cert_file;
2323
string bearer_token;
24+
bool unsafe_disable_etag_checks {false};
2425
shared_ptr<HTTPState> state;
2526
};
2627

extension/httpfs/include/s3fs.hpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ struct S3AuthParams {
3030
string url_style;
3131
bool use_ssl = true;
3232
bool s3_url_compatibility_mode = false;
33-
bool requester_pays = false;
34-
string oauth2_bearer_token; // OAuth2 bearer token for GCS
33+
bool requester_pays = false;
34+
string oauth2_bearer_token; // OAuth2 bearer token for GCS
3535

3636
static S3AuthParams ReadFrom(optional_ptr<FileOpener> opener, FileOpenerInfo &info);
3737
};
@@ -47,7 +47,6 @@ struct AWSEnvironmentCredentialsProvider {
4747
static constexpr const char *DUCKDB_KMS_KEY_ID_ENV_VAR = "DUCKDB_S3_KMS_KEY_ID";
4848
static constexpr const char *DUCKDB_REQUESTER_PAYS_ENV_VAR = "DUCKDB_S3_REQUESTER_PAYS";
4949

50-
5150
explicit AWSEnvironmentCredentialsProvider(DBConfig &config) : config(config) {};
5251

5352
DBConfig &config;

0 commit comments

Comments
 (0)