Skip to content

Commit 4cf8a59

Browse files
authored
Merge pull request #52 from carlopi/httputil_port_changes
HTTPUtil: port changes from duckdb's #17486
2 parents 5d76c4b + 6a09982 commit 4cf8a59

File tree

9 files changed

+73
-199
lines changed

9 files changed

+73
-199
lines changed

duckdb

Submodule duckdb updated 91 files

extension/httpfs/hffs.cpp

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ static string ParseNextUrlFromLinkHeader(const string &link_header_content) {
4545
HFFileHandle::~HFFileHandle() {};
4646

4747
unique_ptr<HTTPClient> HFFileHandle::CreateClient() {
48-
return http_params.http_util->InitializeClient(http_params, parsed_url.endpoint);
48+
return http_params.http_util.InitializeClient(http_params, parsed_url.endpoint);
4949
}
5050

5151
string HuggingFaceFileSystem::ListHFRequest(ParsedHFUrl &url, HTTPFSParams &http_params, string &next_page_url,
@@ -69,7 +69,7 @@ string HuggingFaceFileSystem::ListHFRequest(ParsedHFUrl &url, HTTPFSParams &http
6969
response << string(const_char_ptr_cast(data), data_length);
7070
return true;
7171
});
72-
auto res = http_params.http_util->Request(get_request);
72+
auto res = http_params.http_util.Request(get_request);
7373
if (res->status != HTTPStatusCode::OK_200) {
7474
throw IOException(res->GetError() + " error for HTTP GET to '" + next_page_url + "'");
7575
}
@@ -205,7 +205,9 @@ vector<OpenFileInfo> HuggingFaceFileSystem::Glob(const string &path, FileOpener
205205

206206
FileOpenerInfo info;
207207
info.file_path = path;
208-
auto http_params = HTTPFSParams::ReadFrom(opener, info);
208+
auto http_util = HTTPFSUtil::GetHTTPUtil(opener);
209+
auto params = http_util->InitializeParameters(opener, info);
210+
auto &http_params = params->Cast<HTTPFSParams>();
209211
SetParams(http_params, path, opener);
210212
auto http_state = HTTPState::TryGetState(opener).get();
211213

@@ -278,10 +280,11 @@ unique_ptr<HTTPFileHandle> HuggingFaceFileSystem::CreateHandle(const OpenFileInf
278280
FileOpenerInfo info;
279281
info.file_path = file.path;
280282

281-
auto params = HTTPFSParams::ReadFrom(opener, info);
282-
SetParams(params, file.path, opener);
283+
auto http_util = HTTPFSUtil::GetHTTPUtil(opener);
284+
auto params = http_util->InitializeParameters(opener, info);
285+
SetParams(params->Cast<HTTPFSParams>(), file.path, opener);
283286

284-
return duckdb::make_uniq<HFFileHandle>(*this, std::move(parsed_url), file, flags, params);
287+
return duckdb::make_uniq<HFFileHandle>(*this, std::move(parsed_url), file, flags, std::move(params));
285288
}
286289

287290
void HuggingFaceFileSystem::SetParams(HTTPFSParams &params, const string &path, optional_ptr<FileOpener> opener) {

extension/httpfs/httpfs.cpp

Lines changed: 36 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
namespace duckdb {
2323

24-
shared_ptr<HTTPUtil> GetHTTPUtil(optional_ptr<FileOpener> opener) {
24+
shared_ptr<HTTPUtil> HTTPFSUtil::GetHTTPUtil(optional_ptr<FileOpener> opener) {
2525
if (opener) {
2626
auto db = opener->TryGetDatabase();
2727
if (db) {
@@ -32,65 +32,55 @@ shared_ptr<HTTPUtil> GetHTTPUtil(optional_ptr<FileOpener> opener) {
3232
return make_shared_ptr<HTTPFSUtil>();
3333
}
3434

35-
HTTPFSParams HTTPFSParams::ReadFrom(optional_ptr<FileOpener> opener, optional_ptr<FileOpenerInfo> info) {
36-
HTTPFSParams result;
37-
result.http_util = GetHTTPUtil(opener);
35+
unique_ptr<HTTPParams> HTTPFSUtil::InitializeParameters(optional_ptr<FileOpener> opener, optional_ptr<FileOpenerInfo> info) {
36+
auto result = make_uniq<HTTPFSParams>(*this);
37+
result->Initialize(opener);
3838

3939
// No point in continuing without an opener
4040
if (!opener) {
41-
return result;
41+
return std::move(result);
4242
}
4343

4444
Value value;
4545

4646
// Setting lookups
47-
FileOpener::TryGetCurrentSetting(opener, "http_timeout", result.timeout, info);
48-
FileOpener::TryGetCurrentSetting(opener, "force_download", result.force_download, info);
49-
FileOpener::TryGetCurrentSetting(opener, "http_retries", result.retries, info);
50-
FileOpener::TryGetCurrentSetting(opener, "http_retry_wait_ms", result.retry_wait_ms, info);
51-
FileOpener::TryGetCurrentSetting(opener, "http_retry_backoff", result.retry_backoff, info);
52-
FileOpener::TryGetCurrentSetting(opener, "http_keep_alive", result.keep_alive, info);
53-
FileOpener::TryGetCurrentSetting(opener, "enable_server_cert_verification", result.enable_server_cert_verification,
47+
FileOpener::TryGetCurrentSetting(opener, "http_timeout", result->timeout, info);
48+
FileOpener::TryGetCurrentSetting(opener, "force_download", result->force_download, info);
49+
FileOpener::TryGetCurrentSetting(opener, "http_retries", result->retries, info);
50+
FileOpener::TryGetCurrentSetting(opener, "http_retry_wait_ms", result->retry_wait_ms, info);
51+
FileOpener::TryGetCurrentSetting(opener, "http_retry_backoff", result->retry_backoff, info);
52+
FileOpener::TryGetCurrentSetting(opener, "http_keep_alive", result->keep_alive, info);
53+
FileOpener::TryGetCurrentSetting(opener, "enable_server_cert_verification", result->enable_server_cert_verification,
5454
info);
55-
FileOpener::TryGetCurrentSetting(opener, "ca_cert_file", result.ca_cert_file, info);
56-
FileOpener::TryGetCurrentSetting(opener, "hf_max_per_page", result.hf_max_per_page, info);
55+
FileOpener::TryGetCurrentSetting(opener, "ca_cert_file", result->ca_cert_file, info);
56+
FileOpener::TryGetCurrentSetting(opener, "hf_max_per_page", result->hf_max_per_page, info);
5757

5858
// HTTP Secret lookups
5959
KeyValueSecretReader settings_reader(*opener, info, "http");
6060

61-
auto client_context = FileOpener::TryGetClientContext(opener);
62-
if (client_context) {
63-
result.Initialize(*client_context);
64-
} else {
65-
auto db = FileOpener::TryGetDatabase(opener);
66-
if (db) {
67-
result.Initialize(*db);
68-
}
69-
}
70-
7161
string proxy_setting;
7262
if (settings_reader.TryGetSecretKey<string>("http_proxy", proxy_setting) && !proxy_setting.empty()) {
7363
idx_t port;
7464
string host;
7565
HTTPUtil::ParseHTTPProxyHost(proxy_setting, host, port);
76-
result.http_proxy = host;
77-
result.http_proxy_port = port;
66+
result->http_proxy = host;
67+
result->http_proxy_port = port;
7868
}
79-
settings_reader.TryGetSecretKey<string>("http_proxy_username", result.http_proxy_username);
80-
settings_reader.TryGetSecretKey<string>("http_proxy_password", result.http_proxy_password);
81-
settings_reader.TryGetSecretKey<string>("bearer_token", result.bearer_token);
69+
settings_reader.TryGetSecretKey<string>("http_proxy_username", result->http_proxy_username);
70+
settings_reader.TryGetSecretKey<string>("http_proxy_password", result->http_proxy_password);
71+
settings_reader.TryGetSecretKey<string>("bearer_token", result->bearer_token);
8272

8373
Value extra_headers;
8474
if (settings_reader.TryGetSecretKey("extra_http_headers", extra_headers)) {
8575
auto children = MapValue::GetChildren(extra_headers);
8676
for (const auto &child : children) {
8777
auto kv = StructValue::GetChildren(child);
8878
D_ASSERT(kv.size() == 2);
89-
result.extra_headers[kv[0].GetValue<string>()] = kv[1].GetValue<string>();
79+
result->extra_headers[kv[0].GetValue<string>()] = kv[1].GetValue<string>();
9080
}
9181
}
9282

93-
return result;
83+
return std::move(result);
9484
}
9585

9686
unique_ptr<HTTPClient> HTTPClientCache::GetClient() {
@@ -113,7 +103,7 @@ unique_ptr<HTTPResponse> HTTPFileSystem::PostRequest(FileHandle &handle, string
113103
string &buffer_out, char *buffer_in, idx_t buffer_in_len,
114104
string params) {
115105
auto &hfh = handle.Cast<HTTPFileHandle>();
116-
auto &http_util = *hfh.http_params.http_util;
106+
auto &http_util = hfh.http_params.http_util;
117107
PostRequestInfo post_request(url, header_map, hfh.http_params, const_data_ptr_cast(buffer_in), buffer_in_len);
118108
auto result = http_util.Request(post_request);
119109
buffer_out = std::move(post_request.buffer_out);
@@ -123,7 +113,7 @@ unique_ptr<HTTPResponse> HTTPFileSystem::PostRequest(FileHandle &handle, string
123113
unique_ptr<HTTPResponse> HTTPFileSystem::PutRequest(FileHandle &handle, string url, HTTPHeaders header_map,
124114
char *buffer_in, idx_t buffer_in_len, string params) {
125115
auto &hfh = handle.Cast<HTTPFileHandle>();
126-
auto &http_util = *hfh.http_params.http_util;
116+
auto &http_util = hfh.http_params.http_util;
127117
string content_type = "application/octet-stream";
128118
PutRequestInfo put_request(url, header_map, hfh.http_params, (const_data_ptr_t)buffer_in, buffer_in_len,
129119
content_type);
@@ -132,7 +122,7 @@ unique_ptr<HTTPResponse> HTTPFileSystem::PutRequest(FileHandle &handle, string u
132122

133123
unique_ptr<HTTPResponse> HTTPFileSystem::HeadRequest(FileHandle &handle, string url, HTTPHeaders header_map) {
134124
auto &hfh = handle.Cast<HTTPFileHandle>();
135-
auto &http_util = *hfh.http_params.http_util;
125+
auto &http_util = hfh.http_params.http_util;
136126
auto http_client = hfh.GetClient();
137127

138128
HeadRequestInfo head_request(url, header_map, hfh.http_params);
@@ -144,7 +134,7 @@ unique_ptr<HTTPResponse> HTTPFileSystem::HeadRequest(FileHandle &handle, string
144134

145135
unique_ptr<HTTPResponse> HTTPFileSystem::DeleteRequest(FileHandle &handle, string url, HTTPHeaders header_map) {
146136
auto &hfh = handle.Cast<HTTPFileHandle>();
147-
auto &http_util = *hfh.http_params.http_util;
137+
auto &http_util = hfh.http_params.http_util;
148138
auto http_client = hfh.GetClient();
149139
DeleteRequestInfo delete_request(url, header_map, hfh.http_params);
150140
auto response = http_util.Request(delete_request, http_client);
@@ -166,7 +156,7 @@ HTTPException HTTPFileSystem::GetHTTPError(FileHandle &, const HTTPResponse &res
166156

167157
unique_ptr<HTTPResponse> HTTPFileSystem::GetRequest(FileHandle &handle, string url, HTTPHeaders header_map) {
168158
auto &hfh = handle.Cast<HTTPFileHandle>();
169-
auto &http_util = *hfh.http_params.http_util;
159+
auto &http_util = hfh.http_params.http_util;
170160

171161
D_ASSERT(hfh.cached_file_handle);
172162

@@ -215,7 +205,7 @@ unique_ptr<HTTPResponse> HTTPFileSystem::GetRequest(FileHandle &handle, string u
215205
unique_ptr<HTTPResponse> HTTPFileSystem::GetRangeRequest(FileHandle &handle, string url, HTTPHeaders header_map,
216206
idx_t file_offset, char *buffer_out, idx_t buffer_out_len) {
217207
auto &hfh = handle.Cast<HTTPFileHandle>();
218-
auto &http_util = *hfh.http_params.http_util;
208+
auto &http_util = hfh.http_params.http_util;
219209

220210
// send the Range header to read only subset of file
221211
string range_expr = "bytes=" + to_string(file_offset) + "-" + to_string(file_offset + buffer_out_len - 1);
@@ -286,8 +276,8 @@ void TimestampToTimeT(timestamp_t timestamp, time_t &result) {
286276
}
287277

288278
HTTPFileHandle::HTTPFileHandle(FileSystem &fs, const OpenFileInfo &file, FileOpenFlags flags,
289-
HTTPFSParams http_params_p)
290-
: FileHandle(fs, file.path, flags), http_params(std::move(http_params_p)), flags(flags), length(0),
279+
unique_ptr<HTTPParams> params_p)
280+
: FileHandle(fs, file.path, flags), params(std::move(params_p)), http_params(params->Cast<HTTPFSParams>()), flags(flags), length(0),
291281
buffer_available(0), buffer_idx(0), file_offset(0), buffer_start(0), buffer_end(0) {
292282
// check if the handle has extended properties that can be set directly in the handle
293283
// if we have these properties we don't need to do a head request to obtain them later
@@ -318,7 +308,9 @@ unique_ptr<HTTPFileHandle> HTTPFileSystem::CreateHandle(const OpenFileInfo &file
318308

319309
FileOpenerInfo info;
320310
info.file_path = file.path;
321-
auto params = HTTPFSParams::ReadFrom(opener, info);
311+
312+
auto http_util = HTTPFSUtil::GetHTTPUtil(opener);
313+
auto params = http_util->InitializeParameters(opener, info);
322314

323315
auto secret_manager = FileOpener::TryGetSecretManager(opener);
324316
auto transaction = FileOpener::TryGetCatalogTransaction(opener);
@@ -327,10 +319,11 @@ unique_ptr<HTTPFileHandle> HTTPFileSystem::CreateHandle(const OpenFileInfo &file
327319

328320
if (secret_match.HasMatch()) {
329321
const auto &kv_secret = dynamic_cast<const KeyValueSecret &>(*secret_match.secret_entry->secret);
330-
params.bearer_token = kv_secret.TryGetValue("token", true).ToString();
322+
auto &httpfs_params = params->Cast<HTTPFSParams>();
323+
httpfs_params.bearer_token = kv_secret.TryGetValue("token", true).ToString();
331324
}
332325
}
333-
return duckdb::make_uniq<HTTPFileHandle>(*this, file, flags, params);
326+
return duckdb::make_uniq<HTTPFileHandle>(*this, file, flags, std::move(params));
334327
}
335328

336329
unique_ptr<FileHandle> HTTPFileSystem::OpenFileExtended(const OpenFileInfo &file, FileOpenFlags flags,
@@ -711,7 +704,7 @@ unique_ptr<HTTPClient> HTTPFileHandle::CreateClient() {
711704
// Create a new client
712705
string path_out, proto_host_port;
713706
HTTPUtil::DecomposeURL(path, path_out, proto_host_port);
714-
return http_params.http_util->InitializeClient(http_params, proto_host_port);
707+
return http_params.http_util.InitializeClient(http_params, proto_host_port);
715708
}
716709

717710
void HTTPFileHandle::StoreClient(unique_ptr<HTTPClient> client) {

extension/httpfs/httpfs_client.cpp

Lines changed: 2 additions & 131 deletions
Original file line numberDiff line numberDiff line change
@@ -167,137 +167,8 @@ unordered_map<string, string> HTTPFSUtil::ParseGetParameters(const string &text)
167167
return result;
168168
}
169169

170-
string HTTPFSUtil::GetStatusMessage(HTTPStatusCode status) {
171-
switch (status) {
172-
case HTTPStatusCode::Continue_100:
173-
return "Continue";
174-
case HTTPStatusCode::SwitchingProtocol_101:
175-
return "Switching Protocol";
176-
case HTTPStatusCode::Processing_102:
177-
return "Processing";
178-
case HTTPStatusCode::EarlyHints_103:
179-
return "Early Hints";
180-
case HTTPStatusCode::OK_200:
181-
return "OK";
182-
case HTTPStatusCode::Created_201:
183-
return "Created";
184-
case HTTPStatusCode::Accepted_202:
185-
return "Accepted";
186-
case HTTPStatusCode::NonAuthoritativeInformation_203:
187-
return "Non-Authoritative Information";
188-
case HTTPStatusCode::NoContent_204:
189-
return "No Content";
190-
case HTTPStatusCode::ResetContent_205:
191-
return "Reset Content";
192-
case HTTPStatusCode::PartialContent_206:
193-
return "Partial Content";
194-
case HTTPStatusCode::MultiStatus_207:
195-
return "Multi-Status";
196-
case HTTPStatusCode::AlreadyReported_208:
197-
return "Already Reported";
198-
case HTTPStatusCode::IMUsed_226:
199-
return "IM Used";
200-
case HTTPStatusCode::MultipleChoices_300:
201-
return "Multiple Choices";
202-
case HTTPStatusCode::MovedPermanently_301:
203-
return "Moved Permanently";
204-
case HTTPStatusCode::Found_302:
205-
return "Found";
206-
case HTTPStatusCode::SeeOther_303:
207-
return "See Other";
208-
case HTTPStatusCode::NotModified_304:
209-
return "Not Modified";
210-
case HTTPStatusCode::UseProxy_305:
211-
return "Use Proxy";
212-
case HTTPStatusCode::unused_306:
213-
return "unused";
214-
case HTTPStatusCode::TemporaryRedirect_307:
215-
return "Temporary Redirect";
216-
case HTTPStatusCode::PermanentRedirect_308:
217-
return "Permanent Redirect";
218-
case HTTPStatusCode::BadRequest_400:
219-
return "Bad Request";
220-
case HTTPStatusCode::Unauthorized_401:
221-
return "Unauthorized";
222-
case HTTPStatusCode::PaymentRequired_402:
223-
return "Payment Required";
224-
case HTTPStatusCode::Forbidden_403:
225-
return "Forbidden";
226-
case HTTPStatusCode::NotFound_404:
227-
return "Not Found";
228-
case HTTPStatusCode::MethodNotAllowed_405:
229-
return "Method Not Allowed";
230-
case HTTPStatusCode::NotAcceptable_406:
231-
return "Not Acceptable";
232-
case HTTPStatusCode::ProxyAuthenticationRequired_407:
233-
return "Proxy Authentication Required";
234-
case HTTPStatusCode::RequestTimeout_408:
235-
return "Request Timeout";
236-
case HTTPStatusCode::Conflict_409:
237-
return "Conflict";
238-
case HTTPStatusCode::Gone_410:
239-
return "Gone";
240-
case HTTPStatusCode::LengthRequired_411:
241-
return "Length Required";
242-
case HTTPStatusCode::PreconditionFailed_412:
243-
return "Precondition Failed";
244-
case HTTPStatusCode::PayloadTooLarge_413:
245-
return "Payload Too Large";
246-
case HTTPStatusCode::UriTooLong_414:
247-
return "URI Too Long";
248-
case HTTPStatusCode::UnsupportedMediaType_415:
249-
return "Unsupported Media Type";
250-
case HTTPStatusCode::RangeNotSatisfiable_416:
251-
return "Range Not Satisfiable";
252-
case HTTPStatusCode::ExpectationFailed_417:
253-
return "Expectation Failed";
254-
case HTTPStatusCode::ImATeapot_418:
255-
return "I'm a teapot";
256-
case HTTPStatusCode::MisdirectedRequest_421:
257-
return "Misdirected Request";
258-
case HTTPStatusCode::UnprocessableContent_422:
259-
return "Unprocessable Content";
260-
case HTTPStatusCode::Locked_423:
261-
return "Locked";
262-
case HTTPStatusCode::FailedDependency_424:
263-
return "Failed Dependency";
264-
case HTTPStatusCode::TooEarly_425:
265-
return "Too Early";
266-
case HTTPStatusCode::UpgradeRequired_426:
267-
return "Upgrade Required";
268-
case HTTPStatusCode::PreconditionRequired_428:
269-
return "Precondition Required";
270-
case HTTPStatusCode::TooManyRequests_429:
271-
return "Too Many Requests";
272-
case HTTPStatusCode::RequestHeaderFieldsTooLarge_431:
273-
return "Request Header Fields Too Large";
274-
case HTTPStatusCode::UnavailableForLegalReasons_451:
275-
return "Unavailable For Legal Reasons";
276-
case HTTPStatusCode::NotImplemented_501:
277-
return "Not Implemented";
278-
case HTTPStatusCode::BadGateway_502:
279-
return "Bad Gateway";
280-
case HTTPStatusCode::ServiceUnavailable_503:
281-
return "Service Unavailable";
282-
case HTTPStatusCode::GatewayTimeout_504:
283-
return "Gateway Timeout";
284-
case HTTPStatusCode::HttpVersionNotSupported_505:
285-
return "HTTP Version Not Supported";
286-
case HTTPStatusCode::VariantAlsoNegotiates_506:
287-
return "Variant Also Negotiates";
288-
case HTTPStatusCode::InsufficientStorage_507:
289-
return "Insufficient Storage";
290-
case HTTPStatusCode::LoopDetected_508:
291-
return "Loop Detected";
292-
case HTTPStatusCode::NotExtended_510:
293-
return "Not Extended";
294-
case HTTPStatusCode::NetworkAuthenticationRequired_511:
295-
return "Network Authentication Required";
296-
297-
default:
298-
case HTTPStatusCode::InternalServerError_500:
299-
return "Internal Server Error";
300-
}
170+
string HTTPFSUtil::GetName() const {
171+
return "HTTPFS";
301172
}
302173

303174
} // namespace duckdb

extension/httpfs/include/hffs.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,8 @@ class HFFileHandle : public HTTPFileHandle {
5757

5858
public:
5959
HFFileHandle(FileSystem &fs, ParsedHFUrl hf_url, const OpenFileInfo &file, FileOpenFlags flags,
60-
const HTTPFSParams &http_params)
61-
: HTTPFileHandle(fs, file, flags, http_params), parsed_url(std::move(hf_url)) {
60+
unique_ptr<HTTPParams> http_params)
61+
: HTTPFileHandle(fs, file, flags, std::move(http_params)), parsed_url(std::move(hf_url)) {
6262
}
6363
~HFFileHandle() override;
6464

0 commit comments

Comments
 (0)