Skip to content

Commit 64a1095

Browse files
authored
Merge pull request #223 from DinosL/improve_curl
CURL: Refactor the client to better use the curl API for constructing URLs
2 parents 934e40e + 779f661 commit 64a1095

File tree

3 files changed

+49
-25
lines changed

3 files changed

+49
-25
lines changed

src/httpfs_curl_client.cpp

Lines changed: 36 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,8 @@ static idx_t httpfs_client_count = 0;
115115
class HTTPFSCurlClient : public HTTPClient {
116116
public:
117117
HTTPFSCurlClient(HTTPFSParams &http_params, const string &proto_host_port) {
118-
// FIXME: proto_host_port is not used
118+
base_url = curl_url();
119+
curl_url_set(base_url, CURLUPART_URL, proto_host_port.c_str(), 0);
119120
Initialize(http_params);
120121
}
121122
void Initialize(HTTPParams &http_p) override {
@@ -178,18 +179,10 @@ class HTTPFSCurlClient : public HTTPClient {
178179
}
179180

180181
~HTTPFSCurlClient() {
182+
curl_url_cleanup(base_url);
181183
DestroyCurlGlobal();
182184
}
183185

184-
static string EncodeSpaces(const string &url) {
185-
string out;
186-
out.reserve(url.size());
187-
for (char c : url) {
188-
out += (c == ' ') ? "%20" : string(1, c);
189-
}
190-
return out;
191-
}
192-
193186
unique_ptr<HTTPResponse> Get(GetRequestInfo &info) override {
194187
ResetRequestInfo();
195188
if (state) {
@@ -201,12 +194,16 @@ class HTTPFSCurlClient : public HTTPClient {
201194

202195
CURLcode res;
203196
{
204-
// If the same handle served a HEAD request, we must set NOBODY back to 0L to request content again
205197
curl_easy_setopt(*curl, CURLOPT_NOBODY, 0L);
206-
auto encoded_url = EncodeSpaces(request_info->url);
207-
curl_easy_setopt(*curl, CURLOPT_URL, encoded_url.c_str());
198+
CURLU *url = curl_url_dup(base_url);
199+
curl_url_set(url, CURLUPART_URL, info.path.c_str(), 0);
200+
201+
curl_easy_setopt(*curl, CURLOPT_URL, nullptr);
202+
curl_easy_setopt(*curl, CURLOPT_CURLU, url);
208203
curl_easy_setopt(*curl, CURLOPT_HTTPHEADER, curl_headers ? curl_headers.headers : nullptr);
204+
209205
res = curl->Execute();
206+
curl_url_cleanup(url);
210207
}
211208

212209
curl_easy_getinfo(*curl, CURLINFO_RESPONSE_CODE, &request_info->response_code);
@@ -253,8 +250,11 @@ class HTTPFSCurlClient : public HTTPClient {
253250

254251
CURLcode res;
255252
{
256-
auto encoded_url = EncodeSpaces(request_info->url);
257-
curl_easy_setopt(*curl, CURLOPT_URL, encoded_url.c_str());
253+
CURLU *url = curl_url_dup(base_url);
254+
curl_url_set(url, CURLUPART_URL, info.path.c_str(), 0);
255+
256+
curl_easy_setopt(*curl, CURLOPT_URL, nullptr);
257+
curl_easy_setopt(*curl, CURLOPT_CURLU, url);
258258
// Perform PUT
259259
curl_easy_setopt(*curl, CURLOPT_CUSTOMREQUEST, "PUT");
260260
// Include PUT body
@@ -265,6 +265,7 @@ class HTTPFSCurlClient : public HTTPClient {
265265
curl_easy_setopt(*curl, CURLOPT_HTTPHEADER, curl_headers ? curl_headers.headers : nullptr);
266266

267267
res = curl->Execute();
268+
curl_url_cleanup(url);
268269
}
269270

270271
curl_easy_getinfo(*curl, CURLINFO_RESPONSE_CODE, &request_info->response_code);
@@ -284,19 +285,22 @@ class HTTPFSCurlClient : public HTTPClient {
284285

285286
CURLcode res;
286287
{
287-
// Set URL
288-
auto encoded_url = EncodeSpaces(request_info->url);
289-
curl_easy_setopt(*curl, CURLOPT_URL, encoded_url.c_str());
290-
291288
// Perform HEAD request instead of GET
292289
curl_easy_setopt(*curl, CURLOPT_NOBODY, 1L);
293290
curl_easy_setopt(*curl, CURLOPT_HTTPGET, 0L);
294291

292+
CURLU *url = curl_url_dup(base_url);
293+
curl_url_set(url, CURLUPART_URL, info.path.c_str(), 0);
294+
295+
curl_easy_setopt(*curl, CURLOPT_URL, nullptr);
296+
curl_easy_setopt(*curl, CURLOPT_CURLU, url);
297+
295298
// Add headers if any
296299
curl_easy_setopt(*curl, CURLOPT_HTTPHEADER, curl_headers ? curl_headers.headers : nullptr);
297300

298301
// Execute HEAD request
299302
res = curl->Execute();
303+
curl_url_cleanup(url);
300304
}
301305

302306
curl_easy_getinfo(*curl, CURLINFO_RESPONSE_CODE, &request_info->response_code);
@@ -315,9 +319,11 @@ class HTTPFSCurlClient : public HTTPClient {
315319

316320
CURLcode res;
317321
{
318-
// Set URL
319-
auto encoded_url = EncodeSpaces(request_info->url);
320-
curl_easy_setopt(*curl, CURLOPT_URL, encoded_url.c_str());
322+
CURLU *url = curl_url_dup(base_url);
323+
curl_url_set(url, CURLUPART_URL, info.path.c_str(), 0);
324+
325+
curl_easy_setopt(*curl, CURLOPT_URL, nullptr);
326+
curl_easy_setopt(*curl, CURLOPT_CURLU, url);
321327

322328
// Set DELETE request method
323329
curl_easy_setopt(*curl, CURLOPT_CUSTOMREQUEST, "DELETE");
@@ -330,6 +336,7 @@ class HTTPFSCurlClient : public HTTPClient {
330336

331337
// Execute DELETE request
332338
res = curl->Execute();
339+
curl_url_cleanup(url);
333340
}
334341

335342
// Get HTTP response status code
@@ -352,8 +359,11 @@ class HTTPFSCurlClient : public HTTPClient {
352359

353360
CURLcode res;
354361
{
355-
auto encoded_url = EncodeSpaces(request_info->url);
356-
curl_easy_setopt(*curl, CURLOPT_URL, encoded_url.c_str());
362+
CURLU *url = curl_url_dup(base_url);
363+
curl_url_set(url, CURLUPART_URL, info.path.c_str(), 0);
364+
365+
curl_easy_setopt(*curl, CURLOPT_URL, nullptr);
366+
curl_easy_setopt(*curl, CURLOPT_CURLU, url);
357367
if (info.send_post_as_get_request) {
358368
curl_easy_setopt(*curl, CURLOPT_CUSTOMREQUEST, "GET");
359369
} else {
@@ -368,6 +378,7 @@ class HTTPFSCurlClient : public HTTPClient {
368378

369379
// Execute POST request
370380
res = curl->Execute();
381+
curl_url_cleanup(url);
371382
}
372383

373384
curl_easy_getinfo(*curl, CURLINFO_RESPONSE_CODE, &request_info->response_code);
@@ -440,6 +451,7 @@ class HTTPFSCurlClient : public HTTPClient {
440451
unique_ptr<CURLHandle> curl;
441452
optional_ptr<HTTPState> state;
442453
unique_ptr<RequestInfo> request_info;
454+
CURLU *base_url = nullptr;
443455

444456
static std::mutex &GetRefLock() {
445457
static std::mutex mtx;

src/s3fs.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1433,7 +1433,7 @@ string AWSListObjectV2::Request(const string &path, HTTPParams &http_params, S3A
14331433

14341434
// Get requests use fresh connection
14351435
string full_host = parsed_url.http_proto + parsed_url.host;
1436-
string listobjectv2_url = full_host + req_path + "?" + req_params;
1436+
string listobjectv2_url = req_path + "?" + req_params;
14371437
std::stringstream response;
14381438
ErrorData error;
14391439
GetRequestInfo get_request(
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# name: test/sql/httpfs/check_glob_error_message.test
2+
# description: check error message consistency between curl and httplib
3+
# group: [httpfs]
4+
5+
require httpfs
6+
7+
require parquet
8+
9+
statement error
10+
FROM 's3://coiled-datasets/timeseries/20-years/parquet/1*.parquet'
11+
----
12+
<REGEX>:.*IO Error.*

0 commit comments

Comments
 (0)