Skip to content

Commit c00e1f2

Browse files
authored
Merge pull request #113 from samansmink/remove-unnecessary-head-request
Avoid sending HEAD request on writes
2 parents 0065c42 + 9d24bd1 commit c00e1f2

File tree

3 files changed

+82
-0
lines changed

3 files changed

+82
-0
lines changed

extension/httpfs/httpfs.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,11 @@ unique_ptr<FileHandle> HTTPFileSystem::OpenFileExtended(const OpenFileInfo &file
333333
}
334334

335335
auto handle = CreateHandle(file, flags, opener);
336+
337+
if (flags.OpenForWriting() && !flags.OpenForAppending() && !flags.OpenForReading()) {
338+
handle->write_overwrite_mode = true;
339+
}
340+
336341
handle->Initialize(opener);
337342

338343
DUCKDB_LOG_FILE_SYSTEM_OPEN((*handle));
@@ -584,6 +589,14 @@ void HTTPFileHandle::LoadFileInfo() {
584589
// already initialized or we specifically do not want to perform a head request and just run a direct download
585590
return;
586591
}
592+
593+
// In write_overwrite_mode the current size of the file is irrelevant (it is being overwritten), so no HEAD request is needed
594+
if (write_overwrite_mode) {
595+
length = 0;
596+
initialized = true;
597+
return;
598+
}
599+
587600
auto &hfs = file_system.Cast<HTTPFileSystem>();
588601
auto res = hfs.HeadRequest(*this, path, {});
589602
if (res->status != HTTPStatusCode::OK_200) {

extension/httpfs/include/httpfs.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ class HTTPFileHandle : public FileHandle {
5151
bool force_full_download;
5252
bool initialized = false;
5353

54+
// In write overwrite mode, we are not interested in the current state of the file: we're overwriting it.
55+
bool write_overwrite_mode = false;
56+
5457
// When using full file download, the full file will be written to a cached file handle
5558
unique_ptr<CachedFileHandle> cached_file_handle;
5659

test/sql/copy/no_head_on_write.test

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# name: test/sql/copy/no_head_on_write.test
2+
# description: Confirm that we don't send head requests for writes
3+
# group: [copy]
4+
5+
require-env S3_TEST_SERVER_AVAILABLE 1
6+
7+
require-env AWS_DEFAULT_REGION
8+
9+
require-env AWS_ACCESS_KEY_ID
10+
11+
require-env AWS_SECRET_ACCESS_KEY
12+
13+
require-env DUCKDB_S3_ENDPOINT
14+
15+
require-env DUCKDB_S3_USE_SSL
16+
17+
require httpfs
18+
19+
require parquet
20+
21+
statement ok
22+
SET enable_logging=true
23+
24+
statement ok
25+
set s3_use_ssl='${DUCKDB_S3_USE_SSL}'
26+
27+
statement ok
28+
set s3_endpoint='${DUCKDB_S3_ENDPOINT}'
29+
30+
statement ok
31+
set s3_region='${AWS_DEFAULT_REGION}'
32+
33+
# Create the S3 secret (requester-pays enabled) used by the writes below
34+
statement ok
35+
CREATE SECRET s1 (
36+
TYPE S3,
37+
KEY_ID '${AWS_ACCESS_KEY_ID}',
38+
SECRET '${AWS_SECRET_ACCESS_KEY}',
39+
REQUESTER_PAYS true
40+
)
41+
42+
statement ok
43+
CALL enable_logging('HTTP');
44+
45+
statement ok
46+
copy (select 1 as a) to 's3://test-bucket/test-file.parquet'
47+
48+
query I
49+
select request.type FROM duckdb_logs_parsed('HTTP')
50+
----
51+
POST
52+
PUT
53+
POST
54+
55+
statement ok
56+
CALL truncate_duckdb_logs();
57+
58+
statement ok
59+
copy (select 1 as a) to 's3://test-bucket/test-file.csv'
60+
61+
query I
62+
select request.type FROM duckdb_logs_parsed('HTTP')
63+
----
64+
POST
65+
PUT
66+
POST

0 commit comments

Comments
 (0)