
Commit 07bc396

Whenever uploading a single buffer: single PUT instead of multipart upload
1 parent 81bb127 commit 07bc396


4 files changed: +38 −8 lines changed

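In plain terms: when exactly one buffer needs to be flushed and no multipart upload has been started, the write is now issued as a single PutObject PUT instead of the multipart sequence (initiate POST, one PUT per part, complete POST). The following is a minimal standalone sketch of the resulting request patterns, not code from this commit; RequestsForUpload and the hard-coded verb lists are hypothetical illustrations that mirror what the updated tests below assert.

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Hypothetical helper: the HTTP requests an upload is expected to issue
// after this change.
//   one buffer   -> a single PutObject PUT
//   many buffers -> multipart upload: initiate (POST), one PUT per part,
//                   complete (POST)
std::vector<std::string> RequestsForUpload(std::size_t buffer_count) {
	if (buffer_count == 1) {
		return {"PUT"};
	}
	std::vector<std::string> requests {"POST"}; // CreateMultipartUpload
	for (std::size_t i = 0; i < buffer_count; i++) {
		requests.push_back("PUT"); // UploadPart
	}
	requests.push_back("POST"); // CompleteMultipartUpload
	return requests;
}

int main() {
	for (std::size_t buffers : {std::size_t(1), std::size_t(2)}) {
		std::cout << buffers << " buffer(s):";
		for (auto &request : RequestsForUpload(buffers)) {
			std::cout << " " << request;
		}
		std::cout << "\n";
	}
	return 0;
}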

extension/httpfs/include/s3fs.hpp

Lines changed: 2 additions & 0 deletions
@@ -215,6 +215,8 @@ class S3FileSystem : public HTTPFileSystem {
 	// Uploads the contents of write_buffer to S3.
 	// Note: caller is responsible to not call this method twice on the same buffer
 	static void UploadBuffer(S3FileHandle &file_handle, shared_ptr<S3WriteBuffer> write_buffer);
+	static void UploadSingleBuffer(S3FileHandle &file_handle, shared_ptr<S3WriteBuffer> write_buffer);
+	static void UploadBufferImplementation(S3FileHandle &file_handle, shared_ptr<S3WriteBuffer> write_buffer, string query_param, bool direct_throw);

 	vector<OpenFileInfo> Glob(const string &glob_pattern, FileOpener *opener = nullptr) override;
 	bool ListFiles(const string &directory, const std::function<void(const string &, bool)> &callback,

extension/httpfs/s3fs.cpp

Lines changed: 28 additions & 5 deletions
@@ -355,10 +355,21 @@ void S3FileSystem::NotifyUploadsInProgress(S3FileHandle &file_handle) {
 }

 void S3FileSystem::UploadBuffer(S3FileHandle &file_handle, shared_ptr<S3WriteBuffer> write_buffer) {
-	auto &s3fs = (S3FileSystem &)file_handle.file_system;
-
 	string query_param = "partNumber=" + to_string(write_buffer->part_no + 1) + "&" +
 	                     "uploadId=" + S3FileSystem::UrlEncode(file_handle.multipart_upload_id, true);
+
+	UploadBufferImplementation(file_handle, write_buffer, query_param, false);
+
+	NotifyUploadsInProgress(file_handle);
+}
+
+void S3FileSystem::UploadSingleBuffer(S3FileHandle &file_handle, shared_ptr<S3WriteBuffer> write_buffer) {
+	UploadBufferImplementation(file_handle, write_buffer, "", true);
+}
+
+void S3FileSystem::UploadBufferImplementation(S3FileHandle &file_handle, shared_ptr<S3WriteBuffer> write_buffer, string query_param, bool single_upload) {
+	auto &s3fs = (S3FileSystem &)file_handle.file_system;
+
 	unique_ptr<HTTPResponse> res;
 	string etag;

@@ -376,6 +387,9 @@ void S3FileSystem::UploadBuffer(S3FileHandle &file_handle, shared_ptr<S3WriteBuf
 		}
 		etag = res->headers.GetHeaderValue("ETag");
 	} catch (std::exception &ex) {
+		if (single_upload) {
+			throw;
+		}
 		ErrorData error(ex);
 		if (error.Type() != ExceptionType::IO && error.Type() != ExceptionType::HTTP) {
 			throw;
@@ -387,6 +401,7 @@ void S3FileSystem::UploadBuffer(S3FileHandle &file_handle, shared_ptr<S3WriteBuf
 			file_handle.upload_exception = std::current_exception();
 		}

+		D_ASSERT(!single_upload); // If we are here we are in the multi-buffer situation
 		NotifyUploadsInProgress(file_handle);
 		return;
 	}
@@ -401,8 +416,6 @@ void S3FileSystem::UploadBuffer(S3FileHandle &file_handle, shared_ptr<S3WriteBuf

 	// Free up space for another thread to acquire an S3WriteBuffer
 	write_buffer.reset();
-
-	NotifyUploadsInProgress(file_handle);
 }

 void S3FileSystem::FlushBuffer(S3FileHandle &file_handle, shared_ptr<S3WriteBuffer> write_buffer) {
@@ -465,7 +478,13 @@ void S3FileSystem::FlushAllBuffers(S3FileHandle &file_handle) {
 	file_handle.write_buffers_lock.unlock();

 	if (file_handle.initialized_multipart_upload == false) {
-		file_handle.multipart_upload_id = InitializeMultipartUpload(file_handle);
+		if (to_flush.size() == 1) {
+			UploadSingleBuffer(file_handle, to_flush[0]);
+			file_handle.upload_finalized = true;
+			return;
+		} else {
+			file_handle.multipart_upload_id = InitializeMultipartUpload(file_handle);
+		}
 	}
 	// Flush all buffers that aren't already uploading
 	for (auto &write_buffer : to_flush) {
@@ -483,6 +502,10 @@ void S3FileSystem::FlushAllBuffers(S3FileHandle &file_handle) {

 void S3FileSystem::FinalizeMultipartUpload(S3FileHandle &file_handle) {
 	auto &s3fs = (S3FileSystem &)file_handle.file_system;
+	if (file_handle.upload_finalized) {
+		return;
+	}
+
 	file_handle.upload_finalized = true;

 	std::stringstream ss;
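A note on the error handling introduced above: UploadBufferImplementation takes a single_upload flag. When it is true, an exception from the PUT is rethrown straight to the caller; the multipart path keeps the existing behaviour of recording IO/HTTP errors in file_handle.upload_exception so they can be surfaced later. Below is a minimal standalone sketch of that record-or-rethrow pattern, using hypothetical simplified types (FakeHandle, Upload) rather than the DuckDB ones.

#include <exception>
#include <iostream>
#include <stdexcept>

struct FakeHandle {
	// Stands in for the file handle's stored upload error.
	std::exception_ptr upload_exception;
};

void Upload(FakeHandle &handle, bool single_upload, bool fail) {
	try {
		if (fail) {
			throw std::runtime_error("simulated HTTP failure");
		}
	} catch (...) {
		if (single_upload) {
			throw; // single PUT: the caller sees the error immediately
		}
		// multipart part: store the error so it can be surfaced later
		handle.upload_exception = std::current_exception();
	}
}

int main() {
	FakeHandle handle;
	Upload(handle, /*single_upload=*/false, /*fail=*/true);
	std::cout << "multipart path stored an exception: "
	          << (handle.upload_exception ? "yes" : "no") << "\n";
	try {
		Upload(handle, /*single_upload=*/true, /*fail=*/true);
	} catch (std::exception &ex) {
		std::cout << "single PUT path rethrew: " << ex.what() << "\n";
	}
	return 0;
}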

test/configs/duckdb-tests.json

Lines changed: 6 additions & 0 deletions
@@ -11,6 +11,12 @@
         "test/sql/secrets/secret_autoloading_errors.test",
         "test/sql/copy/csv/zstd_crash.test"
       ]
+    },
+    {
+      "reason": "Improved from 1 PUT + 2 POST to 1 PUT",
+      "paths": [
+        "test/sql/copy/s3/metadata_cache.test"
+      ]
     }
   ]
 }

test/sql/copy/no_head_on_write.test

Lines changed: 2 additions & 3 deletions
@@ -48,19 +48,18 @@ copy (select 1 as a) to 's3://test-bucket/test-file.parquet'
 query I
 select request.type FROM duckdb_logs_parsed('HTTP')
 ----
-POST
 PUT
-POST

 statement ok
 CALL truncate_duckdb_logs();

 statement ok
-copy (select 1 as a) to 's3://test-bucket/test-file.csv'
+copy (select random() as a FROM range(8000000)) to 's3://test-bucket/test-file2.csv'

 query I
 select request.type FROM duckdb_logs_parsed('HTTP')
 ----
 POST
 PUT
+PUT
 POST
