Skip to content

Commit 8174190

Browse files
Optimize VFS::touch and fix defects and race conditions. (#5285)
[SC-53188](https://app.shortcut.com/tiledb-inc/story/53188/azure-update-touch-to-use-conditional-requests) [SC-53189](https://app.shortcut.com/tiledb-inc/story/53189/gcs-update-touch-to-use-conditional-requests) [SC-54494](https://app.shortcut.com/tiledb-inc/story/54494/win-touch-is-prone-to-race-conditions) This PR updates the implementations of `VFS::touch` for Azure, GCS and Windows to improve performance and fix race conditions: * On Azure we remove checking if the blob exists, and instead use a single [conditional request](https://learn.microsoft.com/en-us/rest/api/storageservices/specifying-conditional-headers-for-blob-service-operations) to upload an empty blob if it does not already exist. This reduces round-trips and fixes race conditions if the blob was created between the existence check and the upload. * GCS did not even have an existence check, which means that the behavior of `touch` was incorrect and always overwrote the file. This was fixed by adding a [request precondition](https://cloud.google.com/storage/docs/request-preconditions). * On Windows the existence check was removed. We were already doing the right thing and opened the file with `CREATE_NEW`, but did not mask failures with `ERROR_FILE_EXISTS` status code. We fix this. Additionally, the documentation of `tiledb_vfs_touch` was updated to mention that the file is created only if it does not exist, matching existing behavior in most VFS implmentations, and the expectations from the `touch` command. Testing coverage was added. --- TYPE: BUG DESC: Fixed `tiledb_vfs_touch` to not overwrite existing files on GCS. --- TYPE: BUG DESC: Fixed `tiledb_vfs_touch` to not overwrite existing files or fail on Azure and Windows respectively, under race conditions. --- TYPE: C_API DESC: Update documentation of `tiledb_vfs_touch` to specify that existing files are not overwritten, matching the current behavior.
1 parent 6c2363b commit 8174190

File tree

6 files changed

+37
-12
lines changed

6 files changed

+37
-12
lines changed

test/src/unit-vfs.cc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -619,6 +619,12 @@ TEST_CASE("VFS: test ls_with_sizes", "[vfs][ls-with-sizes]") {
619619
// Directories don't get a size
620620
REQUIRE(children[1].file_size() == 0);
621621

622+
// Touch does not overwrite an existing file.
623+
require_tiledb_ok(vfs_ls.touch(URI(subdir_file)));
624+
uint64_t size;
625+
require_tiledb_ok(vfs_ls.file_size(URI(subdir_file), &size));
626+
REQUIRE(size == 6);
627+
622628
// Clean up
623629
require_tiledb_ok(vfs_ls.remove_dir(URI(path)));
624630
}

tiledb/api/c_api/vfs/vfs_api_external.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -640,7 +640,9 @@ TILEDB_EXPORT capi_return_t tiledb_vfs_fh_is_closed(
640640
tiledb_ctx_t* ctx, tiledb_vfs_fh_t* fh, int32_t* is_closed) TILEDB_NOEXCEPT;
641641

642642
/**
643-
* Touches a file, i.e., creates a new empty file.
643+
* Touches a file, i.e., creates a new empty file if it does not already exist.
644+
*
645+
* The access timestamps of the file are not guaranteed to change.
644646
*
645647
* **Example:**
646648
*

tiledb/sm/cpp_api/vfs.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -519,7 +519,8 @@ class VFS {
519519
ctx.ptr().get(), vfs_.get(), old_uri.c_str(), new_uri.c_str()));
520520
}
521521

522-
/** Touches a file with the input URI, i.e., creates a new empty file. */
522+
/** Touches a file with the input URI, i.e., creates a new empty file if it
523+
* does not already exist. */
523524
void touch(const std::string& uri) const {
524525
auto& ctx = ctx_.get();
525526
ctx.handle_error(

tiledb/sm/filesystem/azure.cc

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -730,16 +730,22 @@ void Azure::touch(const URI& uri) const {
730730
throw AzureException(
731731
"Cannot create file; URI is a directory: " + uri.to_string());
732732
}
733-
if (is_blob(uri)) {
734-
return;
735-
}
736733

737734
auto [container_name, blob_path] = parse_azure_uri(uri);
738735
try {
736+
::Azure::Core::IO::MemoryBodyStream stream(nullptr, 0);
737+
::Azure::Storage::Blobs::UploadBlockBlobOptions options;
738+
// Set condition that the object does not exist.
739+
options.AccessConditions.IfNoneMatch = ::Azure::ETag::Any();
740+
739741
c.GetBlobContainerClient(container_name)
740742
.GetBlockBlobClient(blob_path)
741-
.UploadFrom(nullptr, 0);
743+
.Upload(stream, options);
742744
} catch (const ::Azure::Storage::StorageException& e) {
745+
if (e.StatusCode == ::Azure::Core::Http::HttpStatusCode::Conflict) {
746+
// Blob already exists.
747+
return;
748+
}
743749
throw AzureException(
744750
"Touch blob failed on: " + uri.to_string() + "; " + e.Message);
745751
}

tiledb/sm/filesystem/gcs.cc

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -741,10 +741,20 @@ Status GCS::touch(const URI& uri) const {
741741
RETURN_NOT_OK(parse_gcs_uri(uri, &bucket_name, &object_path));
742742

743743
google::cloud::StatusOr<google::cloud::storage::ObjectMetadata>
744-
object_metadata = client_->InsertObject(bucket_name, object_path, "");
744+
object_metadata = client_->InsertObject(
745+
bucket_name,
746+
object_path,
747+
"",
748+
// An if-generation-match value of zero makes the request succeed only
749+
// if the object does not exist.
750+
// https://cloud.google.com/storage/docs/request-preconditions#special-case
751+
google::cloud::storage::IfGenerationMatch(0));
745752

746753
if (!object_metadata.ok()) {
747-
const google::cloud::Status status = object_metadata.status();
754+
const google::cloud::Status& status = object_metadata.status();
755+
if (status.code() == google::cloud::StatusCode::kFailedPrecondition) {
756+
return Status::Ok();
757+
}
748758

749759
throw GCSException(
750760
"Touch object failed on: " + uri.to_string() + " (" + status.message() +

tiledb/sm/filesystem/win.cc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -161,10 +161,6 @@ Status Win::create_dir(const std::string& path) const {
161161
}
162162

163163
Status Win::touch(const std::string& filename) const {
164-
if (is_file(filename)) {
165-
return Status::Ok();
166-
}
167-
168164
HANDLE file_h = CreateFile(
169165
filename.c_str(),
170166
GENERIC_WRITE,
@@ -180,6 +176,10 @@ Status Win::touch(const std::string& filename) const {
180176
tiledb::common::ScopedExecutor onexit1(closefileonexit);
181177
if (file_h == INVALID_HANDLE_VALUE) {
182178
auto gle = GetLastError();
179+
if (gle == ERROR_FILE_EXISTS) {
180+
// Do not fail if the file already exists.
181+
return Status::Ok();
182+
}
183183
return LOG_STATUS(Status_IOError(
184184
std::string("Failed to create file '") + filename + " (" +
185185
get_last_error_msg(gle, "CreateFile") + ")'"));

0 commit comments

Comments
 (0)