-
Notifications
You must be signed in to change notification settings - Fork 202
Fix write / read metadata race on local filesystem. #5698
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
29122b1
8c0f30e
18171db
50970d6
0d747d0
dca3901
965ec63
ac56230
885d481
bfe511a
d8ec16b
ce1b545
c2e1816
21aed9c
e7c356b
c8fcade
3fd0a8e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -147,6 +147,7 @@ class FilesystemBase { | |
| * Retrieves all the entries contained in the parent. | ||
| * | ||
| * @param parent The target directory to list. | ||
| * @param get_sizes Flag to toggle retrieving file sizes. | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Leftover? |
||
| * @return All entries that are contained in the parent | ||
| */ | ||
| virtual std::vector<tiledb::common::filesystem::directory_entry> | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -191,6 +191,71 @@ bool URI::is_tiledb() const { | |
| return utils::parse::starts_with(uri_, "tiledb://"); | ||
| } | ||
|
|
||
| bool URI::is_timestamped_name() const { | ||
| std::string part = last_path_part(); | ||
| // __1_2_<32-digit-UUID> must be at minimum 38 characters long. | ||
| if (!part.starts_with("__") || part.size() < 38) { | ||
| return false; | ||
| } | ||
|
|
||
| // Separator between t1_t2. | ||
| size_t t1_separator = part.find('_', 2); | ||
| // Separator between t2_uuid. | ||
| size_t t2_separator = part.find('_', t1_separator + 1); | ||
| if (t1_separator == std::string::npos || t2_separator == std::string::npos) { | ||
| return false; | ||
| } | ||
|
|
||
| // Validate the timestamps are formatted correctly. | ||
| std::string t1 = part.substr(2, t1_separator - 2); | ||
| std::string t2 = | ||
| part.substr(t1_separator + 1, t2_separator - t1_separator - 1); | ||
| for (const auto& t : {t1, t2}) { | ||
| if (!std::all_of( | ||
| t.begin(), t.end(), [](const char c) { return std::isdigit(c); })) { | ||
| return false; | ||
| } | ||
| } | ||
| auto get_suffix = [](const std::string& p) -> std::string { | ||
| size_t suffix_separator = p.find('.'); | ||
| if (suffix_separator == std::string::npos) { | ||
| return ""; | ||
| } | ||
| return p.substr(suffix_separator); | ||
| }; | ||
|
|
||
| // Separator between uuid[_v]. | ||
| size_t uuid_separator = part.find('_', t2_separator + 1); | ||
| std::string uuid; | ||
| std::string suffix; | ||
| if (uuid_separator == std::string::npos) { | ||
| // There is no version; Check the UUID for the final suffix. | ||
| uuid = part.substr(t2_separator + 1); | ||
| suffix = get_suffix(uuid); | ||
| uuid.resize(uuid.size() - suffix.size()); | ||
| } else { | ||
| uuid = part.substr(t2_separator + 1, uuid_separator - t2_separator - 1); | ||
| std::string version = part.substr(uuid_separator + 1); | ||
| // Check the version for the final suffix. | ||
| suffix = get_suffix(version); | ||
| version.resize(version.size() - suffix.size()); | ||
| if (version.size() > std::to_string(constants::format_version).size()) { | ||
| return false; | ||
| } | ||
| } | ||
|
|
||
| // UUIDs generated for timestamped names are 32 characters long. | ||
| if (uuid.size() != 32) { | ||
| return false; | ||
| } | ||
|
|
||
| if (!suffix.empty() && suffix != constants::vacuum_file_suffix) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why special-case |
||
| return false; | ||
| } | ||
|
|
||
| return true; | ||
| } | ||
|
|
||
|
Comment on lines
+194
to
+258
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Share this logic with |
||
| std::optional<size_t> URI::get_storage_component_index( | ||
| size_t start_index) const { | ||
| // Find '://' between path and array name. iff it exists | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -364,10 +364,21 @@ bool VFS::is_bucket(const URI& uri) const { | |
| Status VFS::ls(const URI& parent, std::vector<URI>* uris) const { | ||
| stats_->add_counter("ls_num", 1); | ||
|
|
||
| for (auto& fs : ls_with_sizes(parent)) { | ||
| auto entries = parent.is_file() ? local_.ls_with_sizes(parent, false) : | ||
| ls_with_sizes(parent); | ||
| if (parent.is_file()) { | ||
| parallel_sort( | ||
| compute_tp_, | ||
| entries.begin(), | ||
| entries.end(), | ||
| [](const directory_entry& l, const directory_entry& r) { | ||
| return l.path().native() < r.path().native(); | ||
| }); | ||
| } | ||
|
Comment on lines
+369
to
+377
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is pre-existing, but curious; is there code in this repo that assumes that paths are sorted? Sorting paths when listing is not a good idea in general, and precludes incrementally reading them.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The only code expecting sorted results that I'm aware of is test code. For example here's the CI run with the failures that this produced when I accidentally bypassed sorting in c2e1816 https://github.com/TileDB-Inc/TileDB/actions/runs/19717674222/job/56493579308#step:14:32027 |
||
|
|
||
| for (const auto& fs : entries) { | ||
| uris->emplace_back(fs.path().native()); | ||
| } | ||
|
|
||
| return Status::Ok(); | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A better phrasing would be: