Skip to content

Commit 523230a

Browse files
committed
Changes from review.
1 parent 2b03211 commit 523230a

File tree

9 files changed

+73
-39
lines changed

9 files changed

+73
-39
lines changed

format_spec/FORMAT_SPEC.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ title: Format Specification
66

77
* The current TileDB array format version number is **22** (`uint32_t`).
88
* Other structures might be versioned separately.
9+
* Readers should ignore files that do not follow the respective
10+
format specification.
911
* Data written by TileDB and referenced in this document is **little-endian**
1012
with the following exceptions:
1113

format_spec/metadata.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,6 @@ The metadata folder can contain any number of [timestamped](./timestamped_name.m
2323

2424
## Metadata File
2525

26-
Metadata files with the `.tmp` extension are used for local filesystems only, indicating the metadata is still being flushed to disk.
27-
2826
The metadata file consists of a single [generic tile](./generic_tile.md), containing multiple entries with the following data:
2927

3028
| **Field** | **Type** | **Description** |

tiledb/sm/array/array_directory.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -842,7 +842,7 @@ void ArrayDirectory::load_array_meta_uris() {
842842
uri_.join_path(constants::array_metadata_dir_name), &unfiltered_uris));
843843
for (const auto& unfiltered_uri : unfiltered_uris) {
844844
const auto& uri = unfiltered_uri.to_string();
845-
if (uri.ends_with(constants::temp_file_suffix)) {
845+
if (!unfiltered_uri.is_timestamped_name()) {
846846
resources_.get().logger()->debug(
847847
"Skipping partial array metadata file: '{}'", uri);
848848
continue;

tiledb/sm/filesystem/posix.cc

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -375,34 +375,35 @@ std::vector<directory_entry> Posix::ls_with_sizes(const URI& uri) const {
375375
if (!strcmp(next_path->d_name, ".") || !strcmp(next_path->d_name, ".."))
376376
continue;
377377
std::string abspath = path + "/" + next_path->d_name;
378-
// Do not attempt to retrieve file size for temporary metadata files that
379-
// are still flushing to disk.
380-
const bool temp_metadata =
381-
URI(abspath)
382-
.parent_path()
383-
.remove_trailing_slash()
384-
.to_string()
385-
.ends_with(constants::array_metadata_dir_name) &&
386-
abspath.ends_with(constants::temp_file_suffix);
387-
388-
// Getting the file size here incurs an additional system call
389-
// via file_size() and ls() calls will feel this too.
390-
// If this penalty becomes noticeable, we should just duplicate
391-
// this implementation in ls() and don't get the size
378+
392379
if (next_path->d_type == DT_DIR) {
393380
entries.emplace_back(abspath, 0, true);
394381
} else {
395-
uint64_t size = temp_metadata ? 0 : file_size(URI(abspath));
396-
entries.emplace_back(abspath, size, false);
382+
entries.emplace_back(abspath, file_size(URI(abspath)), false);
397383
}
398384
}
399385
return entries;
400386
}
401387

402388
Status Posix::ls(
403389
const std::string& path, std::vector<std::string>* paths) const {
404-
for (auto& fs : ls_with_sizes(URI(path))) {
405-
paths->emplace_back(fs.path().native());
390+
struct dirent* next_path = nullptr;
391+
auto dir = PosixDIR::open(path);
392+
if (dir.empty()) {
393+
return {};
394+
}
395+
396+
while ((next_path = readdir(dir.get())) != nullptr) {
397+
if (!strcmp(next_path->d_name, ".") || !strcmp(next_path->d_name, "..")) {
398+
continue;
399+
}
400+
std::string abspath = path + "/" + next_path->d_name;
401+
402+
if (next_path->d_type == DT_DIR) {
403+
paths->emplace_back(abspath);
404+
} else {
405+
paths->emplace_back(abspath);
406+
}
406407
}
407408

408409
return Status::Ok();

tiledb/sm/filesystem/uri.cc

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,49 @@ bool URI::is_tiledb() const {
191191
return utils::parse::starts_with(uri_, "tiledb://");
192192
}
193193

194+
bool URI::is_timestamped_name() const {
195+
std::string part = last_path_part();
196+
// __1_2_<32-digit-UUID> must be at minimum 38 characters long.
197+
if (!part.starts_with("__") || part.size() < 38) {
198+
return false;
199+
}
200+
201+
// Separator between t1_t2.
202+
size_t t1_separator = part.find('_', 2);
203+
// Separator between t2_uuid.
204+
size_t t2_separator = part.find('_', t1_separator + 1);
205+
if (t1_separator == std::string::npos || t2_separator == std::string::npos) {
206+
return false;
207+
}
208+
209+
// Validate the timestamps are formatted correctly.
210+
std::string t1 = part.substr(2, t1_separator - 2);
211+
std::string t2 = part.substr(t1_separator + 1, t2_separator - t1_separator - 1);
212+
for (const auto& t : {t1, t2}) {
213+
if (!std::ranges::all_of(t, [](const char c) { return std::isdigit(c); })) {
214+
return false;
215+
}
216+
}
217+
218+
// Separator between uuid[_v].
219+
size_t uuid_separator = part.find('_', t2_separator + 1);
220+
std::string uuid = part.substr(t2_separator + 1, uuid_separator);
221+
// UUIDs generated for timestamped names are 32 characters long.
222+
if (uuid.size() != 32) {
223+
return false;
224+
}
225+
226+
// Version is optional and may not appear in files using a timestamped name.
227+
if (uuid_separator != std::string::npos) {
228+
std::string version = part.substr(uuid_separator + 1);
229+
if (version.size() > std::to_string(constants::format_version).size()) {
230+
return false;
231+
}
232+
}
233+
234+
return true;
235+
}
236+
194237
std::optional<size_t> URI::get_storage_component_index(
195238
size_t start_index) const {
196239
// Find '://' between path and array name. iff it exists

tiledb/sm/filesystem/uri.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,13 @@ class URI {
226226
*/
227227
bool is_tiledb() const;
228228

229+
/**
230+
* Checks if the last part of the URI is a valid timestamped name.
231+
*
232+
* @return True if the last part if a timestamped name, else false.
233+
*/
234+
bool is_timestamped_name() const;
235+
229236
/**
230237
* Get the position in the input URI where the embedded storage URI starts, if
231238
* one can be found.

tiledb/sm/filesystem/win.cc

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -384,24 +384,13 @@ std::vector<directory_entry> Win::ls_with_sizes(const URI& uri) const {
384384
std::string file_path =
385385
path + (ends_with_slash ? "" : "\\") + find_data.cFileName;
386386

387-
// Do not attempt to retrieve file size for temporary metadata files that
388-
// are still flushing to disk.
389-
const bool temp_metadata =
390-
URI(file_path)
391-
.parent_path()
392-
.remove_trailing_slash()
393-
.to_string()
394-
.ends_with(constants::array_metadata_dir_name) &&
395-
file_path.ends_with(constants::temp_file_suffix);
396-
397387
if (find_data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
398388
entries.emplace_back(file_path, 0, true);
399389
} else {
400390
ULARGE_INTEGER size;
401391
size.LowPart = find_data.nFileSizeLow;
402392
size.HighPart = find_data.nFileSizeHigh;
403-
entries.emplace_back(
404-
file_path, temp_metadata ? 0 : size.QuadPart, false);
393+
entries.emplace_back(file_path, size.QuadPart, false);
405394
}
406395
}
407396

tiledb/sm/misc/constants.cc

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -261,9 +261,6 @@ const std::string con_commits_file_suffix = ".con";
261261
/** Suffix for the special ignore files used in TileDB. */
262262
const std::string ignore_file_suffix = ".ign";
263263

264-
/** Suffix for temporary array metadata files used in TileDB for LocalFS. */
265-
const std::string temp_file_suffix = ".tmp";
266-
267264
/** Default datatype for a generic tile. */
268265
const Datatype generic_tile_datatype = Datatype::CHAR;
269266

tiledb/sm/misc/constants.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -260,9 +260,6 @@ extern const std::string con_commits_file_suffix;
260260
/** Suffix for the special ignore files used in TileDB. */
261261
extern const std::string ignore_file_suffix;
262262

263-
/** Suffix for temporary array metadata files used in TileDB for LocalFS. */
264-
extern const std::string temp_file_suffix;
265-
266263
/** The fragment metadata file name. */
267264
extern const std::string fragment_metadata_filename;
268265

0 commit comments

Comments
 (0)