Skip to content

Commit 383faf0

Browse files
Optimize opening arrays — part 2 (#5662)
* Refactored `URI` to skip some checks when we are creating URIs from existing URIs. * Eliminated copyings of `ArrayDirectory`. Profiler results are available in CORE-383. --- TYPE: NO_HISTORY
1 parent 4797c93 commit 383faf0

File tree

10 files changed

+108
-156
lines changed

10 files changed

+108
-156
lines changed

tiledb/api/c_api/array/array_api_internal.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -98,15 +98,6 @@ struct tiledb_array_handle_t
9898
array_->delete_array(uri);
9999
}
100100

101-
void delete_fragments(
102-
tiledb::sm::ContextResources& resources,
103-
const tiledb::sm::URI& uri,
104-
uint64_t ts_start,
105-
uint64_t ts_end,
106-
std::optional<tiledb::sm::ArrayDirectory> array_dir = std::nullopt) {
107-
array_->delete_fragments(resources, uri, ts_start, ts_end, array_dir);
108-
}
109-
110101
void delete_fragments(
111102
const tiledb::sm::URI& uri,
112103
uint64_t timestamp_start,

tiledb/sm/array/array.cc

Lines changed: 13 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -658,42 +658,33 @@ Status Array::close() {
658658
}
659659

660660
void Array::delete_fragments(
661-
ContextResources& resources,
662-
const URI& uri,
663-
uint64_t timestamp_start,
664-
uint64_t timestamp_end,
665-
std::optional<ArrayDirectory> array_dir) {
666-
// Get the fragment URIs to be deleted
667-
if (array_dir == std::nullopt) {
668-
array_dir = ArrayDirectory(resources, uri, timestamp_start, timestamp_end);
669-
}
670-
auto filtered_fragment_uris = array_dir->filtered_fragment_uris(true);
661+
ContextResources& resources, ArrayDirectory& array_dir) {
662+
auto filtered_fragment_uris = array_dir.filtered_fragment_uris(true);
671663
const auto& fragment_uris = filtered_fragment_uris.fragment_uris();
672664

673665
// Retrieve commit uris to delete and ignore
674666
std::vector<URI> commit_uris_to_delete;
675667
std::vector<URI> commit_uris_to_ignore;
676668
for (auto& fragment : fragment_uris) {
677-
auto commit_uri = array_dir->get_commit_uri(fragment.uri_);
669+
auto commit_uri = array_dir.get_commit_uri(fragment.uri_);
678670
commit_uris_to_delete.emplace_back(commit_uri);
679-
if (array_dir->consolidated_commit_uris_set().count(commit_uri.c_str()) !=
680-
0) {
671+
if (array_dir.consolidated_commit_uris_set().count(commit_uri) != 0) {
681672
commit_uris_to_ignore.emplace_back(commit_uri);
682673
}
683674
}
684675

685676
// Write ignore file
686677
if (commit_uris_to_ignore.size() != 0) {
687-
array_dir->write_commit_ignore_file(commit_uris_to_ignore);
678+
array_dir.write_commit_ignore_file(commit_uris_to_ignore);
688679
}
689680

690681
// Delete fragments and commits
691-
auto vfs = &(resources.vfs());
682+
auto& vfs = resources.vfs();
692683
throw_if_not_ok(parallel_for(
693684
&resources.compute_tp(), 0, fragment_uris.size(), [&](size_t i) {
694-
vfs->remove_dir(fragment_uris[i].uri_);
695-
if (vfs->is_file(commit_uris_to_delete[i])) {
696-
vfs->remove_file(commit_uris_to_delete[i]);
685+
vfs.remove_dir(fragment_uris[i].uri_);
686+
if (vfs.is_file(commit_uris_to_delete[i])) {
687+
vfs.remove_file(commit_uris_to_delete[i]);
697688
}
698689
return Status::Ok();
699690
}));
@@ -714,7 +705,9 @@ void Array::delete_fragments(
714705
rest_client->post_delete_fragments_to_rest(
715706
uri, this, timestamp_start, timestamp_end);
716707
} else {
717-
Array::delete_fragments(resources_, uri, timestamp_start, timestamp_end);
708+
auto array_dir =
709+
ArrayDirectory(resources_, uri, timestamp_start, timestamp_end);
710+
Array::delete_fragments(resources_, array_dir);
718711
}
719712
}
720713

@@ -724,8 +717,7 @@ void Array::delete_array(ContextResources& resources, const URI& uri) {
724717
ArrayDirectory(resources, uri, 0, std::numeric_limits<uint64_t>::max());
725718

726719
// Delete fragments and commits
727-
Array::delete_fragments(
728-
resources, uri, 0, std::numeric_limits<uint64_t>::max(), array_dir);
720+
Array::delete_fragments(resources, array_dir);
729721

730722
// Delete array metadata, fragment metadata and array schema files
731723
// Note: metadata files may not be present, try to delete anyway

tiledb/sm/array/array.h

Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ class OpenedArray {
100100
uint64_t timestamp_end_opened_at,
101101
bool is_remote)
102102
: resources_(resources)
103-
, array_dir_(ArrayDirectory(resources, array_uri))
103+
, array_dir_(std::in_place, resources, array_uri)
104104
, array_schema_latest_(nullptr)
105105
, metadata_(memory_tracker)
106106
, metadata_loaded_(false)
@@ -124,8 +124,8 @@ class OpenedArray {
124124
}
125125

126126
/** Sets the array directory. */
127-
inline void set_array_directory(const ArrayDirectory&& dir) {
128-
array_dir_ = dir;
127+
inline void set_array_directory(ArrayDirectory&& dir) {
128+
array_dir_.emplace(std::move(dir));
129129
}
130130

131131
/** Returns the latest array schema. */
@@ -320,7 +320,7 @@ class Array {
320320
}
321321

322322
/** Set the array directory. */
323-
inline void set_array_directory(const ArrayDirectory&& dir) {
323+
inline void set_array_directory(ArrayDirectory&& dir) {
324324
opened_array_->set_array_directory(std::move(dir));
325325
}
326326

@@ -463,23 +463,10 @@ class Array {
463463
* between the provided timestamps.
464464
*
465465
* @param resources The context resources.
466-
* @param uri The uri of the Array whose fragments are to be deleted.
467-
* @param timestamp_start The start timestamp at which to delete fragments.
468-
* @param timestamp_end The end timestamp at which to delete fragments.
469466
* @param array_dir An optional ArrayDirectory from which to delete fragments.
470-
*
471-
* @section Maturity Notes
472-
* This is legacy code, ported from StorageManager during its removal process.
473-
* Its existence supports the non-static `delete_fragments` API below,
474-
* performing the actual deletion of fragments. This function is slated for
475-
* removal and should be directly integrated into the function below.
476467
*/
477468
static void delete_fragments(
478-
ContextResources& resources,
479-
const URI& uri,
480-
uint64_t timestamp_start,
481-
uint64_t timstamp_end,
482-
std::optional<ArrayDirectory> array_dir = std::nullopt);
469+
ContextResources& resources, ArrayDirectory& array_dir);
483470

484471
/**
485472
* Handles local and remote deletion of fragments between the provided

tiledb/sm/array/array_directory.cc

Lines changed: 15 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -253,8 +253,8 @@ const std::vector<URI>& ArrayDirectory::commit_uris_to_vacuum() const {
253253
return commit_uris_to_vacuum_;
254254
}
255255

256-
const std::unordered_set<std::string>&
257-
ArrayDirectory::consolidated_commit_uris_set() const {
256+
const std::unordered_set<URI>& ArrayDirectory::consolidated_commit_uris_set()
257+
const {
258258
return consolidated_commit_uris_set_;
259259
}
260260

@@ -309,7 +309,7 @@ void ArrayDirectory::delete_fragments_list(
309309
for (auto& timestamped_uri : uris) {
310310
auto commit_uri = get_commit_uri(timestamped_uri);
311311
commit_uris_to_delete.emplace_back(commit_uri);
312-
if (consolidated_commit_uris_set().count(commit_uri.c_str()) != 0) {
312+
if (consolidated_commit_uris_set().count(commit_uri) != 0) {
313313
commit_uris_to_ignore.emplace_back(commit_uri);
314314
}
315315
}
@@ -483,7 +483,7 @@ ArrayDirectory::filtered_fragment_uris(const bool full_overlap_only) const {
483483
if (mode_ == ArrayDirectoryMode::VACUUM_FRAGMENTS) {
484484
for (auto& uri : fragment_uris_to_vacuum.value()) {
485485
auto commit_uri = get_commit_uri(uri);
486-
if (consolidated_commit_uris_set_.count(commit_uri.c_str()) == 0) {
486+
if (consolidated_commit_uris_set_.count(commit_uri) == 0) {
487487
commit_uris_to_vacuum.emplace_back(commit_uri);
488488
} else {
489489
commit_uris_to_ignore.emplace_back(commit_uri);
@@ -672,8 +672,7 @@ ArrayDirectory::load_commits_dir_uris_v12_or_higher(
672672
for (size_t i = 0; i < commits_dir_uris.size(); ++i) {
673673
if (stdx::string::ends_with(
674674
commits_dir_uris[i].to_string(), constants::write_file_suffix)) {
675-
if (consolidated_commit_uris_set_.count(commits_dir_uris[i].c_str()) ==
676-
0) {
675+
if (consolidated_commit_uris_set_.count(commits_dir_uris[i]) == 0) {
677676
auto name = commits_dir_uris[i].last_path_part();
678677
name =
679678
name.substr(0, name.size() - constants::write_file_suffix.size());
@@ -694,8 +693,7 @@ ArrayDirectory::load_commits_dir_uris_v12_or_higher(
694693

695694
// Add the delete tile location if it overlaps the open start/end times
696695
if (timestamps_overlap(timestamp_range, false)) {
697-
if (consolidated_commit_uris_set_.count(commits_dir_uris[i].c_str()) ==
698-
0) {
696+
if (consolidated_commit_uris_set_.count(commits_dir_uris[i]) == 0) {
699697
const auto base_uri_size = uri_.to_string().size();
700698
delete_and_update_tiles_location_.emplace_back(
701699
commits_dir_uris[i],
@@ -716,10 +714,7 @@ ArrayDirectory::list_fragment_metadata_dir_uris_v12_or_higher() {
716714
return ls(uri_.join_path(constants::array_fragment_meta_dir_name));
717715
}
718716

719-
tuple<
720-
Status,
721-
optional<std::vector<URI>>,
722-
optional<std::unordered_set<std::string>>>
717+
tuple<Status, optional<std::vector<URI>>, optional<std::unordered_set<URI>>>
723718
ArrayDirectory::load_consolidated_commit_uris(
724719
const std::vector<URI>& commits_dir_uris) {
725720
auto timer_se = stats_->start_timer("load_consolidated_commit_uris");
@@ -745,7 +740,7 @@ ArrayDirectory::load_consolidated_commit_uris(
745740

746741
// Load all commit URIs. This is done in serial for now as it can be optimized
747742
// by vacuuming.
748-
std::unordered_set<std::string> uris_set;
743+
std::unordered_set<URI> uris_set;
749744
std::vector<std::pair<URI, std::string>> meta_files;
750745
for (uint64_t i = 0; i < commits_dir_uris.size(); i++) {
751746
auto& uri = commits_dir_uris[i];
@@ -763,7 +758,7 @@ ArrayDirectory::load_consolidated_commit_uris(
763758
std::stringstream ss(names);
764759
for (std::string condition_marker; std::getline(ss, condition_marker);) {
765760
if (ignore_set.count(condition_marker) == 0) {
766-
uris_set.emplace(uri_.to_string() + condition_marker);
761+
uris_set.emplace(uri_.append_string(condition_marker));
767762
}
768763

769764
// If we have a delete, process the condition tile
@@ -807,7 +802,7 @@ ArrayDirectory::load_consolidated_commit_uris(
807802
uint64_t count = 0;
808803
bool all_in_set = true;
809804
for (std::string uri_str; std::getline(ss, uri_str);) {
810-
if (uris_set.count(uri_.to_string() + uri_str) > 0) {
805+
if (uris_set.count(uri_.append_string(uri_str)) > 0) {
811806
count++;
812807
} else {
813808
all_in_set = false;
@@ -879,7 +874,7 @@ void ArrayDirectory::load_commits_uris_to_consolidate(
879874
const std::vector<URI>& array_dir_uris,
880875
const std::vector<URI>& commits_dir_uris,
881876
const std::vector<URI>& consolidated_uris,
882-
const std::unordered_set<std::string>& consolidated_uris_set) {
877+
const std::unordered_set<URI>& consolidated_uris_set) {
883878
// Make a set of existing commit URIs.
884879
std::unordered_set<std::string> uris_set;
885880
for (auto& uri : array_dir_uris) {
@@ -902,7 +897,7 @@ void ArrayDirectory::load_commits_uris_to_consolidate(
902897
// Add the ok file URIs not already in the list.
903898
for (auto& uri : array_dir_uris) {
904899
if (stdx::string::ends_with(uri.to_string(), constants::ok_file_suffix)) {
905-
if (consolidated_uris_set.count(uri.c_str()) == 0) {
900+
if (consolidated_uris_set.count(uri) == 0) {
906901
commit_uris_to_consolidate_.emplace_back(uri);
907902
}
908903
}
@@ -914,7 +909,7 @@ void ArrayDirectory::load_commits_uris_to_consolidate(
914909
uri.to_string(), constants::write_file_suffix) ||
915910
stdx::string::ends_with(
916911
uri.to_string(), constants::delete_file_suffix)) {
917-
if (consolidated_uris_set.count(uri.c_str()) == 0) {
912+
if (consolidated_uris_set.count(uri) == 0) {
918913
commit_uris_to_consolidate_.emplace_back(uri);
919914
}
920915
}
@@ -1258,7 +1253,7 @@ bool ArrayDirectory::is_vacuum_file(const URI& uri) const {
12581253
Status ArrayDirectory::is_fragment(
12591254
const URI& uri,
12601255
const std::unordered_set<std::string>& ok_uris_set,
1261-
const std::unordered_set<std::string>& consolidated_uris_set,
1256+
const std::unordered_set<URI>& consolidated_uris_set,
12621257
int* is_fragment) const {
12631258
// If the fragment ID does not have a name, ignore it.
12641259
if (!FragmentID::has_fragment_name(uri)) {
@@ -1292,7 +1287,7 @@ Status ArrayDirectory::is_fragment(
12921287

12931288
// Check set membership in consolidated uris
12941289
if (consolidated_uris_set.count(
1295-
uri.to_string() + constants::ok_file_suffix) != 0) {
1290+
uri.append_string(constants::ok_file_suffix)) != 0) {
12961291
*is_fragment = 1;
12971292
return Status::Ok();
12981293
}

tiledb/sm/array/array_directory.h

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,10 @@ class ArrayDirectory {
311311
uint64_t timestamp_end,
312312
ArrayDirectoryMode mode = ArrayDirectoryMode::READ);
313313

314+
DISABLE_COPY_AND_COPY_ASSIGN(ArrayDirectory);
315+
316+
ArrayDirectory(ArrayDirectory&&) = default;
317+
314318
/** Destructor. */
315319
~ArrayDirectory() = default;
316320

@@ -436,7 +440,7 @@ class ArrayDirectory {
436440
const std::vector<URI>& commit_uris_to_vacuum() const;
437441

438442
/** Returns the consolidated commit URI set. */
439-
const std::unordered_set<std::string>& consolidated_commit_uris_set() const;
443+
const std::unordered_set<URI>& consolidated_commit_uris_set() const;
440444

441445
/** Returns the URIs of the consolidated commit files to vacuum. */
442446
const std::vector<URI>& consolidated_commits_uris_to_vacuum() const;
@@ -511,7 +515,7 @@ class ArrayDirectory {
511515
}
512516

513517
/** Accessor to consolidated_commit_uris_set_ */
514-
inline std::unordered_set<std::string>& consolidated_commit_uris_set() {
518+
inline std::unordered_set<URI>& consolidated_commit_uris_set() {
515519
return consolidated_commit_uris_set_;
516520
}
517521

@@ -589,7 +593,7 @@ class ArrayDirectory {
589593
std::vector<URI> unfiltered_fragment_uris_;
590594

591595
/** Consolidated commit URI set. */
592-
std::unordered_set<std::string> consolidated_commit_uris_set_;
596+
std::unordered_set<URI> consolidated_commit_uris_set_;
593597

594598
/** The URIs of all the array schema files. */
595599
std::vector<URI> array_schema_uris_;
@@ -710,18 +714,15 @@ class ArrayDirectory {
710714
const std::vector<URI>& array_dir_uris,
711715
const std::vector<URI>& commits_dir_uris,
712716
const std::vector<URI>& consolidated_uris,
713-
const std::unordered_set<std::string>& consolidated_uris_set);
717+
const std::unordered_set<URI>& consolidated_uris_set);
714718

715719
/**
716720
* Loads the consolidated commit URI from the commit directory and the files
717721
* to vacuum for the commits directory.
718722
*
719723
* @return Status, consolidated uris, set of all consolidated uris.
720724
*/
721-
tuple<
722-
Status,
723-
optional<std::vector<URI>>,
724-
optional<std::unordered_set<std::string>>>
725+
tuple<Status, optional<std::vector<URI>>, optional<std::unordered_set<URI>>>
725726
load_consolidated_commit_uris(const std::vector<URI>& commits_dir_uris);
726727

727728
/** Loads the array metadata URIs. */
@@ -821,7 +822,7 @@ class ArrayDirectory {
821822
Status is_fragment(
822823
const URI& uri,
823824
const std::unordered_set<std::string>& ok_uris_set,
824-
const std::unordered_set<std::string>& consolidated_uris_set,
825+
const std::unordered_set<URI>& consolidated_uris_set,
825826
int32_t* is_fragment) const;
826827

827828
/**

tiledb/sm/consolidator/consolidator.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,7 @@ void Consolidator::fragments_consolidate(
252252

253253
void Consolidator::write_consolidated_commits_file(
254254
format_version_t write_version,
255-
ArrayDirectory array_dir,
255+
const ArrayDirectory& array_dir,
256256
const std::vector<URI>& commit_uris,
257257
ContextResources& resources) {
258258
// Compute the file name.

tiledb/sm/consolidator/consolidator.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ class Consolidator {
215215
*/
216216
static void write_consolidated_commits_file(
217217
format_version_t write_version,
218-
ArrayDirectory array_dir,
218+
const ArrayDirectory& array_dir,
219219
const std::vector<URI>& commit_uris,
220220
ContextResources& resources);
221221

0 commit comments

Comments
 (0)