Skip to content

Commit b1dd2b4

Browse files
shaunrd0ypatia
andauthored
Load enumerations for all array schemas in a single request. (#5349)
This fixes a performance regression added in #5291 while loading enumerations for all array schemas. The regression impacts arrays with multiple evolved schemas. Previously #5291 introduced a loop over REST requests for-each schema, this PR loads all enumerations for all schemas in a single request. --- TYPE: C_API DESC: Introduce tiledb_array_load_enumerations_all_schemas TYPE: CPP_API DESC: Introduce ArrayExperimental::load_enumerations_all_schemas --------- Co-authored-by: Ypatia Tsavliri <[email protected]>
1 parent 8dbab75 commit b1dd2b4

18 files changed

+1029
-98
lines changed

test/src/unit-cppapi-enumerations.cc

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,11 @@ TEST_CASE_METHOD(
335335
"CPP API: Load All Enumerations - All Schemas",
336336
"[enumeration][array][load-all-enumerations][all-schemas][rest]") {
337337
create_array();
338+
339+
// Loading the array with array open v1 will only initialize the latest schema
340+
// For the first test this is fine, we only need to load enumerations for the
341+
// latest schema. In subsequent tests we will need to call
342+
// ArrayExperimental::load_enumerations_all_schemas.
338343
auto array = tiledb::Array(ctx_, uri_, TILEDB_READ);
339344
auto schema = array.load_schema(ctx_, uri_);
340345
REQUIRE(
@@ -344,6 +349,16 @@ TEST_CASE_METHOD(
344349
false);
345350
std::string schema_name_1 = schema.ptr()->array_schema()->name();
346351

352+
// If not using array open v3 just test that the correct exception is thrown
353+
if (!array.ptr()->array()->use_refactored_array_open()) {
354+
CHECK_THROWS_WITH(
355+
ArrayExperimental::load_enumerations_all_schemas(ctx_, array),
356+
Catch::Matchers::ContainsSubstring(
357+
"The array must be opened using "
358+
"`rest.use_refactored_array_open=true`"));
359+
return;
360+
}
361+
347362
// Evolve once to add an enumeration.
348363
ArraySchemaEvolution ase(ctx_);
349364
std::vector<std::string> var_values{"one", "two", "three"};
@@ -354,7 +369,7 @@ TEST_CASE_METHOD(
354369
ase.add_attribute(attr4);
355370
ase.array_evolve(uri_);
356371
array.reopen();
357-
ArrayExperimental::load_all_enumerations(ctx_, array);
372+
CHECK_NOTHROW(ArrayExperimental::load_enumerations_all_schemas(ctx_, array));
358373
auto all_schemas = array.ptr()->array()->array_schemas_all();
359374
schema = array.load_schema(ctx_, uri_);
360375
std::string schema_name_2 = schema.ptr()->array_schema()->name();
@@ -379,9 +394,8 @@ TEST_CASE_METHOD(
379394
ase2.drop_attribute("attr1");
380395
CHECK_NOTHROW(ase2.array_evolve(uri_));
381396
// Apply evolution to the array and reopen.
382-
CHECK_NOTHROW(array.close());
383-
CHECK_NOTHROW(array.open(TILEDB_READ));
384-
ArrayExperimental::load_all_enumerations(ctx_, array);
397+
CHECK_NOTHROW(array.reopen());
398+
CHECK_NOTHROW(ArrayExperimental::load_enumerations_all_schemas(ctx_, array));
385399
all_schemas = array.ptr()->array()->array_schemas_all();
386400
schema = array.load_schema(ctx_, uri_);
387401
std::string schema_name_3 = schema.ptr()->array_schema()->name();
@@ -416,9 +430,8 @@ TEST_CASE_METHOD(
416430
CHECK_NOTHROW(ase3.array_evolve(uri_));
417431

418432
// Apply evolution to the array and reopen.
419-
CHECK_NOTHROW(array.close());
420-
CHECK_NOTHROW(array.open(TILEDB_READ));
421-
ArrayExperimental::load_all_enumerations(ctx_, array);
433+
CHECK_NOTHROW(array.reopen());
434+
CHECK_NOTHROW(ArrayExperimental::load_enumerations_all_schemas(ctx_, array));
422435
all_schemas = array.ptr()->array()->array_schemas_all();
423436
schema = array.load_schema(ctx_, uri_);
424437
std::string schema_name_4 = schema.ptr()->array_schema()->name();

test/src/unit-enumerations.cc

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1128,6 +1128,16 @@ TEST_CASE_METHOD(
11281128
REQUIRE(schema->is_enumeration_loaded("test_enmr") == false);
11291129
std::string schema_name_1 = schema->name();
11301130

1131+
// If not using array open v3 just test that the correct exception is thrown
1132+
if (!array->use_refactored_array_open()) {
1133+
CHECK_THROWS_WITH(
1134+
array->load_all_enumerations(true),
1135+
Catch::Matchers::ContainsSubstring(
1136+
"The array must be opened using "
1137+
"`rest.use_refactored_array_open=true`"));
1138+
return;
1139+
}
1140+
11311141
// Evolve once to add an enumeration.
11321142
auto ase = make_shared<ArraySchemaEvolution>(HERE(), memory_tracker_);
11331143
std::vector<std::string> var_values{"one", "two", "three"};
@@ -1141,7 +1151,7 @@ TEST_CASE_METHOD(
11411151
CHECK_NOTHROW(Array::evolve_array_schema(
11421152
ctx_.resources(), uri_, ase.get(), array->get_encryption_key()));
11431153
CHECK(array->reopen().ok());
1144-
CHECK_NOTHROW(array->load_all_enumerations());
1154+
CHECK_NOTHROW(array->load_all_enumerations(true));
11451155
auto all_schemas = array->array_schemas_all();
11461156
schema = array->array_schema_latest_ptr();
11471157
std::string schema_name_2 = schema->name();
@@ -1162,7 +1172,7 @@ TEST_CASE_METHOD(
11621172
CHECK_NOTHROW(Array::evolve_array_schema(
11631173
ctx_.resources(), uri_, ase.get(), array->get_encryption_key()));
11641174
CHECK(array->reopen().ok());
1165-
CHECK_NOTHROW(array->load_all_enumerations());
1175+
CHECK_NOTHROW(array->load_all_enumerations(true));
11661176
all_schemas = array->array_schemas_all();
11671177
schema = array->array_schema_latest_ptr();
11681178
std::string schema_name_3 = schema->name();

tiledb/api/c_api/array/array_api.cc

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -663,6 +663,21 @@ capi_return_t tiledb_array_load_all_enumerations(const tiledb_array_t* array) {
663663
return TILEDB_OK;
664664
}
665665

666+
capi_return_t tiledb_array_load_enumerations_all_schemas(
667+
const tiledb_array_t* array) {
668+
ensure_array_is_valid(array);
669+
// Array::array_schemas_all_ is only initialized using array open V3, so we
670+
// won't have schemas to store the loaded enumerations unless array open V3 is
671+
// used.
672+
if (!array->array()->use_refactored_array_open()) {
673+
throw CAPIException(
674+
"Unable to load enumerations for all array schemas; The array must be "
675+
"opened using `rest.use_refactored_array_open=true`");
676+
}
677+
array->load_all_enumerations(true);
678+
return TILEDB_OK;
679+
}
680+
666681
} // namespace tiledb::api
667682

668683
using tiledb::api::api_entry_context;
@@ -1054,3 +1069,11 @@ CAPI_INTERFACE(
10541069
return api_entry_context<tiledb::api::tiledb_array_load_all_enumerations>(
10551070
ctx, array);
10561071
}
1072+
1073+
CAPI_INTERFACE(
1074+
array_load_enumerations_all_schemas,
1075+
tiledb_ctx_t* ctx,
1076+
const tiledb_array_t* array) {
1077+
return api_entry_context<
1078+
tiledb::api::tiledb_array_load_enumerations_all_schemas>(ctx, array);
1079+
}

tiledb/api/c_api/array/array_api_experimental.h

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ TILEDB_EXPORT capi_return_t tiledb_array_get_enumeration(
9090
tiledb_enumeration_t** enumeration) TILEDB_NOEXCEPT;
9191

9292
/**
93-
* Load all enumerations for the array.
93+
* Load all enumerations for the array's latest array schema.
9494
*
9595
* **Example:**
9696
*
@@ -100,13 +100,30 @@ TILEDB_EXPORT capi_return_t tiledb_array_get_enumeration(
100100
*
101101
* @param[in] ctx The TileDB context.
102102
* @param[in] array The TileDB array.
103-
* @param[in] latest_only If non-zero, only load enumerations for the latest
104-
* schema.
105103
* @return `TILEDB_OK` for success and `TILEDB_ERR` for error.
106104
*/
107105
TILEDB_EXPORT capi_return_t tiledb_array_load_all_enumerations(
108106
tiledb_ctx_t* ctx, const tiledb_array_t* array) TILEDB_NOEXCEPT;
109107

108+
/**
109+
* Load all enumerations for all schemas in the array.
110+
*
111+
* This method requires the array to be opened with the config option
112+
* `rest.use_refactored_array_open=true` (default).
113+
*
114+
* **Example:**
115+
*
116+
* @code{.c}
117+
* tiledb_array_load_enumerations_all_schemas(ctx, array);
118+
* @endcode
119+
*
120+
* @param[in] ctx The TileDB context.
121+
* @param[in] array The TileDB array.
122+
* @return `TILEDB_OK` for success and `TILEDB_ERR` for error.
123+
*/
124+
TILEDB_EXPORT capi_return_t tiledb_array_load_enumerations_all_schemas(
125+
tiledb_ctx_t* ctx, const tiledb_array_t* array) TILEDB_NOEXCEPT;
126+
110127
#ifdef __cplusplus
111128
}
112129
#endif

tiledb/api/c_api/array/array_api_internal.h

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -147,10 +147,16 @@ struct tiledb_array_handle_t
147147
return array_->get_enumeration(enumeration_name);
148148
}
149149

150+
std::unordered_map<
151+
std::string,
152+
std::vector<shared_ptr<const tiledb::sm::Enumeration>>>
153+
get_enumerations_all_schemas() {
154+
return array_->get_enumerations_all_schemas();
155+
}
156+
150157
std::vector<shared_ptr<const tiledb::sm::Enumeration>> get_enumerations(
151-
const std::vector<std::string>& enumeration_names,
152-
shared_ptr<tiledb::sm::ArraySchema> schema) {
153-
return array_->get_enumerations(enumeration_names, schema);
158+
const std::vector<std::string>& enumeration_names) {
159+
return array_->get_enumerations(enumeration_names);
154160
}
155161

156162
void get_metadata(
@@ -179,8 +185,8 @@ struct tiledb_array_handle_t
179185
return array_->is_open();
180186
}
181187

182-
void load_all_enumerations() const {
183-
array_->load_all_enumerations();
188+
void load_all_enumerations(bool all_schemas = false) const {
189+
array_->load_all_enumerations(all_schemas);
184190
}
185191

186192
tiledb::sm::NDRange& loaded_non_empty_domain() {

tiledb/sm/array/array.cc

Lines changed: 83 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -805,33 +805,85 @@ void Array::encryption_type(
805805

806806
shared_ptr<const Enumeration> Array::get_enumeration(
807807
const std::string& enumeration_name) {
808+
return get_enumerations({enumeration_name})[0];
809+
}
810+
811+
std::unordered_map<std::string, std::vector<shared_ptr<const Enumeration>>>
812+
Array::get_enumerations_all_schemas() {
808813
if (!is_open_) {
809814
throw ArrayException("Unable to load enumerations; Array is not open.");
810815
}
811816

812-
auto schema = opened_array_->array_schema_latest_ptr();
813-
if (!schema->has_enumeration(enumeration_name)) {
814-
throw ArrayException(
815-
"Unable to get enumeration; Enumeration '" + enumeration_name +
816-
"' does not exist.");
817-
} else if (schema->is_enumeration_loaded(enumeration_name)) {
818-
return schema->get_enumeration(enumeration_name);
817+
std::unordered_map<std::string, std::vector<shared_ptr<const Enumeration>>>
818+
ret;
819+
if (remote_) {
820+
auto rest_client = resources_.rest_client();
821+
if (rest_client == nullptr) {
822+
throw ArrayException(
823+
"Error loading enumerations; Remote array with no REST client.");
824+
}
825+
826+
// Pass an empty list of enumeration names. REST will use timestamps to
827+
// load all enumerations on all schemas for the array within that range.
828+
ret = rest_client->post_enumerations_from_rest(
829+
array_uri_,
830+
array_dir_timestamp_start_,
831+
array_dir_timestamp_end_,
832+
this,
833+
{},
834+
memory_tracker_);
835+
836+
// Store the enumerations from the REST response.
837+
for (const auto& schema_enmrs : ret) {
838+
auto schema = array_schemas_all().at(schema_enmrs.first);
839+
for (const auto& enmr : schema_enmrs.second) {
840+
schema->store_enumeration(enmr);
841+
}
842+
}
843+
} else {
844+
for (const auto& schema : array_schemas_all()) {
845+
std::unordered_set<std::string> enmrs_to_load;
846+
auto enumeration_names = schema.second->get_enumeration_names();
847+
// Dedupe requested names and filter out anything already loaded.
848+
for (auto& enmr_name : enumeration_names) {
849+
if (schema.second->is_enumeration_loaded(enmr_name)) {
850+
continue;
851+
}
852+
enmrs_to_load.insert(enmr_name);
853+
}
854+
855+
// Create a vector of paths to be loaded.
856+
std::vector<std::string> paths_to_load;
857+
for (auto& enmr_name : enmrs_to_load) {
858+
auto path = schema.second->get_enumeration_path_name(enmr_name);
859+
paths_to_load.push_back(path);
860+
}
861+
862+
// Load the enumerations from storage
863+
auto loaded = array_directory().load_enumerations_from_paths(
864+
paths_to_load, *encryption_key(), memory_tracker_);
865+
866+
// Store the loaded enumerations in the schema.
867+
for (auto& enmr : loaded) {
868+
schema.second->store_enumeration(enmr);
869+
}
870+
ret[schema.first] = loaded;
871+
}
819872
}
820873

821-
return get_enumerations({enumeration_name}, schema)[0];
874+
return ret;
822875
}
823876

824877
std::vector<shared_ptr<const Enumeration>> Array::get_enumerations(
825-
const std::vector<std::string>& enumeration_names,
826-
shared_ptr<ArraySchema> schema) {
878+
const std::vector<std::string>& enumeration_names) {
827879
if (!is_open_) {
828880
throw ArrayException("Unable to load enumerations; Array is not open.");
829881
}
830882

831883
// Dedupe requested names and filter out anything already loaded.
832884
std::unordered_set<std::string> enmrs_to_load;
833885
for (auto& enmr_name : enumeration_names) {
834-
if (schema->is_enumeration_loaded(enmr_name)) {
886+
if (array_schema_latest().is_enumeration_loaded(enmr_name)) {
835887
continue;
836888
}
837889
enmrs_to_load.insert(enmr_name);
@@ -856,16 +908,16 @@ std::vector<shared_ptr<const Enumeration>> Array::get_enumerations(
856908

857909
loaded = rest_client->post_enumerations_from_rest(
858910
array_uri_,
859-
schema->timestamp_range().first,
860-
schema->timestamp_range().second,
911+
array_dir_timestamp_start_,
912+
array_dir_timestamp_end_,
861913
this,
862914
names_to_load,
863-
memory_tracker_);
915+
memory_tracker_)[array_schema_latest().name()];
864916
} else {
865917
// Create a vector of paths to be loaded.
866918
std::vector<std::string> paths_to_load;
867919
for (auto& enmr_name : enmrs_to_load) {
868-
auto path = schema->get_enumeration_path_name(enmr_name);
920+
auto path = array_schema_latest().get_enumeration_path_name(enmr_name);
869921
paths_to_load.push_back(path);
870922
}
871923

@@ -876,25 +928,36 @@ std::vector<shared_ptr<const Enumeration>> Array::get_enumerations(
876928

877929
// Store the loaded enumerations in the schema
878930
for (auto& enmr : loaded) {
879-
schema->store_enumeration(enmr);
931+
opened_array_->array_schema_latest_ptr()->store_enumeration(enmr);
880932
}
881933
}
882934

883935
// Return the requested list of enumerations
884936
std::vector<shared_ptr<const Enumeration>> ret(enumeration_names.size());
885937
for (size_t i = 0; i < enumeration_names.size(); i++) {
886-
ret[i] = schema->get_enumeration(enumeration_names[i]);
938+
ret[i] = array_schema_latest().get_enumeration(enumeration_names[i]);
887939
}
888940
return ret;
889941
}
890942

891-
void Array::load_all_enumerations() {
943+
void Array::load_all_enumerations(bool all_schemas) {
892944
if (!is_open_) {
893945
throw ArrayException("Unable to load all enumerations; Array is not open.");
894946
}
895947
// Load all enumerations, discarding the returned list of loaded enumerations.
896-
for (const auto& schema : array_schemas_all()) {
897-
get_enumerations(schema.second->get_enumeration_names(), schema.second);
948+
if (all_schemas) {
949+
// Unless we are using array open V3, Array::array_schemas_all_ will not be
950+
// initialized. We throw an exception since this is required to store the
951+
// loaded enumerations.
952+
if (!use_refactored_array_open()) {
953+
throw ArrayException(
954+
"Unable to load enumerations for all array schemas; The array must "
955+
"be opened using `rest.use_refactored_array_open=true`");
956+
}
957+
958+
get_enumerations_all_schemas();
959+
} else {
960+
get_enumerations(array_schema_latest().get_enumeration_names());
898961
}
899962
}
900963

0 commit comments

Comments
 (0)