Skip to content

Commit 3eeba4b

Browse files
robertbindar, teo-tsirpanis, shaunrd0
authored
Remove Array::get_max_buffer_sizes. (#5292)
This PR removes all the code associated with `Array::get_max_buffer_sizes`. This includes the Array bookkeeping members, the functions for calculating maximum buffer sizes based on a subarray, the capnp struct, and the REST client functions. The `tiledb_serialize_array_max_buffer_sizes` C API was kept until downstreams migrate, but its implementation was updated to always fail. A PR in the REST server removing the handling code for this request will follow soon. --- TYPE: NO_HISTORY --------- Co-authored-by: Theodore Tsirpanis <[email protected]> Co-authored-by: Shaun Reed <[email protected]>
1 parent b1dd2b4 commit 3eeba4b

File tree

13 files changed

+10
-908
lines changed

13 files changed

+10
-908
lines changed

test/src/unit-capi-rest-dense_array.cc

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1794,36 +1794,6 @@ TEST_CASE_METHOD(
17941794
tiledb_array_free(&array);
17951795
}
17961796

1797-
TEST_CASE_METHOD(
1798-
DenseArrayRESTFx,
1799-
"C API: REST Test dense array, get max buffer sizes",
1800-
"[capi][rest][dense]") {
1801-
array_uri_ = vfs_test_setup_.array_uri("max_buffer_sizes_array");
1802-
create_dense_array(array_uri_);
1803-
1804-
// Check max buffer sizes with empty array
1805-
tiledb_array_t* array;
1806-
int rc = tiledb_array_alloc(ctx_, array_uri_.c_str(), &array);
1807-
CHECK(rc == TILEDB_OK);
1808-
rc = tiledb_array_open(ctx_, array, TILEDB_READ);
1809-
CHECK(rc == TILEDB_OK);
1810-
REQUIRE(tiledb_array_close(ctx_, array) == TILEDB_OK);
1811-
tiledb_array_free(&array);
1812-
1813-
// Write array
1814-
write_dense_array(array_uri_);
1815-
1816-
// Check max buffer sizes for whole domain
1817-
rc = tiledb_array_alloc(ctx_, array_uri_.c_str(), &array);
1818-
CHECK(rc == TILEDB_OK);
1819-
rc = tiledb_array_open(ctx_, array, TILEDB_READ);
1820-
CHECK(rc == TILEDB_OK);
1821-
1822-
// Clean up
1823-
REQUIRE(tiledb_array_close(ctx_, array) == TILEDB_OK);
1824-
tiledb_array_free(&array);
1825-
}
1826-
18271797
TEST_CASE_METHOD(
18281798
DenseArrayRESTFx,
18291799
"C API: REST Test dense array, error without rest server configured",

tiledb/sm/array/array.cc

Lines changed: 0 additions & 267 deletions
Original file line numberDiff line numberDiff line change
@@ -600,8 +600,6 @@ Status Array::close() {
600600
return Status::Ok();
601601
}
602602

603-
clear_last_max_buffer_sizes();
604-
605603
try {
606604
set_array_closed();
607605

@@ -990,131 +988,6 @@ QueryType Array::get_query_type() const {
990988
return query_type_;
991989
}
992990

993-
Status Array::get_max_buffer_size(
994-
const char* name, const void* subarray, uint64_t* buffer_size) {
995-
// Check if array is open
996-
if (!is_open_) {
997-
return LOG_STATUS(
998-
Status_ArrayError("Cannot get max buffer size; Array is not open"));
999-
}
1000-
1001-
// Error if the array was not opened in read mode
1002-
if (query_type_ != QueryType::READ) {
1003-
return LOG_STATUS(
1004-
Status_ArrayError("Cannot get max buffer size; "
1005-
"Array was not opened in read mode"));
1006-
}
1007-
1008-
// Check if name is null
1009-
if (name == nullptr) {
1010-
return LOG_STATUS(Status_ArrayError(
1011-
"Cannot get max buffer size; Attribute/Dimension name is null"));
1012-
}
1013-
1014-
// Not applicable to heterogeneous domains
1015-
if (!array_schema_latest().domain().all_dims_same_type()) {
1016-
return LOG_STATUS(
1017-
Status_ArrayError("Cannot get max buffer size; Function not "
1018-
"applicable to heterogeneous domains"));
1019-
}
1020-
1021-
// Not applicable to variable-sized dimensions
1022-
if (!array_schema_latest().domain().all_dims_fixed()) {
1023-
return LOG_STATUS(Status_ArrayError(
1024-
"Cannot get max buffer size; Function not "
1025-
"applicable to domains with variable-sized dimensions"));
1026-
}
1027-
1028-
// Check if name is attribute or dimension
1029-
bool is_dim = array_schema_latest().is_dim(name);
1030-
bool is_attr = array_schema_latest().is_attr(name);
1031-
1032-
// Check if attribute/dimension exists
1033-
if (name != constants::coords && !is_dim && !is_attr) {
1034-
return LOG_STATUS(Status_ArrayError(
1035-
std::string("Cannot get max buffer size; Attribute/Dimension '") +
1036-
name + "' does not exist"));
1037-
}
1038-
1039-
// Check if attribute/dimension is fixed sized
1040-
if (array_schema_latest().var_size(name)) {
1041-
return LOG_STATUS(Status_ArrayError(
1042-
std::string("Cannot get max buffer size; Attribute/Dimension '") +
1043-
name + "' is var-sized"));
1044-
}
1045-
1046-
RETURN_NOT_OK(compute_max_buffer_sizes(subarray));
1047-
1048-
// Retrieve buffer size
1049-
auto it = last_max_buffer_sizes_.find(name);
1050-
assert(it != last_max_buffer_sizes_.end());
1051-
*buffer_size = it->second.first;
1052-
1053-
return Status::Ok();
1054-
}
1055-
1056-
Status Array::get_max_buffer_size(
1057-
const char* name,
1058-
const void* subarray,
1059-
uint64_t* buffer_off_size,
1060-
uint64_t* buffer_val_size) {
1061-
// Check if array is open
1062-
if (!is_open_) {
1063-
return LOG_STATUS(
1064-
Status_ArrayError("Cannot get max buffer size; Array is not open"));
1065-
}
1066-
1067-
// Error if the array was not opened in read mode
1068-
if (query_type_ != QueryType::READ) {
1069-
return LOG_STATUS(
1070-
Status_ArrayError("Cannot get max buffer size; "
1071-
"Array was not opened in read mode"));
1072-
}
1073-
1074-
// Check if name is null
1075-
if (name == nullptr) {
1076-
return LOG_STATUS(Status_ArrayError(
1077-
"Cannot get max buffer size; Attribute/Dimension name is null"));
1078-
}
1079-
1080-
// Not applicable to heterogeneous domains
1081-
if (!array_schema_latest().domain().all_dims_same_type()) {
1082-
return LOG_STATUS(
1083-
Status_ArrayError("Cannot get max buffer size; Function not "
1084-
"applicable to heterogeneous domains"));
1085-
}
1086-
1087-
// Not applicable to variable-sized dimensions
1088-
if (!array_schema_latest().domain().all_dims_fixed()) {
1089-
return LOG_STATUS(Status_ArrayError(
1090-
"Cannot get max buffer size; Function not "
1091-
"applicable to domains with variable-sized dimensions"));
1092-
}
1093-
1094-
RETURN_NOT_OK(compute_max_buffer_sizes(subarray));
1095-
1096-
// Check if attribute/dimension exists
1097-
auto it = last_max_buffer_sizes_.find(name);
1098-
if (it == last_max_buffer_sizes_.end()) {
1099-
return LOG_STATUS(Status_ArrayError(
1100-
std::string("Cannot get max buffer size; Attribute/Dimension '") +
1101-
name + "' does not exist"));
1102-
}
1103-
1104-
// Check if attribute/dimension is var-sized
1105-
if (!array_schema_latest().var_size(name)) {
1106-
return LOG_STATUS(Status_ArrayError(
1107-
std::string("Cannot get max buffer size; Attribute/Dimension '") +
1108-
name + "' is fixed-sized"));
1109-
}
1110-
1111-
// Retrieve buffer sizes
1112-
*buffer_off_size = it->second.first;
1113-
*buffer_val_size = it->second.second;
1114-
1115-
return Status::Ok();
1116-
}
1117-
1118991
Status Array::reopen() {
1119992
// Note: Array will only reopen for reads. This is why we are checking the
1120993
// timestamp for the array directory and not new components. This needs to be
@@ -1155,9 +1028,6 @@ Status Array::reopen(uint64_t timestamp_start, uint64_t timestamp_end) {
11551028
}
11561029
array_dir_timestamp_start_ = timestamp_start;
11571030

1158-
// Reset the last max buffer sizes.
1159-
clear_last_max_buffer_sizes();
1160-
11611031
// Reopen metadata.
11621032
auto key = opened_array_->encryption_key();
11631033
opened_array_ = make_shared<OpenedArray>(
@@ -1902,143 +1772,6 @@ Array::open_for_writes() {
19021772
return {array_schema_latest, array_schemas_all};
19031773
}
19041774

1905-
void Array::clear_last_max_buffer_sizes() {
1906-
last_max_buffer_sizes_.clear();
1907-
last_max_buffer_sizes_subarray_.clear();
1908-
last_max_buffer_sizes_subarray_.shrink_to_fit();
1909-
}
1910-
1911-
Status Array::compute_max_buffer_sizes(const void* subarray) {
1912-
// Applicable only to domains where all dimensions have the same type
1913-
if (!array_schema_latest().domain().all_dims_same_type()) {
1914-
return LOG_STATUS(
1915-
Status_ArrayError("Cannot compute max buffer sizes; Inapplicable when "
1916-
"dimension domains have different types"));
1917-
}
1918-
1919-
// Allocate space for max buffer sizes subarray
1920-
auto dim_num = array_schema_latest().dim_num();
1921-
auto coord_size{
1922-
array_schema_latest().domain().dimension_ptr(0)->coord_size()};
1923-
auto subarray_size = 2 * dim_num * coord_size;
1924-
last_max_buffer_sizes_subarray_.resize(subarray_size);
1925-
1926-
// Compute max buffer sizes
1927-
if (last_max_buffer_sizes_.empty() ||
1928-
std::memcmp(
1929-
&last_max_buffer_sizes_subarray_[0], subarray, subarray_size) != 0) {
1930-
last_max_buffer_sizes_.clear();
1931-
1932-
// Get all attributes and coordinates
1933-
auto& attributes = array_schema_latest().attributes();
1934-
last_max_buffer_sizes_.clear();
1935-
for (const auto& attr : attributes)
1936-
last_max_buffer_sizes_[attr->name()] =
1937-
std::pair<uint64_t, uint64_t>(0, 0);
1938-
last_max_buffer_sizes_[constants::coords] =
1939-
std::pair<uint64_t, uint64_t>(0, 0);
1940-
for (unsigned d = 0; d < dim_num; ++d)
1941-
last_max_buffer_sizes_
1942-
[array_schema_latest().domain().dimension_ptr(d)->name()] =
1943-
std::pair<uint64_t, uint64_t>(0, 0);
1944-
1945-
RETURN_NOT_OK(compute_max_buffer_sizes(subarray, &last_max_buffer_sizes_));
1946-
}
1947-
1948-
// Update subarray
1949-
std::memcpy(&last_max_buffer_sizes_subarray_[0], subarray, subarray_size);
1950-
1951-
return Status::Ok();
1952-
}
1953-
1954-
Status Array::compute_max_buffer_sizes(
1955-
const void* subarray,
1956-
std::unordered_map<std::string, std::pair<uint64_t, uint64_t>>*
1957-
buffer_sizes) const {
1958-
if (remote_) {
1959-
auto rest_client = resources_.rest_client();
1960-
if (rest_client == nullptr) {
1961-
return LOG_STATUS(Status_ArrayError(
1962-
"Cannot get max buffer sizes; remote array with no REST client."));
1963-
}
1964-
1965-
return rest_client->get_array_max_buffer_sizes(
1966-
array_uri_, array_schema_latest(), subarray, buffer_sizes);
1967-
}
1968-
1969-
// Keep the current opened array alive for the duration of this call.
1970-
auto opened_array = opened_array_;
1971-
auto& fragment_metadata = opened_array->fragment_metadata();
1972-
auto& array_schema_latest = opened_array->array_schema_latest();
1973-
1974-
// Return if there are no metadata
1975-
if (fragment_metadata.empty()) {
1976-
return Status::Ok();
1977-
}
1978-
1979-
// First we calculate a rough upper bound. Especially for dense
1980-
// arrays, this will not be accurate, as it accounts only for the
1981-
// non-empty regions of the subarray.
1982-
for (auto& meta : fragment_metadata) {
1983-
meta->add_max_buffer_sizes(*encryption_key(), subarray, buffer_sizes);
1984-
}
1985-
1986-
// Prepare an NDRange for the subarray
1987-
auto dim_num = array_schema_latest.dim_num();
1988-
NDRange sub(dim_num);
1989-
auto sub_ptr = (const unsigned char*)subarray;
1990-
uint64_t offset = 0;
1991-
for (unsigned d = 0; d < dim_num; ++d) {
1992-
auto r_size{2 * array_schema_latest.dimension_ptr(d)->coord_size()};
1993-
sub[d] = Range(&sub_ptr[offset], r_size);
1994-
offset += r_size;
1995-
}
1996-
1997-
// Rectify bound for dense arrays
1998-
if (array_schema_latest.dense()) {
1999-
auto cell_num = array_schema_latest.domain().cell_num(sub);
2000-
// `cell_num` becomes 0 when `subarray` is huge, leading to a
2001-
// `uint64_t` overflow.
2002-
if (cell_num != 0) {
2003-
for (auto& it : *buffer_sizes) {
2004-
if (array_schema_latest.var_size(it.first)) {
2005-
it.second.first = cell_num * constants::cell_var_offset_size;
2006-
it.second.second +=
2007-
cell_num * datatype_size(array_schema_latest.type(it.first));
2008-
} else {
2009-
it.second.first = cell_num * array_schema_latest.cell_size(it.first);
2010-
}
2011-
}
2012-
}
2013-
}
2014-
2015-
// Rectify bound for sparse arrays with integer domain, without duplicates
2016-
if (!array_schema_latest.dense() && !array_schema_latest.allows_dups() &&
2017-
array_schema_latest.domain().all_dims_int()) {
2018-
auto cell_num = array_schema_latest.domain().cell_num(sub);
2019-
// `cell_num` becomes 0 when `subarray` is huge, leading to a
2020-
// `uint64_t` overflow.
2021-
if (cell_num != 0) {
2022-
for (auto& it : *buffer_sizes) {
2023-
if (!array_schema_latest.var_size(it.first)) {
2024-
// Check for overflow
2025-
uint64_t new_size =
2026-
cell_num * array_schema_latest.cell_size(it.first);
2027-
if (new_size / array_schema_latest.cell_size((it.first)) !=
2028-
cell_num) {
2029-
continue;
2030-
}
2031-
2032-
// Potentially rectify size
2033-
it.second.first = std::min(it.second.first, new_size);
2034-
}
2035-
}
2036-
}
2037-
}
2038-
2039-
return Status::Ok();
2040-
}
2041-
20421775
void Array::do_load_metadata() {
20431776
if (!array_directory().loaded()) {
20441777
throw ArrayException(

0 commit comments

Comments
 (0)