From 7464c415ffa5aa674bb3372806251eb3efbac0fa Mon Sep 17 00:00:00 2001 From: Shaun Reed Date: Tue, 30 Jan 2024 16:29:28 -0500 Subject: [PATCH 01/10] Add C.41 Subarray constructor + Unstatus subarray_from_capnp --- test/support/src/serialization_wrappers.cc | 10 +- tiledb/sm/serialization/query.cc | 147 ++++++++++++--------- tiledb/sm/serialization/query.h | 8 +- tiledb/sm/subarray/subarray.cc | 26 ++++ tiledb/sm/subarray/subarray.h | 31 +++++ 5 files changed, 158 insertions(+), 64 deletions(-) diff --git a/test/support/src/serialization_wrappers.cc b/test/support/src/serialization_wrappers.cc index fa7dec6fc12..0973739a67a 100644 --- a/test/support/src/serialization_wrappers.cc +++ b/test/support/src/serialization_wrappers.cc @@ -204,11 +204,15 @@ void tiledb_subarray_serialize( .ok()); // Deserialize tiledb_subarray_t* deserialized_subarray; + auto layout = (*subarray)->subarray_->layout(); + auto stats = (*subarray)->subarray_->stats(); + shared_ptr dummy_logger = make_shared(HERE(), ""); + tiledb::test::require_tiledb_ok( ctx, tiledb_subarray_alloc(ctx, array, &deserialized_subarray)); - REQUIRE(tiledb::sm::serialization::subarray_from_capnp( - builder, deserialized_subarray->subarray_) - .ok()); + *deserialized_subarray->subarray_ = + tiledb::sm::serialization::subarray_from_capnp( + builder, array->array_.get(), layout, stats, dummy_logger); *subarray = deserialized_subarray; #endif } diff --git a/tiledb/sm/serialization/query.cc b/tiledb/sm/serialization/query.cc index e3b0cd7c95c..b2ca1d37df1 100644 --- a/tiledb/sm/serialization/query.cc +++ b/tiledb/sm/serialization/query.cc @@ -261,37 +261,61 @@ Status subarray_to_capnp( return Status::Ok(); } -Status subarray_from_capnp( - const capnp::Subarray::Reader& reader, Subarray* subarray) { - RETURN_NOT_OK(subarray->set_coalesce_ranges(reader.getCoalesceRanges())); +Subarray subarray_from_capnp( + const capnp::Subarray::Reader& reader, + const Array* array, + Layout layout, + stats::Stats* parent_stats, + shared_ptr logger) { + bool coalesce_ranges = reader.getCoalesceRanges(); auto ranges_reader = reader.getRanges(); + uint32_t dim_num = ranges_reader.size(); + std::vector range_subset(dim_num); + std::vector is_default(dim_num, false); for (uint32_t i = 0; i < dim_num; i++) { auto range_reader = ranges_reader[i]; - - auto data = range_reader.getBuffer(); - auto data_ptr = data.asBytes(); + Datatype type = Datatype::UINT8; + throw_if_not_ok(datatype_enum(range_reader.getType(), &type)); + auto dim = array->array_schema_latest().dimension_ptr(i); + + bool implicitly_initialized = range_reader.getHasDefaultRange(); + range_subset[i] = + RangeSetAndSuperset(dim->type(), dim->domain(), true, coalesce_ranges); + is_default[i] = implicitly_initialized; if (range_reader.hasBufferSizes()) { auto ranges = range_buffers_from_capnp(range_reader); - RETURN_NOT_OK(subarray->set_ranges_for_dim(i, ranges)); - - // Set default indicator - subarray->set_is_default(i, range_reader.getHasDefaultRange()); + // If the range is implicitly initialized, the RangeSetAndSuperset + // constructor will initialize the ranges to the domain. + if (!implicitly_initialized) { + // Edge case for dimension labels where there are only label ranges set. + if (ranges.empty()) { + range_subset[i] = RangeSetAndSuperset( + dim->type(), dim->domain(), false, coalesce_ranges); + } + // Add custom ranges, clearing any implicit ranges previously set. + for (const auto& range : ranges) { + throw_if_not_ok(range_subset[i].add_range_unrestricted(range)); + } + } } else { // Handle 1.7 style ranges where there is a single range with no sizes + auto data = range_reader.getBuffer(); + auto data_ptr = data.asBytes(); Range range(data_ptr.begin(), data.size()); - RETURN_NOT_OK(subarray->set_ranges_for_dim(i, {range})); - subarray->set_is_default(i, range_reader.getHasDefaultRange()); + throw_if_not_ok(range_subset[i].add_range_unrestricted(range)); } } + std::vector> label_range_subset( + dim_num, nullopt); if (reader.hasLabelRanges()) { - subarray->add_default_label_ranges(dim_num); auto label_ranges_reader = reader.getLabelRanges(); uint32_t label_num = label_ranges_reader.size(); for (uint32_t i = 0; i < label_num; i++) { auto label_range_reader = label_ranges_reader[i]; - auto dim_id = label_range_reader.getDimensionId(); + auto dim_index = label_range_reader.getDimensionId(); + auto dim = array->array_schema_latest().dimension_ptr(dim_index); auto label_name = label_range_reader.getName(); // Deserialize ranges for this dim label @@ -299,46 +323,51 @@ Status subarray_from_capnp( auto ranges = range_buffers_from_capnp(range_reader); // Set ranges for this dim label on the subarray - subarray->set_label_ranges_for_dim(dim_id, label_name, ranges); + label_range_subset[dim_index] = { + label_name, dim->type(), coalesce_ranges}; } } + std::unordered_map> attr_range_subset; if (reader.hasAttributeRanges()) { - std::unordered_map> attr_ranges; auto attr_ranges_reader = reader.getAttributeRanges(); if (attr_ranges_reader.hasEntries()) { - for (auto attr_ranges_entry : attr_ranges_reader.getEntries()) { - auto range_reader = attr_ranges_entry.getValue(); - auto key = std::string_view{ - attr_ranges_entry.getKey().cStr(), - attr_ranges_entry.getKey().size()}; - attr_ranges[std::string{key}] = range_buffers_from_capnp(range_reader); + for (auto entry : attr_ranges_reader.getEntries()) { + auto range_reader = entry.getValue(); + std::string key{entry.getKey().cStr(), entry.getKey().size()}; + attr_range_subset[key] = range_buffers_from_capnp(range_reader); } } - - for (const auto& attr_range : attr_ranges) - subarray->set_attribute_ranges(attr_range.first, attr_range.second); } // If cap'n proto object has stats set it on c++ object + Subarray s(array, layout, parent_stats, logger, true); if (reader.hasStats()) { auto stats_data = stats_from_capnp(reader.getStats()); subarray->set_stats(stats_data); } + std::vector relevant_fragments; if (reader.hasRelevantFragments()) { - auto relevant_fragments = reader.getRelevantFragments(); - size_t count = relevant_fragments.size(); - std::vector rf; - rf.reserve(count); + auto reader_rf = reader.getRelevantFragments(); + size_t count = reader_rf.size(); + relevant_fragments.reserve(count); for (size_t i = 0; i < count; i++) { - rf.emplace_back(relevant_fragments[i]); + relevant_fragments.emplace_back(reader_rf[i]); } - - subarray->relevant_fragments() = RelevantFragments(rf); } - return Status::Ok(); + return { + array, + layout, + reader.hasStats() ? s.stats() : parent_stats, + logger, + range_subset, + is_default, + label_range_subset, + attr_range_subset, + relevant_fragments, + coalesce_ranges}; } Status subarray_partitioner_to_capnp( @@ -450,8 +479,8 @@ Status subarray_partitioner_from_capnp( RETURN_NOT_OK(layout_enum(subarray_reader.getLayout(), &layout)); // Subarray, which is used to initialize the partitioner. - Subarray subarray(array, layout, query_stats, dummy_logger, true); - RETURN_NOT_OK(subarray_from_capnp(reader.getSubarray(), &subarray)); + Subarray subarray = subarray_from_capnp( + subarray_reader, array, layout, query_stats, dummy_logger); *partitioner = SubarrayPartitioner( &config, subarray, @@ -508,10 +537,12 @@ Status subarray_partitioner_from_capnp( partition_info->end_ = partition_info_reader.getEnd(); partition_info->split_multi_range_ = partition_info_reader.getSplitMultiRange(); - partition_info->partition_ = - Subarray(array, layout, query_stats, dummy_logger, true); - RETURN_NOT_OK(subarray_from_capnp( - partition_info_reader.getSubarray(), &partition_info->partition_)); + partition_info->partition_ = subarray_from_capnp( + partition_info_reader.getSubarray(), + array, + layout, + query_stats, + dummy_logger); if (compute_current_tile_overlap) { throw_if_not_ok(partition_info->partition_.precompute_tile_overlap( @@ -531,20 +562,18 @@ Status subarray_partitioner_from_capnp( auto sr_reader = state_reader.getSingleRange(); const unsigned num_sr = sr_reader.size(); for (unsigned i = 0; i < num_sr; i++) { - auto subarray_reader_ = sr_reader[i]; - state->single_range_.emplace_back( - array, layout, query_stats, dummy_logger, true); - Subarray& subarray_ = state->single_range_.back(); - RETURN_NOT_OK(subarray_from_capnp(subarray_reader_, &subarray_)); + auto subarray_reader = sr_reader[i]; + Subarray subarray = subarray_from_capnp( + subarray_reader, array, layout, query_stats, dummy_logger); + state->single_range_.push_back(subarray); } auto m_reader = state_reader.getMultiRange(); const unsigned num_m = m_reader.size(); for (unsigned i = 0; i < num_m; i++) { - auto subarray_reader_ = m_reader[i]; - state->multi_range_.emplace_back( - array, layout, query_stats, dummy_logger, true); - Subarray& subarray_ = state->multi_range_.back(); - RETURN_NOT_OK(subarray_from_capnp(subarray_reader_, &subarray_)); + auto subarray_reader = m_reader[i]; + Subarray subarray = subarray_from_capnp( + subarray_reader, array, layout, query_stats, dummy_logger); + state->multi_range_.push_back(subarray); } // Overall mem budget @@ -1107,9 +1136,9 @@ Status reader_from_capnp( RETURN_NOT_OK(layout_enum(reader_reader.getLayout(), &layout)); // Subarray - Subarray subarray(array, layout, query->stats(), dummy_logger, true); auto subarray_reader = reader_reader.getSubarray(); - RETURN_NOT_OK(subarray_from_capnp(subarray_reader, &subarray)); + Subarray subarray = subarray_from_capnp( + subarray_reader, array, layout, query->stats(), dummy_logger); RETURN_NOT_OK(query->set_subarray_unsafe(subarray)); // Read state @@ -1145,9 +1174,9 @@ Status index_reader_from_capnp( RETURN_NOT_OK(layout_enum(reader_reader.getLayout(), &layout)); // Subarray - Subarray subarray(array, layout, query->stats(), dummy_logger, true); auto subarray_reader = reader_reader.getSubarray(); - RETURN_NOT_OK(subarray_from_capnp(subarray_reader, &subarray)); + Subarray subarray = subarray_from_capnp( + subarray_reader, array, layout, query->stats(), dummy_logger); RETURN_NOT_OK(query->set_subarray_unsafe(subarray)); // Read state @@ -1184,9 +1213,9 @@ Status dense_reader_from_capnp( RETURN_NOT_OK(layout_enum(reader_reader.getLayout(), &layout)); // Subarray - Subarray subarray(array, layout, query->stats(), dummy_logger, true); auto subarray_reader = reader_reader.getSubarray(); - RETURN_NOT_OK(subarray_from_capnp(subarray_reader, &subarray)); + Subarray subarray = subarray_from_capnp( + subarray_reader, array, layout, query->stats(), dummy_logger); RETURN_NOT_OK(query->set_subarray_unsafe(subarray)); // Read state @@ -2210,9 +2239,9 @@ Status query_from_capnp( // Subarray if (writer_reader.hasSubarrayRanges()) { - Subarray subarray(array, layout, query->stats(), dummy_logger, true); auto subarray_reader = writer_reader.getSubarrayRanges(); - RETURN_NOT_OK(subarray_from_capnp(subarray_reader, &subarray)); + Subarray subarray = subarray_from_capnp( + subarray_reader, array, layout, query->stats(), dummy_logger); RETURN_NOT_OK(query->set_subarray_unsafe(subarray)); } } @@ -3164,9 +3193,9 @@ void ordered_dim_label_reader_from_capnp( throw_if_not_ok(layout_enum(reader_reader.getLayout(), &layout)); // Subarray - Subarray subarray(array, layout, query->stats(), dummy_logger, false); auto subarray_reader = reader_reader.getSubarray(); - throw_if_not_ok(subarray_from_capnp(subarray_reader, &subarray)); + Subarray subarray = subarray_from_capnp( + subarray_reader, array, layout, query->stats(), dummy_logger); throw_if_not_ok(query->set_subarray_unsafe(subarray)); // OrderedDimLabelReader requires an initialized subarray for construction. diff --git a/tiledb/sm/serialization/query.h b/tiledb/sm/serialization/query.h index 568faacf0f0..09bfc75df40 100644 --- a/tiledb/sm/serialization/query.h +++ b/tiledb/sm/serialization/query.h @@ -252,8 +252,12 @@ Status subarray_to_capnp( const Subarray* subarray, capnp::Subarray::Builder* builder); -Status subarray_from_capnp( - const capnp::Subarray::Reader& reader, Subarray* subarray); +Subarray subarray_from_capnp( + const capnp::Subarray::Reader& reader, + const Array* array, + Layout layout, + stats::Stats* parent_stats, + shared_ptr logger); void ordered_dim_label_reader_to_capnp( const Query& query, diff --git a/tiledb/sm/subarray/subarray.cc b/tiledb/sm/subarray/subarray.cc index 6ea1846152b..a0f2efffd9d 100644 --- a/tiledb/sm/subarray/subarray.cc +++ b/tiledb/sm/subarray/subarray.cc @@ -150,6 +150,32 @@ Subarray::Subarray( add_default_ranges(); } +Subarray::Subarray( + const Array* array, + Layout layout, + stats::Stats* stats, + shared_ptr logger, + std::vector range_subset, + std::vector is_default, + std::vector> label_range_subset, + std::unordered_map> attr_range_subset, + std::vector relevant_fragments, + bool coalesce_ranges) + : stats_(stats) + , logger_(std::move(logger)) + , array_(array->opened_array()) + , layout_(layout) + , cell_order_(array_->array_schema_latest().cell_order()) + , range_subset_(std::move(range_subset)) + , label_range_subset_(std::move(label_range_subset)) + , attr_range_subset_(std::move(attr_range_subset)) + , is_default_(std::move(is_default)) + , est_result_size_computed_(false) + , relevant_fragments_(relevant_fragments) + , coalesce_ranges_(coalesce_ranges) + , ranges_sorted_(false) { +} + Subarray::Subarray(const Subarray& subarray) : Subarray() { // Make a deep-copy clone diff --git a/tiledb/sm/subarray/subarray.h b/tiledb/sm/subarray/subarray.h index b32a6af0cea..96eafec6d00 100644 --- a/tiledb/sm/subarray/subarray.h +++ b/tiledb/sm/subarray/subarray.h @@ -322,6 +322,37 @@ class Subarray { bool coalesce_ranges = true, StorageManager* storage_manager = nullptr); + /** + * Constructor. + * + * @param opened_array The opened array the subarray is associated with. + * @param layout The layout of the values of the subarray (of the results + * if the subarray is used for reads, or of the values provided + * by the user for writes). + * @param parent_stats The parent stats to inherit from. + * @param logger The parent logger to clone and use for logging + * @param range_subset Vector of RangeSetAndSuperset for each dimension. + * @param is_default Vector of boolean indicating if the range is default. + * @param label_range_subset Vector of optional for each + * dimension. + * @param attr_range_subset Map of attribute name to a vector of Ranges, for + * each attribute. + * @param relevant_fragments RelevantFragments object for the subarray. + * @param coalesce_ranges When enabled, ranges will attempt to coalesce + * with existing ranges as they are added + */ + Subarray( + const Array* array, + Layout layout, + stats::Stats* stats, + shared_ptr logger, + std::vector range_subset, + std::vector is_default, + std::vector> label_range_subset, + std::unordered_map> attr_range_subset, + std::vector relevant_fragments, + bool coalesce_ranges = true); + /** * Copy constructor. This performs a deep copy (including memcpy of * underlying buffers). From fc6c31f58c50906565090791c763f1bdf7db7490 Mon Sep 17 00:00:00 2001 From: Shaun Reed Date: Wed, 31 Jan 2024 16:48:37 -0500 Subject: [PATCH 02/10] Fix test --- tiledb/sm/serialization/query.cc | 16 +++++++++------- tiledb/sm/subarray/range_subset.cc | 10 ++++++++++ tiledb/sm/subarray/range_subset.h | 20 ++++++++++++++++++-- tiledb/sm/subarray/subarray.cc | 10 ++++++++++ 4 files changed, 47 insertions(+), 9 deletions(-) diff --git a/tiledb/sm/serialization/query.cc b/tiledb/sm/serialization/query.cc index b2ca1d37df1..ce0fe6ba0b1 100644 --- a/tiledb/sm/serialization/query.cc +++ b/tiledb/sm/serialization/query.cc @@ -291,11 +291,11 @@ Subarray subarray_from_capnp( // Edge case for dimension labels where there are only label ranges set. if (ranges.empty()) { range_subset[i] = RangeSetAndSuperset( - dim->type(), dim->domain(), false, coalesce_ranges); - } - // Add custom ranges, clearing any implicit ranges previously set. - for (const auto& range : ranges) { - throw_if_not_ok(range_subset[i].add_range_unrestricted(range)); + dim->type(), dim->domain(), {dim->domain()}, coalesce_ranges); + } else { + // Add custom ranges, clearing any implicit ranges previously set. + range_subset[i] = RangeSetAndSuperset( + dim->type(), dim->domain(), ranges, coalesce_ranges); } } } else { @@ -320,11 +320,13 @@ Subarray subarray_from_capnp( // Deserialize ranges for this dim label auto range_reader = label_range_reader.getRanges(); - auto ranges = range_buffers_from_capnp(range_reader); + auto label_ranges = range_buffers_from_capnp(range_reader); // Set ranges for this dim label on the subarray label_range_subset[dim_index] = { - label_name, dim->type(), coalesce_ranges}; + label_name, dim->type(), label_ranges, coalesce_ranges}; + range_subset[dim_index].clear(); + is_default[dim_index] = false; } } diff --git a/tiledb/sm/subarray/range_subset.cc b/tiledb/sm/subarray/range_subset.cc index 7aa6df657c5..59822b7a37a 100644 --- a/tiledb/sm/subarray/range_subset.cc +++ b/tiledb/sm/subarray/range_subset.cc @@ -33,6 +33,7 @@ #include "tiledb/sm/subarray/range_subset.h" #include +#include using namespace tiledb::common; using namespace tiledb::type; @@ -131,6 +132,15 @@ RangeSetAndSuperset::RangeSetAndSuperset( ranges_.emplace_back(superset); } +RangeSetAndSuperset::RangeSetAndSuperset( + Datatype datatype, + const Range& superset, + std::vector subset, + bool coalesce_ranges) + : RangeSetAndSuperset(datatype, superset, false, coalesce_ranges) { + ranges_ = std::move(subset); +} + void RangeSetAndSuperset::sort_and_merge_ranges( ThreadPool* const compute_tp, bool merge) { if (ranges_.empty()) { diff --git a/tiledb/sm/subarray/range_subset.h b/tiledb/sm/subarray/range_subset.h index 16b12ab4471..f8b702b1ffe 100644 --- a/tiledb/sm/subarray/range_subset.h +++ b/tiledb/sm/subarray/range_subset.h @@ -429,10 +429,11 @@ class RangeSetAndSuperset { /** General constructor * * @param datatype The TileDB datatype of of the ranges. + * @param superset The superset of ranges to initialize the range set. * @param implicitly_initialize If ``true``, set the ranges to contain the - * full superset until a new range is explicitly added. + * full superset until a new range is explicitly added. * @param coalesce_ranges If ``true``, when adding a new range, attempt to - * combine with the first left-adjacent range found. + * combine with the first left-adjacent range found. **/ RangeSetAndSuperset( Datatype datatype, @@ -440,6 +441,21 @@ class RangeSetAndSuperset { bool implicitly_initialize, bool coalesce_ranges); + /** + * Constructor. + * + * @param datatype The TileDB datatype of of the ranges. + * @param superset The superset of ranges to initialize the range set. + * @param subset The subset of ranges to initialize the range set. + * @param coalesce_ranges If ``true``, when adding a new range, attempt to + * combine with the first left-adjacent range found. + **/ + RangeSetAndSuperset( + Datatype datatype, + const Range& superset, + std::vector subset, + bool coalesce_ranges); + /** Destructor. */ ~RangeSetAndSuperset() = default; diff --git a/tiledb/sm/subarray/subarray.cc b/tiledb/sm/subarray/subarray.cc index a0f2efffd9d..24ed220f50c 100644 --- a/tiledb/sm/subarray/subarray.cc +++ b/tiledb/sm/subarray/subarray.cc @@ -3287,5 +3287,15 @@ Subarray::LabelRangeSubset::LabelRangeSubset( , ranges_{RangeSetAndSuperset(type, Range(), false, coalesce_ranges)} { } +Subarray::LabelRangeSubset::LabelRangeSubset( + const std::string& name, + Datatype type, + std::vector ranges, + bool coalesce_ranges) + : name_{name} + , ranges_{RangeSetAndSuperset( + type, Range(), std::move(ranges), coalesce_ranges)} { +} + } // namespace sm } // namespace tiledb From 1f98d522621323359e965072db9e2dbeb7c3fa47 Mon Sep 17 00:00:00 2001 From: Shaun Reed Date: Fri, 9 Feb 2024 14:32:07 -0500 Subject: [PATCH 03/10] Fix tests --- tiledb/sm/serialization/query.cc | 4 +++- tiledb/sm/subarray/subarray.cc | 4 ++-- tiledb/sm/subarray/subarray.h | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tiledb/sm/serialization/query.cc b/tiledb/sm/serialization/query.cc index ce0fe6ba0b1..c8039c2721a 100644 --- a/tiledb/sm/serialization/query.cc +++ b/tiledb/sm/serialization/query.cc @@ -359,6 +359,7 @@ Subarray subarray_from_capnp( } } + auto frag_meta_size = array->opened_array()->fragment_metadata().size(); return { array, layout, @@ -368,7 +369,8 @@ Subarray subarray_from_capnp( is_default, label_range_subset, attr_range_subset, - relevant_fragments, + reader.hasRelevantFragments() ? relevant_fragments : + RelevantFragments(frag_meta_size), coalesce_ranges}; } diff --git a/tiledb/sm/subarray/subarray.cc b/tiledb/sm/subarray/subarray.cc index 24ed220f50c..3563f74f025 100644 --- a/tiledb/sm/subarray/subarray.cc +++ b/tiledb/sm/subarray/subarray.cc @@ -159,7 +159,7 @@ Subarray::Subarray( std::vector is_default, std::vector> label_range_subset, std::unordered_map> attr_range_subset, - std::vector relevant_fragments, + RelevantFragments relevant_fragments, bool coalesce_ranges) : stats_(stats) , logger_(std::move(logger)) @@ -171,7 +171,7 @@ Subarray::Subarray( , attr_range_subset_(std::move(attr_range_subset)) , is_default_(std::move(is_default)) , est_result_size_computed_(false) - , relevant_fragments_(relevant_fragments) + , relevant_fragments_(std::move(relevant_fragments)) , coalesce_ranges_(coalesce_ranges) , ranges_sorted_(false) { } diff --git a/tiledb/sm/subarray/subarray.h b/tiledb/sm/subarray/subarray.h index 96eafec6d00..4d8c350cf83 100644 --- a/tiledb/sm/subarray/subarray.h +++ b/tiledb/sm/subarray/subarray.h @@ -350,7 +350,7 @@ class Subarray { std::vector is_default, std::vector> label_range_subset, std::unordered_map> attr_range_subset, - std::vector relevant_fragments, + RelevantFragments relevant_fragments, bool coalesce_ranges = true); /** From 3ac3cf37781ff57f2974656bc6b70edc70b9c2bf Mon Sep 17 00:00:00 2001 From: Shaun Reed Date: Fri, 9 Feb 2024 14:59:42 -0500 Subject: [PATCH 04/10] Remove set_subarray_unsafe --- tiledb/sm/query/query.cc | 9 --------- tiledb/sm/query/query.h | 12 ------------ tiledb/sm/serialization/query.cc | 12 ++++++------ tiledb/sm/subarray/subarray.cc | 16 ---------------- tiledb/sm/subarray/subarray.h | 11 ----------- 5 files changed, 6 insertions(+), 54 deletions(-) diff --git a/tiledb/sm/query/query.cc b/tiledb/sm/query/query.cc index 9e3880646dc..7f67e80b2aa 100644 --- a/tiledb/sm/query/query.cc +++ b/tiledb/sm/query/query.cc @@ -1528,11 +1528,6 @@ const Subarray* Query::subarray() const { return &subarray_; } -Status Query::set_subarray_unsafe(const Subarray& subarray) { - subarray_ = subarray; - return Status::Ok(); -} - void Query::set_subarray(const tiledb::sm::Subarray& subarray) { // Perform checks related to the query type. switch (type_) { @@ -1583,10 +1578,6 @@ Status Query::set_subarray_unsafe(const NDRange& subarray) { return Status::Ok(); } -void Query::set_subarray_unsafe(const void* subarray) { - subarray_.set_subarray_unsafe(subarray); -} - Status Query::submit() { // Do not resubmit completed reads. if (type_ == QueryType::READ && status_ == QueryStatus::COMPLETED) { diff --git a/tiledb/sm/query/query.h b/tiledb/sm/query/query.h index 520934c61df..04469841483 100644 --- a/tiledb/sm/query/query.h +++ b/tiledb/sm/query/query.h @@ -617,21 +617,9 @@ class Query { */ void set_subarray(const tiledb::sm::Subarray& subarray); - /** Sets the query subarray, without performing any checks. */ - Status set_subarray_unsafe(const Subarray& subarray); - /** Sets the query subarray, without performing any checks. */ Status set_subarray_unsafe(const NDRange& subarray); - /** - * Sets the query subarray without performing any checks. - * - * Used for deserialize dense writes. - * - * @param subarray The subarray to be set. - */ - void set_subarray_unsafe(const void* subarray); - /** Submits the query to the storage manager. */ Status submit(); diff --git a/tiledb/sm/serialization/query.cc b/tiledb/sm/serialization/query.cc index c8039c2721a..c0d2837d346 100644 --- a/tiledb/sm/serialization/query.cc +++ b/tiledb/sm/serialization/query.cc @@ -1143,7 +1143,7 @@ Status reader_from_capnp( auto subarray_reader = reader_reader.getSubarray(); Subarray subarray = subarray_from_capnp( subarray_reader, array, layout, query->stats(), dummy_logger); - RETURN_NOT_OK(query->set_subarray_unsafe(subarray)); + query->set_subarray(subarray); // Read state if (reader_reader.hasReadState()) @@ -1181,7 +1181,7 @@ Status index_reader_from_capnp( auto subarray_reader = reader_reader.getSubarray(); Subarray subarray = subarray_from_capnp( subarray_reader, array, layout, query->stats(), dummy_logger); - RETURN_NOT_OK(query->set_subarray_unsafe(subarray)); + query->set_subarray(subarray); // Read state if (reader_reader.hasReadState()) @@ -1220,7 +1220,7 @@ Status dense_reader_from_capnp( auto subarray_reader = reader_reader.getSubarray(); Subarray subarray = subarray_from_capnp( subarray_reader, array, layout, query->stats(), dummy_logger); - RETURN_NOT_OK(query->set_subarray_unsafe(subarray)); + query->set_subarray(subarray); // Read state if (reader_reader.hasReadState()) @@ -2233,7 +2233,7 @@ Status query_from_capnp( RETURN_NOT_OK( utils::deserialize_subarray(subarray_reader, schema, &subarray)); try { - query->set_subarray_unsafe(subarray); + query->set_subarray(subarray); } catch (...) { tdb_free(subarray); throw; @@ -2246,7 +2246,7 @@ Status query_from_capnp( auto subarray_reader = writer_reader.getSubarrayRanges(); Subarray subarray = subarray_from_capnp( subarray_reader, array, layout, query->stats(), dummy_logger); - RETURN_NOT_OK(query->set_subarray_unsafe(subarray)); + query->set_subarray(subarray); } } } else { @@ -3200,7 +3200,7 @@ void ordered_dim_label_reader_from_capnp( auto subarray_reader = reader_reader.getSubarray(); Subarray subarray = subarray_from_capnp( subarray_reader, array, layout, query->stats(), dummy_logger); - throw_if_not_ok(query->set_subarray_unsafe(subarray)); + query->set_subarray(subarray); // OrderedDimLabelReader requires an initialized subarray for construction. query->set_dimension_label_ordered_read( diff --git a/tiledb/sm/subarray/subarray.cc b/tiledb/sm/subarray/subarray.cc index 3563f74f025..6d8958542b2 100644 --- a/tiledb/sm/subarray/subarray.cc +++ b/tiledb/sm/subarray/subarray.cc @@ -396,22 +396,6 @@ Status Subarray::set_subarray(const void* subarray) { return Status::Ok(); } -void Subarray::set_subarray_unsafe(const void* subarray) { - add_default_ranges(); - if (subarray != nullptr) { - auto dim_num = array_->array_schema_latest().dim_num(); - auto s_ptr = (const unsigned char*)subarray; - uint64_t offset = 0; - for (unsigned d = 0; d < dim_num; ++d) { - auto r_size = - 2 * array_->array_schema_latest().dimension_ptr(d)->coord_size(); - Range range(&s_ptr[offset], r_size); - throw_if_not_ok(this->add_range_unsafe(d, std::move(range))); - offset += r_size; - } - } -} - Status Subarray::add_range( unsigned dim_idx, const void* start, const void* end, const void* stride) { if (dim_idx >= this->array_->array_schema_latest().dim_num()) diff --git a/tiledb/sm/subarray/subarray.h b/tiledb/sm/subarray/subarray.h index 4d8c350cf83..bc57f49da05 100644 --- a/tiledb/sm/subarray/subarray.h +++ b/tiledb/sm/subarray/subarray.h @@ -398,17 +398,6 @@ class Subarray { */ Status set_subarray(const void* subarray); - /** - * Sets the subarray using a pointer to raw range data that stores one range - * per dimension without performing validity checks. - * - * This is only valid for arrays with homogenous dimension data types. This - * function should only be used for deserializing dense write queries. - * - * @param subarray A pointer to the range data to use. - */ - void set_subarray_unsafe(const void* subarray); - /** * Adds dimension ranges computed from label ranges on the dimension label. * From cff8cdc090c827042789dbc200509b601d571256 Mon Sep 17 00:00:00 2001 From: Shaun Reed Date: Mon, 12 Feb 2024 13:36:25 -0500 Subject: [PATCH 05/10] Clean code --- tiledb/sm/serialization/query.cc | 19 ++++++------------- tiledb/sm/subarray/subarray.cc | 18 ++---------------- tiledb/sm/subarray/subarray.h | 18 +----------------- 3 files changed, 9 insertions(+), 46 deletions(-) diff --git a/tiledb/sm/serialization/query.cc b/tiledb/sm/serialization/query.cc index c0d2837d346..2d7cb43b023 100644 --- a/tiledb/sm/serialization/query.cc +++ b/tiledb/sm/serialization/query.cc @@ -280,23 +280,17 @@ Subarray subarray_from_capnp( auto dim = array->array_schema_latest().dimension_ptr(i); bool implicitly_initialized = range_reader.getHasDefaultRange(); - range_subset[i] = - RangeSetAndSuperset(dim->type(), dim->domain(), true, coalesce_ranges); + range_subset[i] = RangeSetAndSuperset( + dim->type(), dim->domain(), implicitly_initialized, coalesce_ranges); is_default[i] = implicitly_initialized; if (range_reader.hasBufferSizes()) { auto ranges = range_buffers_from_capnp(range_reader); // If the range is implicitly initialized, the RangeSetAndSuperset // constructor will initialize the ranges to the domain. if (!implicitly_initialized) { - // Edge case for dimension labels where there are only label ranges set. - if (ranges.empty()) { - range_subset[i] = RangeSetAndSuperset( - dim->type(), dim->domain(), {dim->domain()}, coalesce_ranges); - } else { - // Add custom ranges, clearing any implicit ranges previously set. - range_subset[i] = RangeSetAndSuperset( - dim->type(), dim->domain(), ranges, coalesce_ranges); - } + // Add custom ranges, clearing any implicit ranges previously set. + range_subset[i] = RangeSetAndSuperset( + dim->type(), dim->domain(), ranges, coalesce_ranges); } } else { // Handle 1.7 style ranges where there is a single range with no sizes @@ -325,7 +319,6 @@ Subarray subarray_from_capnp( // Set ranges for this dim label on the subarray label_range_subset[dim_index] = { label_name, dim->type(), label_ranges, coalesce_ranges}; - range_subset[dim_index].clear(); is_default[dim_index] = false; } } @@ -361,7 +354,7 @@ Subarray subarray_from_capnp( auto frag_meta_size = array->opened_array()->fragment_metadata().size(); return { - array, + array->opened_array(), layout, reader.hasStats() ? s.stats() : parent_stats, logger, diff --git a/tiledb/sm/subarray/subarray.cc b/tiledb/sm/subarray/subarray.cc index 6d8958542b2..d28044670f8 100644 --- a/tiledb/sm/subarray/subarray.cc +++ b/tiledb/sm/subarray/subarray.cc @@ -151,7 +151,7 @@ Subarray::Subarray( } Subarray::Subarray( - const Array* array, + const shared_ptr opened_array, Layout layout, stats::Stats* stats, shared_ptr logger, @@ -163,7 +163,7 @@ Subarray::Subarray( bool coalesce_ranges) : stats_(stats) , logger_(std::move(logger)) - , array_(array->opened_array()) + , array_(opened_array) , layout_(layout) , cell_order_(array_->array_schema_latest().cell_order()) , range_subset_(std::move(range_subset)) @@ -1791,20 +1791,6 @@ Status Subarray::set_ranges_for_dim( return Status::Ok(); } -void Subarray::set_label_ranges_for_dim( - const uint32_t dim_idx, - const std::string& name, - const std::vector& ranges) { - auto dim{array_->array_schema_latest().dimension_ptr(dim_idx)}; - label_range_subset_[dim_idx] = - LabelRangeSubset(name, dim->type(), coalesce_ranges_); - for (const auto& range : ranges) { - throw_if_not_ok( - label_range_subset_[dim_idx].value().ranges_.add_range_unrestricted( - range)); - } -} - Status Subarray::split( unsigned splitting_dim, const ByteVecValue& splitting_value, diff --git a/tiledb/sm/subarray/subarray.h b/tiledb/sm/subarray/subarray.h index bc57f49da05..08d17f2626d 100644 --- a/tiledb/sm/subarray/subarray.h +++ b/tiledb/sm/subarray/subarray.h @@ -342,7 +342,7 @@ class Subarray { * with existing ranges as they are added */ Subarray( - const Array* array, + const shared_ptr opened_array, Layout layout, stats::Stats* stats, shared_ptr logger, @@ -1188,22 +1188,6 @@ class Subarray { */ Status set_ranges_for_dim(uint32_t dim_idx, const std::vector& ranges); - /** - * Directly sets the dimension label ranges for the given dimension index, - * making a deep copy. - * - * @param dim_idx Index of dimension to set - * @param name Name of the dimension label to set - * @param ranges `Range` vector that will be copied and set - * @return Status - * - * @note Intended for serialization only - */ - void set_label_ranges_for_dim( - const uint32_t dim_idx, - const std::string& name, - const std::vector& ranges); - /** * Splits the subarray along the splitting dimension and value into * two new subarrays `r1` and `r2`. From 6c7991553741905aecf00ba9491ac8e943d4448d Mon Sep 17 00:00:00 2001 From: Shaun Reed Date: Mon, 12 Feb 2024 16:37:45 -0500 Subject: [PATCH 06/10] Update Stats deserialization --- test/support/src/serialization_wrappers.cc | 3 ++- tiledb/sm/serialization/query.cc | 10 +++------- tiledb/sm/subarray/subarray.cc | 9 +++------ tiledb/sm/subarray/subarray.h | 16 +++++----------- 4 files changed, 13 insertions(+), 25 deletions(-) diff --git a/test/support/src/serialization_wrappers.cc b/test/support/src/serialization_wrappers.cc index 0973739a67a..6cef46a3591 100644 --- a/test/support/src/serialization_wrappers.cc +++ b/test/support/src/serialization_wrappers.cc @@ -32,6 +32,7 @@ */ #include "test/support/src/helpers.h" +#include "tiledb/api/c_api/context/context_api_internal.h" #include "tiledb/sm/c_api/tiledb.h" #include "tiledb/sm/c_api/tiledb_serialization.h" #include "tiledb/sm/c_api/tiledb_struct_def.h" @@ -205,7 +206,7 @@ void tiledb_subarray_serialize( // Deserialize tiledb_subarray_t* deserialized_subarray; auto layout = (*subarray)->subarray_->layout(); - auto stats = (*subarray)->subarray_->stats(); + auto stats = ctx->storage_manager()->stats()->create_child("Subarray"); shared_ptr dummy_logger = make_shared(HERE(), ""); tiledb::test::require_tiledb_ok( diff --git a/tiledb/sm/serialization/query.cc b/tiledb/sm/serialization/query.cc index 2d7cb43b023..1e6cb76445c 100644 --- a/tiledb/sm/serialization/query.cc +++ b/tiledb/sm/serialization/query.cc @@ -335,12 +335,7 @@ Subarray subarray_from_capnp( } } - // If cap'n proto object has stats set it on c++ object - Subarray s(array, layout, parent_stats, logger, true); - if (reader.hasStats()) { - auto stats_data = stats_from_capnp(reader.getStats()); - subarray->set_stats(stats_data); - } + const auto& stats_data = stats_from_capnp(reader.getStats()); std::vector relevant_fragments; if (reader.hasRelevantFragments()) { @@ -356,7 +351,8 @@ Subarray subarray_from_capnp( return { array->opened_array(), layout, - reader.hasStats() ? s.stats() : parent_stats, + parent_stats, + stats_data, logger, range_subset, is_default, diff --git a/tiledb/sm/subarray/subarray.cc b/tiledb/sm/subarray/subarray.cc index d28044670f8..6540da95785 100644 --- a/tiledb/sm/subarray/subarray.cc +++ b/tiledb/sm/subarray/subarray.cc @@ -153,7 +153,8 @@ Subarray::Subarray( Subarray::Subarray( const shared_ptr opened_array, Layout layout, - stats::Stats* stats, + stats::Stats* parent_stats, + const stats::StatsData& stats_data, shared_ptr logger, std::vector range_subset, std::vector is_default, @@ -161,7 +162,7 @@ Subarray::Subarray( std::unordered_map> attr_range_subset, RelevantFragments relevant_fragments, bool coalesce_ranges) - : stats_(stats) + : stats_(parent_stats->create_child("Subarray", stats_data)) , logger_(std::move(logger)) , array_(opened_array) , layout_(layout) @@ -3103,10 +3104,6 @@ const stats::Stats& Subarray::stats() const { return *stats_; } -void Subarray::set_stats(const stats::StatsData& data) { - stats_->populate_with_data(data); -} - tuple> Subarray::non_overlapping_ranges_for_dim( const uint64_t dim_idx) { const auto& ranges = range_subset_[dim_idx].ranges(); diff --git a/tiledb/sm/subarray/subarray.h b/tiledb/sm/subarray/subarray.h index 08d17f2626d..4ca07ef90f6 100644 --- a/tiledb/sm/subarray/subarray.h +++ b/tiledb/sm/subarray/subarray.h @@ -330,7 +330,8 @@ class Subarray { * if the subarray is used for reads, or of the values provided * by the user for writes). * @param parent_stats The parent stats to inherit from. - * @param logger The parent logger to clone and use for logging + * @param stats_data The stats data to use for the subarray. + * @param logger The parent logger to clone and use for logging. * @param range_subset Vector of RangeSetAndSuperset for each dimension. * @param is_default Vector of boolean indicating if the range is default. * @param label_range_subset Vector of optional for each @@ -339,12 +340,13 @@ class Subarray { * each attribute. * @param relevant_fragments RelevantFragments object for the subarray. * @param coalesce_ranges When enabled, ranges will attempt to coalesce - * with existing ranges as they are added + * with existing ranges as they are added. */ Subarray( const shared_ptr opened_array, Layout layout, - stats::Stats* stats, + stats::Stats* parent_stats, + const stats::StatsData& stats_data, shared_ptr logger, std::vector range_subset, std::vector is_default, @@ -1336,14 +1338,6 @@ class Subarray { /** Returns `stats_`. */ const stats::Stats& stats() const; - /** - * Populate the owned stats instance with data. - * To be removed when the class will get a C41 constructor. - * - * @param data Data to populate the stats with. - */ - void set_stats(const stats::StatsData& data); - /** Stores a vector of 1D ranges per dimension. */ std::vector> original_range_idx_; From b6009a62223c7bcc867505d5cacf32db44883a34 Mon Sep 17 00:00:00 2001 From: Shaun Reed Date: Thu, 15 Feb 2024 14:33:24 -0500 Subject: [PATCH 07/10] Feedback from review --- tiledb/sm/serialization/query.cc | 38 ++++++++++++++++-------------- tiledb/sm/subarray/range_subset.cc | 21 +++++++++-------- tiledb/sm/subarray/range_subset.h | 33 +++++++++++++++++--------- 3 files changed, 53 insertions(+), 39 deletions(-) diff --git a/tiledb/sm/serialization/query.cc b/tiledb/sm/serialization/query.cc index 1e6cb76445c..2b22630ff2a 100644 --- a/tiledb/sm/serialization/query.cc +++ b/tiledb/sm/serialization/query.cc @@ -271,33 +271,35 @@ Subarray subarray_from_capnp( auto ranges_reader = reader.getRanges(); uint32_t dim_num = ranges_reader.size(); - std::vector range_subset(dim_num); + std::vector range_subset; + range_subset.reserve(dim_num); std::vector is_default(dim_num, false); for (uint32_t i = 0; i < dim_num; i++) { auto range_reader = ranges_reader[i]; - Datatype type = Datatype::UINT8; - throw_if_not_ok(datatype_enum(range_reader.getType(), &type)); + Datatype type = datatype_enum(range_reader.getType()); auto dim = array->array_schema_latest().dimension_ptr(i); - bool implicitly_initialized = range_reader.getHasDefaultRange(); - range_subset[i] = RangeSetAndSuperset( - dim->type(), dim->domain(), implicitly_initialized, coalesce_ranges); - is_default[i] = implicitly_initialized; - if (range_reader.hasBufferSizes()) { - auto ranges = range_buffers_from_capnp(range_reader); + is_default[i] = range_reader.getHasDefaultRange(); + if (is_default[i]) { // If the range is implicitly initialized, the RangeSetAndSuperset // constructor will initialize the ranges to the domain. - if (!implicitly_initialized) { + range_subset.emplace_back( + type, dim->domain(), is_default[i], coalesce_ranges); + } else { + std::vector ranges; + if (range_reader.hasBufferSizes()) { + ranges = range_buffers_from_capnp(range_reader); // Add custom ranges, clearing any implicit ranges previously set. - range_subset[i] = RangeSetAndSuperset( - dim->type(), dim->domain(), ranges, coalesce_ranges); + range_subset.emplace_back( + type, dim->domain(), std::move(ranges), coalesce_ranges); + } else { + // Handle 1.7 style ranges where there is a single range with no sizes + auto data = range_reader.getBuffer(); + auto data_ptr = data.asBytes(); + ranges.emplace_back(data_ptr.begin(), data.size()); + range_subset.emplace_back( + type, dim->domain(), std::move(ranges), coalesce_ranges); } - } else { - // Handle 1.7 style ranges where there is a single range with no sizes - auto data = range_reader.getBuffer(); - auto data_ptr = data.asBytes(); - Range range(data_ptr.begin(), data.size()); - throw_if_not_ok(range_subset[i].add_range_unrestricted(range)); } } diff --git a/tiledb/sm/subarray/range_subset.cc b/tiledb/sm/subarray/range_subset.cc index 59822b7a37a..52453130f28 100644 --- a/tiledb/sm/subarray/range_subset.cc +++ b/tiledb/sm/subarray/range_subset.cc @@ -135,10 +135,11 @@ RangeSetAndSuperset::RangeSetAndSuperset( RangeSetAndSuperset::RangeSetAndSuperset( Datatype datatype, const Range& superset, - std::vector subset, + std::vector&& subset, bool coalesce_ranges) - : RangeSetAndSuperset(datatype, superset, false, coalesce_ranges) { - ranges_ = std::move(subset); + : impl_(range_subset_internals(datatype, superset, coalesce_ranges)) + , is_implicitly_initialized_(false) + , ranges_(subset) { } void RangeSetAndSuperset::sort_and_merge_ranges( @@ -171,6 +172,13 @@ tuple> RangeSetAndSuperset::add_range( } } +void RangeSetAndSuperset::check_oob() { + for (auto& range : ranges_) { + impl_->check_range_is_valid(range); + throw_if_not_ok(impl_->check_range_is_subset(range)); + } +} + Status RangeSetAndSuperset::add_range_unrestricted(const Range& range) { if (is_implicitly_initialized_) { ranges_.clear(); @@ -179,11 +187,4 @@ Status RangeSetAndSuperset::add_range_unrestricted(const Range& range) { return impl_->add_range(ranges_, range); } -void RangeSetAndSuperset::check_oob() { - for (auto& range : ranges_) { - impl_->check_range_is_valid(range); - throw_if_not_ok(impl_->check_range_is_subset(range)); - } -} - } // namespace tiledb::sm diff --git a/tiledb/sm/subarray/range_subset.h b/tiledb/sm/subarray/range_subset.h index f8b702b1ffe..555be0c4350 100644 --- a/tiledb/sm/subarray/range_subset.h +++ b/tiledb/sm/subarray/range_subset.h @@ -419,6 +419,9 @@ class TypedRangeSetAndFullsetImpl */ class RangeSetAndSuperset { public: + /** Friend Subarray for calling `add_range_unrestricted` */ + friend class Subarray; + /* ********************************* */ /* CONSTRUCTORS & DESTRUCTORS */ /* ********************************* */ @@ -453,7 +456,7 @@ class RangeSetAndSuperset { RangeSetAndSuperset( Datatype datatype, const Range& superset, - std::vector subset, + std::vector&& subset, bool coalesce_ranges); /** Destructor. */ @@ -484,16 +487,6 @@ class RangeSetAndSuperset { tuple> add_range( Range& range, const bool read_range_oob_error = true); - /** - * Adds a range to the range manager without performing any checkes. - * - * If the ranges are currently implicitly initialized, then they will be - * cleared before the new range is added. - * - * @param range The range to add. - */ - Status add_range_unrestricted(const Range& range); - /** * Removes all ranges. * @@ -559,6 +552,10 @@ class RangeSetAndSuperset { void sort_and_merge_ranges(ThreadPool* const compute_tp, bool merge = false); private: + /* ********************************* */ + /* PRIVATE ATTRIBUTES */ + /* ********************************* */ + /** Pointer to typed implementation details. */ shared_ptr impl_ = nullptr; @@ -571,6 +568,20 @@ class RangeSetAndSuperset { /** Stored ranges. */ std::vector ranges_{}; + + /* ********************************* */ + /* PRIVATE METHODS */ + /* ********************************* */ + + /** + * Adds a range to the range manager without performing any checkes. + * + * If the ranges are currently implicitly initialized, then they will be + * cleared before the new range is added. + * + * @param range The range to add. + */ + Status add_range_unrestricted(const Range& range); }; } // namespace tiledb::sm From b86a68728a4e747107c30c81b95aaeed4ad14004 Mon Sep 17 00:00:00 2001 From: Shaun Reed Date: Fri, 16 Feb 2024 10:36:08 -0500 Subject: [PATCH 08/10] Use reserve for label_range_subset --- tiledb/sm/serialization/query.cc | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/tiledb/sm/serialization/query.cc b/tiledb/sm/serialization/query.cc index 2b22630ff2a..9b9ec6b1759 100644 --- a/tiledb/sm/serialization/query.cc +++ b/tiledb/sm/serialization/query.cc @@ -303,8 +303,9 @@ Subarray subarray_from_capnp( } } - std::vector> label_range_subset( - dim_num, nullopt); + std::vector> label_range_subset; + label_range_subset.reserve(dim_num); + uint32_t last_dim = 0; if (reader.hasLabelRanges()) { auto label_ranges_reader = reader.getLabelRanges(); uint32_t label_num = label_ranges_reader.size(); @@ -312,6 +313,11 @@ Subarray subarray_from_capnp( auto label_range_reader = label_ranges_reader[i]; auto dim_index = label_range_reader.getDimensionId(); auto dim = array->array_schema_latest().dimension_ptr(dim_index); + + // Fill in any missing dimensions with nullopt + for (; last_dim < dim_index; last_dim++) { + label_range_subset.emplace_back(std::nullopt); + } auto label_name = label_range_reader.getName(); // Deserialize ranges for this dim label @@ -319,11 +325,19 @@ Subarray subarray_from_capnp( auto label_ranges = range_buffers_from_capnp(range_reader); // Set ranges for this dim label on the subarray - label_range_subset[dim_index] = { - label_name, dim->type(), label_ranges, coalesce_ranges}; + label_range_subset.emplace_back( + std::in_place, + label_name, + dim->type(), + label_ranges, + coalesce_ranges); is_default[dim_index] = false; } } + // Fill in label ranges with nullopt for any remaining dimensions + for (; last_dim < dim_num; last_dim++) { + label_range_subset.emplace_back(std::nullopt); + } std::unordered_map> attr_range_subset; if (reader.hasAttributeRanges()) { From 712fa4b38ad7b01c2fe0fff4dffb8f202e942db9 Mon Sep 17 00:00:00 2001 From: Shaun Reed Date: Fri, 16 Feb 2024 13:36:37 -0500 Subject: [PATCH 09/10] Make Subarray::add_range_unsafe private --- test/src/unit-cppapi-subarray.cc | 109 +++++++++++++++++++++++++------ tiledb/sm/subarray/subarray.cc | 22 +++---- tiledb/sm/subarray/subarray.h | 15 +++-- 3 files changed, 109 insertions(+), 37 deletions(-) diff --git a/test/src/unit-cppapi-subarray.cc b/test/src/unit-cppapi-subarray.cc index 96455c04ff7..ad9b9a899bd 100644 --- a/test/src/unit-cppapi-subarray.cc +++ b/test/src/unit-cppapi-subarray.cc @@ -463,50 +463,119 @@ TEST_CASE( Array array_r(ctx, array_name, TILEDB_READ); Subarray subarray(ctx, array_r); + // If read_range_oob_error is false, the range will be cropped with a + // warning and the query will succeed. + auto read_range_oob_error = GENERATE(true, false); auto expected = TILEDB_ERR; + int fill_val = tiledb::sm::constants::empty_int32; + std::vector expected_data(16, fill_val); + expected_data[0] = 1; + expected_data[1] = 2; + expected_data[4] = 3; + expected_data[5] = 4; SECTION("- Upper bound OOB") { int range[] = {0, 100}; auto r = Range(&range[0], &range[1], sizeof(int)); - CHECK(subarray.ptr().get()->subarray_->add_range_unsafe(0, r).ok()); + if (read_range_oob_error) { + CHECK_FALSE( + subarray.ptr() + .get() + ->subarray_->add_range(0, std::move(r), read_range_oob_error) + .ok()); + } else { + CHECK(subarray.ptr() + .get() + ->subarray_->add_range(0, std::move(r), read_range_oob_error) + .ok()); + // The subarray will warn and crop to full domain ranges. + expected = TILEDB_OK; + } } SECTION("- Lower bound OOB") { int range[] = {-1, 2}; auto r = Range(&range[0], &range[1], sizeof(int)); - CHECK(subarray.ptr().get()->subarray_->add_range_unsafe(0, r).ok()); + if (read_range_oob_error) { + CHECK_FALSE( + subarray.ptr() + .get() + ->subarray_->add_range(0, std::move(r), read_range_oob_error) + .ok()); + } else { + // Warn and crop dim0 to [0, 2] with [0, 3] implicitly set on dim1. + CHECK(subarray.ptr() + .get() + ->subarray_->add_range(0, std::move(r), read_range_oob_error) + .ok()); + expected_data.resize(12); + expected = TILEDB_OK; + } } SECTION("- Second range OOB") { int range[] = {1, 4}; auto r = Range(&range[0], &range[1], sizeof(int)); - CHECK(subarray.ptr().get()->subarray_->add_range_unsafe(0, r).ok()); int range2[] = {10, 20}; auto r2 = Range(&range2[0], &range2[1], sizeof(int)); - CHECK(subarray.ptr().get()->subarray_->add_range_unsafe(1, r2).ok()); + if (read_range_oob_error) { + CHECK_FALSE( + subarray.ptr() + .get() + ->subarray_->add_range(0, std::move(r), read_range_oob_error) + .ok()); + CHECK_FALSE( + subarray.ptr() + .get() + ->subarray_->add_range(1, std::move(r2), read_range_oob_error) + .ok()); + } else { + // Warn and crop dim0 to [1, 3] and dim1 to [3, 3] + CHECK(subarray.ptr() + .get() + ->subarray_->add_range(0, std::move(r), read_range_oob_error) + .ok()); + CHECK(subarray.ptr() + .get() + ->subarray_->add_range(1, std::move(r2), read_range_oob_error) + .ok()); + expected_data = {fill_val, fill_val, fill_val}; + expected = TILEDB_OK; + } } SECTION("- Valid ranges") { int range[] = {0, 1}; auto r = Range(&range[0], &range[1], sizeof(int)); - CHECK(subarray.ptr().get()->subarray_->add_range_unsafe(0, r).ok()); - CHECK(subarray.ptr().get()->subarray_->add_range_unsafe(1, r).ok()); + CHECK(subarray.ptr() + .get() + ->subarray_->add_range(0, std::move(r), read_range_oob_error) + .ok()); + CHECK(subarray.ptr() + .get() + ->subarray_->add_range(1, std::move(r), read_range_oob_error) + .ok()); + expected_data = data_w; expected = TILEDB_OK; } - Query query(ctx, array_r); - query.set_subarray(subarray); - query.set_config(cfg); - - std::vector a(4); - query.set_data_buffer("a", a); - tiledb::test::ServerQueryBuffers buffers; - CHECK( - submit_query_wrapper( - ctx, array_name, &query, buffers, true, query_v2, false) == expected); - - if (expected == TILEDB_OK) { - CHECK(query.query_status() == tiledb::Query::Status::COMPLETE); - CHECK(a == std::vector{1, 2, 3, 4}); + // If the Subarray threw an exception when adding OOB ranges it will be unset. + if (!read_range_oob_error || expected == TILEDB_OK) { + Query query(ctx, array_r); + query.set_subarray(subarray); + query.set_config(cfg); + + std::vector a(expected_data.size()); + query.set_data_buffer("a", a); + tiledb::test::ServerQueryBuffers buffers; + CHECK( + submit_query_wrapper( + ctx, array_name, &query, buffers, true, query_v2, false) == + expected); + + if (expected == TILEDB_OK) { + CHECK(query.query_status() == tiledb::Query::Status::COMPLETE); + CHECK(a == expected_data); + } } if (vfs.is_dir(array_name)) { diff --git a/tiledb/sm/subarray/subarray.cc b/tiledb/sm/subarray/subarray.cc index 6540da95785..e4f509bc80d 100644 --- a/tiledb/sm/subarray/subarray.cc +++ b/tiledb/sm/subarray/subarray.cc @@ -358,17 +358,6 @@ Status Subarray::add_range( return Status::Ok(); } -Status Subarray::add_range_unsafe(uint32_t dim_idx, const Range& range) { - // Must reset the result size and tile overlap - est_result_size_computed_ = false; - tile_overlap_.clear(); - - // Add the range - throw_if_not_ok(range_subset_[dim_idx].add_range_unrestricted(range)); - is_default_[dim_idx] = range_subset_[dim_idx].is_implicitly_initialized(); - return Status::Ok(); -} - Status Subarray::set_subarray(const void* subarray) { if (!array_->array_schema_latest().domain().all_dims_same_type()) return LOG_STATUS( @@ -2123,6 +2112,17 @@ void Subarray::add_default_ranges() { add_default_label_ranges(dim_num); } +Status Subarray::add_range_unsafe(uint32_t dim_idx, const Range& range) { + // Must reset the result size and tile overlap + est_result_size_computed_ = false; + tile_overlap_.clear(); + + // Add the range + throw_if_not_ok(range_subset_[dim_idx].add_range_unrestricted(range)); + is_default_[dim_idx] = range_subset_[dim_idx].is_implicitly_initialized(); + return Status::Ok(); +} + void Subarray::add_default_label_ranges(dimension_size_type dim_num) { label_range_subset_.clear(); label_range_subset_.resize(dim_num, nullopt); diff --git a/tiledb/sm/subarray/subarray.h b/tiledb/sm/subarray/subarray.h index 4ca07ef90f6..cc51be6172a 100644 --- a/tiledb/sm/subarray/subarray.h +++ b/tiledb/sm/subarray/subarray.h @@ -151,6 +151,9 @@ class ITileRange { */ class Subarray { public: + /** Friend with Query to call `add_range_unsafe` */ + friend class Query; + /* ********************************* */ /* PUBLIC DATA TYPES */ /* ********************************* */ @@ -524,12 +527,6 @@ class Subarray { const void* end, uint64_t end_size); - /** - * Adds a range along the dimension with the given index, without - * performing any error checks. - */ - Status add_range_unsafe(uint32_t dim_idx, const Range& range); - /** * Adds a range to the (read/write) query on the input dimension by name, * in the form of (start, end, stride). @@ -1554,6 +1551,12 @@ class Subarray { */ void add_default_ranges(); + /** + * Adds a range along the dimension with the given index, without + * performing any error checks. + */ + Status add_range_unsafe(uint32_t dim_idx, const Range& range); + /** Computes the estimated result size for all attributes/dimensions. */ Status compute_est_result_size(const Config* config, ThreadPool* compute_tp); From c9e47264dc69e6d1f411cbb44d6279786d78c431 Mon Sep 17 00:00:00 2001 From: Luc Rancourt Date: Thu, 29 Feb 2024 10:16:24 +0100 Subject: [PATCH 10/10] Making methods public again. --- tiledb/sm/subarray/range_subset.cc | 14 +++++++------- tiledb/sm/subarray/range_subset.h | 27 ++++++++++----------------- tiledb/sm/subarray/subarray.cc | 22 +++++++++++----------- tiledb/sm/subarray/subarray.h | 15 ++++++--------- 4 files changed, 34 insertions(+), 44 deletions(-) diff --git a/tiledb/sm/subarray/range_subset.cc b/tiledb/sm/subarray/range_subset.cc index 52453130f28..42604b0ad6c 100644 --- a/tiledb/sm/subarray/range_subset.cc +++ b/tiledb/sm/subarray/range_subset.cc @@ -172,13 +172,6 @@ tuple> RangeSetAndSuperset::add_range( } } -void RangeSetAndSuperset::check_oob() { - for (auto& range : ranges_) { - impl_->check_range_is_valid(range); - throw_if_not_ok(impl_->check_range_is_subset(range)); - } -} - Status RangeSetAndSuperset::add_range_unrestricted(const Range& range) { if (is_implicitly_initialized_) { ranges_.clear(); @@ -187,4 +180,11 @@ Status RangeSetAndSuperset::add_range_unrestricted(const Range& range) { return impl_->add_range(ranges_, range); } +void RangeSetAndSuperset::check_oob() { + for (auto& range : ranges_) { + impl_->check_range_is_valid(range); + throw_if_not_ok(impl_->check_range_is_subset(range)); + } +} + } // namespace tiledb::sm diff --git a/tiledb/sm/subarray/range_subset.h b/tiledb/sm/subarray/range_subset.h index 555be0c4350..eb3fab79e5b 100644 --- a/tiledb/sm/subarray/range_subset.h +++ b/tiledb/sm/subarray/range_subset.h @@ -419,9 +419,6 @@ class TypedRangeSetAndFullsetImpl */ class RangeSetAndSuperset { public: - /** Friend Subarray for calling `add_range_unrestricted` */ - friend class Subarray; - /* ********************************* */ /* CONSTRUCTORS & DESTRUCTORS */ /* ********************************* */ @@ -487,6 +484,16 @@ class RangeSetAndSuperset { tuple> add_range( Range& range, const bool read_range_oob_error = true); + /** + * Adds a range to the range manager without performing any checkes. + * + * If the ranges are currently implicitly initialized, then they will be + * cleared before the new range is added. + * + * @param range The range to add. + */ + Status add_range_unrestricted(const Range& range); + /** * Removes all ranges. * @@ -568,20 +575,6 @@ class RangeSetAndSuperset { /** Stored ranges. */ std::vector ranges_{}; - - /* ********************************* */ - /* PRIVATE METHODS */ - /* ********************************* */ - - /** - * Adds a range to the range manager without performing any checkes. - * - * If the ranges are currently implicitly initialized, then they will be - * cleared before the new range is added. - * - * @param range The range to add. - */ - Status add_range_unrestricted(const Range& range); }; } // namespace tiledb::sm diff --git a/tiledb/sm/subarray/subarray.cc b/tiledb/sm/subarray/subarray.cc index e4f509bc80d..3674f4a66f2 100644 --- a/tiledb/sm/subarray/subarray.cc +++ b/tiledb/sm/subarray/subarray.cc @@ -425,6 +425,17 @@ Status Subarray::add_range( dim_idx, Range(&range[0], 2 * coord_size), err_on_range_oob_); } +Status Subarray::add_range_unsafe(uint32_t dim_idx, const Range& range) { + // Must reset the result size and tile overlap + est_result_size_computed_ = false; + tile_overlap_.clear(); + + // Add the range + throw_if_not_ok(range_subset_[dim_idx].add_range_unrestricted(range)); + is_default_[dim_idx] = range_subset_[dim_idx].is_implicitly_initialized(); + return Status::Ok(); +} + Status Subarray::add_point_ranges( unsigned dim_idx, const void* start, uint64_t count, bool check_for_label) { if (dim_idx >= this->array_->array_schema_latest().dim_num()) { @@ -2112,17 +2123,6 @@ void Subarray::add_default_ranges() { add_default_label_ranges(dim_num); } -Status Subarray::add_range_unsafe(uint32_t dim_idx, const Range& range) { - // Must reset the result size and tile overlap - est_result_size_computed_ = false; - tile_overlap_.clear(); - - // Add the range - throw_if_not_ok(range_subset_[dim_idx].add_range_unrestricted(range)); - is_default_[dim_idx] = range_subset_[dim_idx].is_implicitly_initialized(); - return Status::Ok(); -} - void Subarray::add_default_label_ranges(dimension_size_type dim_num) { label_range_subset_.clear(); label_range_subset_.resize(dim_num, nullopt); diff --git a/tiledb/sm/subarray/subarray.h b/tiledb/sm/subarray/subarray.h index cc51be6172a..8ab378a4c7a 100644 --- a/tiledb/sm/subarray/subarray.h +++ b/tiledb/sm/subarray/subarray.h @@ -151,9 +151,6 @@ class ITileRange { */ class Subarray { public: - /** Friend with Query to call `add_range_unsafe` */ - friend class Query; - /* ********************************* */ /* PUBLIC DATA TYPES */ /* ********************************* */ @@ -478,6 +475,12 @@ class Subarray { Status add_range( unsigned dim_idx, const void* start, const void* end, const void* stride); + /** + * Adds a range along the dimension with the given index, without + * performing any error checks. + */ + Status add_range_unsafe(uint32_t dim_idx, const Range& range); + /** * @brief Set point ranges from an array * @@ -1551,12 +1554,6 @@ class Subarray { */ void add_default_ranges(); - /** - * Adds a range along the dimension with the given index, without - * performing any error checks. - */ - Status add_range_unsafe(uint32_t dim_idx, const Range& range); - /** Computes the estimated result size for all attributes/dimensions. */ Status compute_est_result_size(const Config* config, ThreadPool* compute_tp);