diff --git a/test/src/unit-cppapi-subarray.cc b/test/src/unit-cppapi-subarray.cc index 96455c04ff7..ad9b9a899bd 100644 --- a/test/src/unit-cppapi-subarray.cc +++ b/test/src/unit-cppapi-subarray.cc @@ -463,50 +463,119 @@ TEST_CASE( Array array_r(ctx, array_name, TILEDB_READ); Subarray subarray(ctx, array_r); + // If read_range_oob_error is false, the range will be cropped with a + // warning and the query will succeed. + auto read_range_oob_error = GENERATE(true, false); auto expected = TILEDB_ERR; + int fill_val = tiledb::sm::constants::empty_int32; + std::vector expected_data(16, fill_val); + expected_data[0] = 1; + expected_data[1] = 2; + expected_data[4] = 3; + expected_data[5] = 4; SECTION("- Upper bound OOB") { int range[] = {0, 100}; auto r = Range(&range[0], &range[1], sizeof(int)); - CHECK(subarray.ptr().get()->subarray_->add_range_unsafe(0, r).ok()); + if (read_range_oob_error) { + CHECK_FALSE( + subarray.ptr() + .get() + ->subarray_->add_range(0, std::move(r), read_range_oob_error) + .ok()); + } else { + CHECK(subarray.ptr() + .get() + ->subarray_->add_range(0, std::move(r), read_range_oob_error) + .ok()); + // The subarray will warn and crop to full domain ranges. + expected = TILEDB_OK; + } } SECTION("- Lower bound OOB") { int range[] = {-1, 2}; auto r = Range(&range[0], &range[1], sizeof(int)); - CHECK(subarray.ptr().get()->subarray_->add_range_unsafe(0, r).ok()); + if (read_range_oob_error) { + CHECK_FALSE( + subarray.ptr() + .get() + ->subarray_->add_range(0, std::move(r), read_range_oob_error) + .ok()); + } else { + // Warn and crop dim0 to [0, 2] with [0, 3] implicitly set on dim1. + CHECK(subarray.ptr() + .get() + ->subarray_->add_range(0, std::move(r), read_range_oob_error) + .ok()); + expected_data.resize(12); + expected = TILEDB_OK; + } } SECTION("- Second range OOB") { int range[] = {1, 4}; auto r = Range(&range[0], &range[1], sizeof(int)); - CHECK(subarray.ptr().get()->subarray_->add_range_unsafe(0, r).ok()); int range2[] = {10, 20}; auto r2 = Range(&range2[0], &range2[1], sizeof(int)); - CHECK(subarray.ptr().get()->subarray_->add_range_unsafe(1, r2).ok()); + if (read_range_oob_error) { + CHECK_FALSE( + subarray.ptr() + .get() + ->subarray_->add_range(0, std::move(r), read_range_oob_error) + .ok()); + CHECK_FALSE( + subarray.ptr() + .get() + ->subarray_->add_range(1, std::move(r2), read_range_oob_error) + .ok()); + } else { + // Warn and crop dim0 to [1, 3] and dim1 to [3, 3] + CHECK(subarray.ptr() + .get() + ->subarray_->add_range(0, std::move(r), read_range_oob_error) + .ok()); + CHECK(subarray.ptr() + .get() + ->subarray_->add_range(1, std::move(r2), read_range_oob_error) + .ok()); + expected_data = {fill_val, fill_val, fill_val}; + expected = TILEDB_OK; + } } SECTION("- Valid ranges") { int range[] = {0, 1}; auto r = Range(&range[0], &range[1], sizeof(int)); - CHECK(subarray.ptr().get()->subarray_->add_range_unsafe(0, r).ok()); - CHECK(subarray.ptr().get()->subarray_->add_range_unsafe(1, r).ok()); + CHECK(subarray.ptr() + .get() + ->subarray_->add_range(0, std::move(r), read_range_oob_error) + .ok()); + CHECK(subarray.ptr() + .get() + ->subarray_->add_range(1, std::move(r), read_range_oob_error) + .ok()); + expected_data = data_w; expected = TILEDB_OK; } - Query query(ctx, array_r); - query.set_subarray(subarray); - query.set_config(cfg); - - std::vector a(4); - query.set_data_buffer("a", a); - tiledb::test::ServerQueryBuffers buffers; - CHECK( - submit_query_wrapper( - ctx, array_name, &query, buffers, true, query_v2, false) == expected); - - if (expected == TILEDB_OK) { - CHECK(query.query_status() == tiledb::Query::Status::COMPLETE); - CHECK(a == std::vector{1, 2, 3, 4}); + // If the Subarray threw an exception when adding OOB ranges it will be unset. + if (!read_range_oob_error || expected == TILEDB_OK) { + Query query(ctx, array_r); + query.set_subarray(subarray); + query.set_config(cfg); + + std::vector a(expected_data.size()); + query.set_data_buffer("a", a); + tiledb::test::ServerQueryBuffers buffers; + CHECK( + submit_query_wrapper( + ctx, array_name, &query, buffers, true, query_v2, false) == + expected); + + if (expected == TILEDB_OK) { + CHECK(query.query_status() == tiledb::Query::Status::COMPLETE); + CHECK(a == expected_data); + } } if (vfs.is_dir(array_name)) { diff --git a/test/support/src/serialization_wrappers.cc b/test/support/src/serialization_wrappers.cc index fa7dec6fc12..6cef46a3591 100644 --- a/test/support/src/serialization_wrappers.cc +++ b/test/support/src/serialization_wrappers.cc @@ -32,6 +32,7 @@ */ #include "test/support/src/helpers.h" +#include "tiledb/api/c_api/context/context_api_internal.h" #include "tiledb/sm/c_api/tiledb.h" #include "tiledb/sm/c_api/tiledb_serialization.h" #include "tiledb/sm/c_api/tiledb_struct_def.h" @@ -204,11 +205,15 @@ void tiledb_subarray_serialize( .ok()); // Deserialize tiledb_subarray_t* deserialized_subarray; + auto layout = (*subarray)->subarray_->layout(); + auto stats = ctx->storage_manager()->stats()->create_child("Subarray"); + shared_ptr dummy_logger = make_shared(HERE(), ""); + tiledb::test::require_tiledb_ok( ctx, tiledb_subarray_alloc(ctx, array, &deserialized_subarray)); - REQUIRE(tiledb::sm::serialization::subarray_from_capnp( - builder, deserialized_subarray->subarray_) - .ok()); + *deserialized_subarray->subarray_ = + tiledb::sm::serialization::subarray_from_capnp( + builder, array->array_.get(), layout, stats, dummy_logger); *subarray = deserialized_subarray; #endif } diff --git a/tiledb/sm/query/query.cc b/tiledb/sm/query/query.cc index 9e3880646dc..7f67e80b2aa 100644 --- a/tiledb/sm/query/query.cc +++ b/tiledb/sm/query/query.cc @@ -1528,11 +1528,6 @@ const Subarray* Query::subarray() const { return &subarray_; } -Status Query::set_subarray_unsafe(const Subarray& subarray) { - subarray_ = subarray; - return Status::Ok(); -} - void Query::set_subarray(const tiledb::sm::Subarray& subarray) { // Perform checks related to the query type. switch (type_) { @@ -1583,10 +1578,6 @@ Status Query::set_subarray_unsafe(const NDRange& subarray) { return Status::Ok(); } -void Query::set_subarray_unsafe(const void* subarray) { - subarray_.set_subarray_unsafe(subarray); -} - Status Query::submit() { // Do not resubmit completed reads. if (type_ == QueryType::READ && status_ == QueryStatus::COMPLETED) { diff --git a/tiledb/sm/query/query.h b/tiledb/sm/query/query.h index 520934c61df..04469841483 100644 --- a/tiledb/sm/query/query.h +++ b/tiledb/sm/query/query.h @@ -617,21 +617,9 @@ class Query { */ void set_subarray(const tiledb::sm::Subarray& subarray); - /** Sets the query subarray, without performing any checks. */ - Status set_subarray_unsafe(const Subarray& subarray); - /** Sets the query subarray, without performing any checks. */ Status set_subarray_unsafe(const NDRange& subarray); - /** - * Sets the query subarray without performing any checks. - * - * Used for deserialize dense writes. - * - * @param subarray The subarray to be set. - */ - void set_subarray_unsafe(const void* subarray); - /** Submits the query to the storage manager. */ Status submit(); diff --git a/tiledb/sm/serialization/query.cc b/tiledb/sm/serialization/query.cc index e3b0cd7c95c..9b9ec6b1759 100644 --- a/tiledb/sm/serialization/query.cc +++ b/tiledb/sm/serialization/query.cc @@ -261,84 +261,122 @@ Status subarray_to_capnp( return Status::Ok(); } -Status subarray_from_capnp( - const capnp::Subarray::Reader& reader, Subarray* subarray) { - RETURN_NOT_OK(subarray->set_coalesce_ranges(reader.getCoalesceRanges())); +Subarray subarray_from_capnp( + const capnp::Subarray::Reader& reader, + const Array* array, + Layout layout, + stats::Stats* parent_stats, + shared_ptr logger) { + bool coalesce_ranges = reader.getCoalesceRanges(); auto ranges_reader = reader.getRanges(); + uint32_t dim_num = ranges_reader.size(); + std::vector range_subset; + range_subset.reserve(dim_num); + std::vector is_default(dim_num, false); for (uint32_t i = 0; i < dim_num; i++) { auto range_reader = ranges_reader[i]; - - auto data = range_reader.getBuffer(); - auto data_ptr = data.asBytes(); - if (range_reader.hasBufferSizes()) { - auto ranges = range_buffers_from_capnp(range_reader); - RETURN_NOT_OK(subarray->set_ranges_for_dim(i, ranges)); - - // Set default indicator - subarray->set_is_default(i, range_reader.getHasDefaultRange()); + Datatype type = datatype_enum(range_reader.getType()); + auto dim = array->array_schema_latest().dimension_ptr(i); + + is_default[i] = range_reader.getHasDefaultRange(); + if (is_default[i]) { + // If the range is implicitly initialized, the RangeSetAndSuperset + // constructor will initialize the ranges to the domain. + range_subset.emplace_back( + type, dim->domain(), is_default[i], coalesce_ranges); } else { - // Handle 1.7 style ranges where there is a single range with no sizes - Range range(data_ptr.begin(), data.size()); - RETURN_NOT_OK(subarray->set_ranges_for_dim(i, {range})); - subarray->set_is_default(i, range_reader.getHasDefaultRange()); + std::vector ranges; + if (range_reader.hasBufferSizes()) { + ranges = range_buffers_from_capnp(range_reader); + // Add custom ranges, clearing any implicit ranges previously set. + range_subset.emplace_back( + type, dim->domain(), std::move(ranges), coalesce_ranges); + } else { + // Handle 1.7 style ranges where there is a single range with no sizes + auto data = range_reader.getBuffer(); + auto data_ptr = data.asBytes(); + ranges.emplace_back(data_ptr.begin(), data.size()); + range_subset.emplace_back( + type, dim->domain(), std::move(ranges), coalesce_ranges); + } } } + std::vector> label_range_subset; + label_range_subset.reserve(dim_num); + uint32_t last_dim = 0; if (reader.hasLabelRanges()) { - subarray->add_default_label_ranges(dim_num); auto label_ranges_reader = reader.getLabelRanges(); uint32_t label_num = label_ranges_reader.size(); for (uint32_t i = 0; i < label_num; i++) { auto label_range_reader = label_ranges_reader[i]; - auto dim_id = label_range_reader.getDimensionId(); + auto dim_index = label_range_reader.getDimensionId(); + auto dim = array->array_schema_latest().dimension_ptr(dim_index); + + // Fill in any missing dimensions with nullopt + for (; last_dim < dim_index; last_dim++) { + label_range_subset.emplace_back(std::nullopt); + } auto label_name = label_range_reader.getName(); // Deserialize ranges for this dim label auto range_reader = label_range_reader.getRanges(); - auto ranges = range_buffers_from_capnp(range_reader); + auto label_ranges = range_buffers_from_capnp(range_reader); // Set ranges for this dim label on the subarray - subarray->set_label_ranges_for_dim(dim_id, label_name, ranges); + label_range_subset.emplace_back( + std::in_place, + label_name, + dim->type(), + label_ranges, + coalesce_ranges); + is_default[dim_index] = false; } } + // Fill in label ranges with nullopt for any remaining dimensions + for (; last_dim < dim_num; last_dim++) { + label_range_subset.emplace_back(std::nullopt); + } + std::unordered_map> attr_range_subset; if (reader.hasAttributeRanges()) { - std::unordered_map> attr_ranges; auto attr_ranges_reader = reader.getAttributeRanges(); if (attr_ranges_reader.hasEntries()) { - for (auto attr_ranges_entry : attr_ranges_reader.getEntries()) { - auto range_reader = attr_ranges_entry.getValue(); - auto key = std::string_view{ - attr_ranges_entry.getKey().cStr(), - attr_ranges_entry.getKey().size()}; - attr_ranges[std::string{key}] = range_buffers_from_capnp(range_reader); + for (auto entry : attr_ranges_reader.getEntries()) { + auto range_reader = entry.getValue(); + std::string key{entry.getKey().cStr(), entry.getKey().size()}; + attr_range_subset[key] = range_buffers_from_capnp(range_reader); } } - - for (const auto& attr_range : attr_ranges) - subarray->set_attribute_ranges(attr_range.first, attr_range.second); } - // If cap'n proto object has stats set it on c++ object - if (reader.hasStats()) { - auto stats_data = stats_from_capnp(reader.getStats()); - subarray->set_stats(stats_data); - } + const auto& stats_data = stats_from_capnp(reader.getStats()); + std::vector relevant_fragments; if (reader.hasRelevantFragments()) { - auto relevant_fragments = reader.getRelevantFragments(); - size_t count = relevant_fragments.size(); - std::vector rf; - rf.reserve(count); + auto reader_rf = reader.getRelevantFragments(); + size_t count = reader_rf.size(); + relevant_fragments.reserve(count); for (size_t i = 0; i < count; i++) { - rf.emplace_back(relevant_fragments[i]); - } - - subarray->relevant_fragments() = RelevantFragments(rf); - } - - return Status::Ok(); + relevant_fragments.emplace_back(reader_rf[i]); + } + } + + auto frag_meta_size = array->opened_array()->fragment_metadata().size(); + return { + array->opened_array(), + layout, + parent_stats, + stats_data, + logger, + range_subset, + is_default, + label_range_subset, + attr_range_subset, + reader.hasRelevantFragments() ? relevant_fragments : + RelevantFragments(frag_meta_size), + coalesce_ranges}; } Status subarray_partitioner_to_capnp( @@ -450,8 +488,8 @@ Status subarray_partitioner_from_capnp( RETURN_NOT_OK(layout_enum(subarray_reader.getLayout(), &layout)); // Subarray, which is used to initialize the partitioner. - Subarray subarray(array, layout, query_stats, dummy_logger, true); - RETURN_NOT_OK(subarray_from_capnp(reader.getSubarray(), &subarray)); + Subarray subarray = subarray_from_capnp( + subarray_reader, array, layout, query_stats, dummy_logger); *partitioner = SubarrayPartitioner( &config, subarray, @@ -508,10 +546,12 @@ Status subarray_partitioner_from_capnp( partition_info->end_ = partition_info_reader.getEnd(); partition_info->split_multi_range_ = partition_info_reader.getSplitMultiRange(); - partition_info->partition_ = - Subarray(array, layout, query_stats, dummy_logger, true); - RETURN_NOT_OK(subarray_from_capnp( - partition_info_reader.getSubarray(), &partition_info->partition_)); + partition_info->partition_ = subarray_from_capnp( + partition_info_reader.getSubarray(), + array, + layout, + query_stats, + dummy_logger); if (compute_current_tile_overlap) { throw_if_not_ok(partition_info->partition_.precompute_tile_overlap( @@ -531,20 +571,18 @@ Status subarray_partitioner_from_capnp( auto sr_reader = state_reader.getSingleRange(); const unsigned num_sr = sr_reader.size(); for (unsigned i = 0; i < num_sr; i++) { - auto subarray_reader_ = sr_reader[i]; - state->single_range_.emplace_back( - array, layout, query_stats, dummy_logger, true); - Subarray& subarray_ = state->single_range_.back(); - RETURN_NOT_OK(subarray_from_capnp(subarray_reader_, &subarray_)); + auto subarray_reader = sr_reader[i]; + Subarray subarray = subarray_from_capnp( + subarray_reader, array, layout, query_stats, dummy_logger); + state->single_range_.push_back(subarray); } auto m_reader = state_reader.getMultiRange(); const unsigned num_m = m_reader.size(); for (unsigned i = 0; i < num_m; i++) { - auto subarray_reader_ = m_reader[i]; - state->multi_range_.emplace_back( - array, layout, query_stats, dummy_logger, true); - Subarray& subarray_ = state->multi_range_.back(); - RETURN_NOT_OK(subarray_from_capnp(subarray_reader_, &subarray_)); + auto subarray_reader = m_reader[i]; + Subarray subarray = subarray_from_capnp( + subarray_reader, array, layout, query_stats, dummy_logger); + state->multi_range_.push_back(subarray); } // Overall mem budget @@ -1107,10 +1145,10 @@ Status reader_from_capnp( RETURN_NOT_OK(layout_enum(reader_reader.getLayout(), &layout)); // Subarray - Subarray subarray(array, layout, query->stats(), dummy_logger, true); auto subarray_reader = reader_reader.getSubarray(); - RETURN_NOT_OK(subarray_from_capnp(subarray_reader, &subarray)); - RETURN_NOT_OK(query->set_subarray_unsafe(subarray)); + Subarray subarray = subarray_from_capnp( + subarray_reader, array, layout, query->stats(), dummy_logger); + query->set_subarray(subarray); // Read state if (reader_reader.hasReadState()) @@ -1145,10 +1183,10 @@ Status index_reader_from_capnp( RETURN_NOT_OK(layout_enum(reader_reader.getLayout(), &layout)); // Subarray - Subarray subarray(array, layout, query->stats(), dummy_logger, true); auto subarray_reader = reader_reader.getSubarray(); - RETURN_NOT_OK(subarray_from_capnp(subarray_reader, &subarray)); - RETURN_NOT_OK(query->set_subarray_unsafe(subarray)); + Subarray subarray = subarray_from_capnp( + subarray_reader, array, layout, query->stats(), dummy_logger); + query->set_subarray(subarray); // Read state if (reader_reader.hasReadState()) @@ -1184,10 +1222,10 @@ Status dense_reader_from_capnp( RETURN_NOT_OK(layout_enum(reader_reader.getLayout(), &layout)); // Subarray - Subarray subarray(array, layout, query->stats(), dummy_logger, true); auto subarray_reader = reader_reader.getSubarray(); - RETURN_NOT_OK(subarray_from_capnp(subarray_reader, &subarray)); - RETURN_NOT_OK(query->set_subarray_unsafe(subarray)); + Subarray subarray = subarray_from_capnp( + subarray_reader, array, layout, query->stats(), dummy_logger); + query->set_subarray(subarray); // Read state if (reader_reader.hasReadState()) @@ -2200,7 +2238,7 @@ Status query_from_capnp( RETURN_NOT_OK( utils::deserialize_subarray(subarray_reader, schema, &subarray)); try { - query->set_subarray_unsafe(subarray); + query->set_subarray(subarray); } catch (...) { tdb_free(subarray); throw; @@ -2210,10 +2248,10 @@ Status query_from_capnp( // Subarray if (writer_reader.hasSubarrayRanges()) { - Subarray subarray(array, layout, query->stats(), dummy_logger, true); auto subarray_reader = writer_reader.getSubarrayRanges(); - RETURN_NOT_OK(subarray_from_capnp(subarray_reader, &subarray)); - RETURN_NOT_OK(query->set_subarray_unsafe(subarray)); + Subarray subarray = subarray_from_capnp( + subarray_reader, array, layout, query->stats(), dummy_logger); + query->set_subarray(subarray); } } } else { @@ -3164,10 +3202,10 @@ void ordered_dim_label_reader_from_capnp( throw_if_not_ok(layout_enum(reader_reader.getLayout(), &layout)); // Subarray - Subarray subarray(array, layout, query->stats(), dummy_logger, false); auto subarray_reader = reader_reader.getSubarray(); - throw_if_not_ok(subarray_from_capnp(subarray_reader, &subarray)); - throw_if_not_ok(query->set_subarray_unsafe(subarray)); + Subarray subarray = subarray_from_capnp( + subarray_reader, array, layout, query->stats(), dummy_logger); + query->set_subarray(subarray); // OrderedDimLabelReader requires an initialized subarray for construction. query->set_dimension_label_ordered_read( diff --git a/tiledb/sm/serialization/query.h b/tiledb/sm/serialization/query.h index 568faacf0f0..09bfc75df40 100644 --- a/tiledb/sm/serialization/query.h +++ b/tiledb/sm/serialization/query.h @@ -252,8 +252,12 @@ Status subarray_to_capnp( const Subarray* subarray, capnp::Subarray::Builder* builder); -Status subarray_from_capnp( - const capnp::Subarray::Reader& reader, Subarray* subarray); +Subarray subarray_from_capnp( + const capnp::Subarray::Reader& reader, + const Array* array, + Layout layout, + stats::Stats* parent_stats, + shared_ptr logger); void ordered_dim_label_reader_to_capnp( const Query& query, diff --git a/tiledb/sm/subarray/range_subset.cc b/tiledb/sm/subarray/range_subset.cc index 7aa6df657c5..42604b0ad6c 100644 --- a/tiledb/sm/subarray/range_subset.cc +++ b/tiledb/sm/subarray/range_subset.cc @@ -33,6 +33,7 @@ #include "tiledb/sm/subarray/range_subset.h" #include +#include using namespace tiledb::common; using namespace tiledb::type; @@ -131,6 +132,16 @@ RangeSetAndSuperset::RangeSetAndSuperset( ranges_.emplace_back(superset); } +RangeSetAndSuperset::RangeSetAndSuperset( + Datatype datatype, + const Range& superset, + std::vector&& subset, + bool coalesce_ranges) + : impl_(range_subset_internals(datatype, superset, coalesce_ranges)) + , is_implicitly_initialized_(false) + , ranges_(subset) { +} + void RangeSetAndSuperset::sort_and_merge_ranges( ThreadPool* const compute_tp, bool merge) { if (ranges_.empty()) { diff --git a/tiledb/sm/subarray/range_subset.h b/tiledb/sm/subarray/range_subset.h index 16b12ab4471..eb3fab79e5b 100644 --- a/tiledb/sm/subarray/range_subset.h +++ b/tiledb/sm/subarray/range_subset.h @@ -429,10 +429,11 @@ class RangeSetAndSuperset { /** General constructor * * @param datatype The TileDB datatype of of the ranges. + * @param superset The superset of ranges to initialize the range set. * @param implicitly_initialize If ``true``, set the ranges to contain the - * full superset until a new range is explicitly added. + * full superset until a new range is explicitly added. * @param coalesce_ranges If ``true``, when adding a new range, attempt to - * combine with the first left-adjacent range found. + * combine with the first left-adjacent range found. **/ RangeSetAndSuperset( Datatype datatype, @@ -440,6 +441,21 @@ class RangeSetAndSuperset { bool implicitly_initialize, bool coalesce_ranges); + /** + * Constructor. + * + * @param datatype The TileDB datatype of of the ranges. + * @param superset The superset of ranges to initialize the range set. + * @param subset The subset of ranges to initialize the range set. + * @param coalesce_ranges If ``true``, when adding a new range, attempt to + * combine with the first left-adjacent range found. + **/ + RangeSetAndSuperset( + Datatype datatype, + const Range& superset, + std::vector&& subset, + bool coalesce_ranges); + /** Destructor. */ ~RangeSetAndSuperset() = default; @@ -543,6 +559,10 @@ class RangeSetAndSuperset { void sort_and_merge_ranges(ThreadPool* const compute_tp, bool merge = false); private: + /* ********************************* */ + /* PRIVATE ATTRIBUTES */ + /* ********************************* */ + /** Pointer to typed implementation details. */ shared_ptr impl_ = nullptr; diff --git a/tiledb/sm/subarray/subarray.cc b/tiledb/sm/subarray/subarray.cc index 6ea1846152b..3674f4a66f2 100644 --- a/tiledb/sm/subarray/subarray.cc +++ b/tiledb/sm/subarray/subarray.cc @@ -150,6 +150,33 @@ Subarray::Subarray( add_default_ranges(); } +Subarray::Subarray( + const shared_ptr opened_array, + Layout layout, + stats::Stats* parent_stats, + const stats::StatsData& stats_data, + shared_ptr logger, + std::vector range_subset, + std::vector is_default, + std::vector> label_range_subset, + std::unordered_map> attr_range_subset, + RelevantFragments relevant_fragments, + bool coalesce_ranges) + : stats_(parent_stats->create_child("Subarray", stats_data)) + , logger_(std::move(logger)) + , array_(opened_array) + , layout_(layout) + , cell_order_(array_->array_schema_latest().cell_order()) + , range_subset_(std::move(range_subset)) + , label_range_subset_(std::move(label_range_subset)) + , attr_range_subset_(std::move(attr_range_subset)) + , is_default_(std::move(is_default)) + , est_result_size_computed_(false) + , relevant_fragments_(std::move(relevant_fragments)) + , coalesce_ranges_(coalesce_ranges) + , ranges_sorted_(false) { +} + Subarray::Subarray(const Subarray& subarray) : Subarray() { // Make a deep-copy clone @@ -331,17 +358,6 @@ Status Subarray::add_range( return Status::Ok(); } -Status Subarray::add_range_unsafe(uint32_t dim_idx, const Range& range) { - // Must reset the result size and tile overlap - est_result_size_computed_ = false; - tile_overlap_.clear(); - - // Add the range - throw_if_not_ok(range_subset_[dim_idx].add_range_unrestricted(range)); - is_default_[dim_idx] = range_subset_[dim_idx].is_implicitly_initialized(); - return Status::Ok(); -} - Status Subarray::set_subarray(const void* subarray) { if (!array_->array_schema_latest().domain().all_dims_same_type()) return LOG_STATUS( @@ -370,22 +386,6 @@ Status Subarray::set_subarray(const void* subarray) { return Status::Ok(); } -void Subarray::set_subarray_unsafe(const void* subarray) { - add_default_ranges(); - if (subarray != nullptr) { - auto dim_num = array_->array_schema_latest().dim_num(); - auto s_ptr = (const unsigned char*)subarray; - uint64_t offset = 0; - for (unsigned d = 0; d < dim_num; ++d) { - auto r_size = - 2 * array_->array_schema_latest().dimension_ptr(d)->coord_size(); - Range range(&s_ptr[offset], r_size); - throw_if_not_ok(this->add_range_unsafe(d, std::move(range))); - offset += r_size; - } - } -} - Status Subarray::add_range( unsigned dim_idx, const void* start, const void* end, const void* stride) { if (dim_idx >= this->array_->array_schema_latest().dim_num()) @@ -425,6 +425,17 @@ Status Subarray::add_range( dim_idx, Range(&range[0], 2 * coord_size), err_on_range_oob_); } +Status Subarray::add_range_unsafe(uint32_t dim_idx, const Range& range) { + // Must reset the result size and tile overlap + est_result_size_computed_ = false; + tile_overlap_.clear(); + + // Add the range + throw_if_not_ok(range_subset_[dim_idx].add_range_unrestricted(range)); + is_default_[dim_idx] = range_subset_[dim_idx].is_implicitly_initialized(); + return Status::Ok(); +} + Status Subarray::add_point_ranges( unsigned dim_idx, const void* start, uint64_t count, bool check_for_label) { if (dim_idx >= this->array_->array_schema_latest().dim_num()) { @@ -1781,20 +1792,6 @@ Status Subarray::set_ranges_for_dim( return Status::Ok(); } -void Subarray::set_label_ranges_for_dim( - const uint32_t dim_idx, - const std::string& name, - const std::vector& ranges) { - auto dim{array_->array_schema_latest().dimension_ptr(dim_idx)}; - label_range_subset_[dim_idx] = - LabelRangeSubset(name, dim->type(), coalesce_ranges_); - for (const auto& range : ranges) { - throw_if_not_ok( - label_range_subset_[dim_idx].value().ranges_.add_range_unrestricted( - range)); - } -} - Status Subarray::split( unsigned splitting_dim, const ByteVecValue& splitting_value, @@ -3107,10 +3104,6 @@ const stats::Stats& Subarray::stats() const { return *stats_; } -void Subarray::set_stats(const stats::StatsData& data) { - stats_->populate_with_data(data); -} - tuple> Subarray::non_overlapping_ranges_for_dim( const uint64_t dim_idx) { const auto& ranges = range_subset_[dim_idx].ranges(); @@ -3261,5 +3254,15 @@ Subarray::LabelRangeSubset::LabelRangeSubset( , ranges_{RangeSetAndSuperset(type, Range(), false, coalesce_ranges)} { } +Subarray::LabelRangeSubset::LabelRangeSubset( + const std::string& name, + Datatype type, + std::vector ranges, + bool coalesce_ranges) + : name_{name} + , ranges_{RangeSetAndSuperset( + type, Range(), std::move(ranges), coalesce_ranges)} { +} + } // namespace sm } // namespace tiledb diff --git a/tiledb/sm/subarray/subarray.h b/tiledb/sm/subarray/subarray.h index b32a6af0cea..8ab378a4c7a 100644 --- a/tiledb/sm/subarray/subarray.h +++ b/tiledb/sm/subarray/subarray.h @@ -322,6 +322,39 @@ class Subarray { bool coalesce_ranges = true, StorageManager* storage_manager = nullptr); + /** + * Constructor. + * + * @param opened_array The opened array the subarray is associated with. + * @param layout The layout of the values of the subarray (of the results + * if the subarray is used for reads, or of the values provided + * by the user for writes). + * @param parent_stats The parent stats to inherit from. + * @param stats_data The stats data to use for the subarray. + * @param logger The parent logger to clone and use for logging. + * @param range_subset Vector of RangeSetAndSuperset for each dimension. + * @param is_default Vector of boolean indicating if the range is default. + * @param label_range_subset Vector of optional for each + * dimension. + * @param attr_range_subset Map of attribute name to a vector of Ranges, for + * each attribute. + * @param relevant_fragments RelevantFragments object for the subarray. + * @param coalesce_ranges When enabled, ranges will attempt to coalesce + * with existing ranges as they are added. + */ + Subarray( + const shared_ptr opened_array, + Layout layout, + stats::Stats* parent_stats, + const stats::StatsData& stats_data, + shared_ptr logger, + std::vector range_subset, + std::vector is_default, + std::vector> label_range_subset, + std::unordered_map> attr_range_subset, + RelevantFragments relevant_fragments, + bool coalesce_ranges = true); + /** * Copy constructor. This performs a deep copy (including memcpy of * underlying buffers). @@ -367,17 +400,6 @@ class Subarray { */ Status set_subarray(const void* subarray); - /** - * Sets the subarray using a pointer to raw range data that stores one range - * per dimension without performing validity checks. - * - * This is only valid for arrays with homogenous dimension data types. This - * function should only be used for deserializing dense write queries. - * - * @param subarray A pointer to the range data to use. - */ - void set_subarray_unsafe(const void* subarray); - /** * Adds dimension ranges computed from label ranges on the dimension label. * @@ -453,6 +475,12 @@ class Subarray { Status add_range( unsigned dim_idx, const void* start, const void* end, const void* stride); + /** + * Adds a range along the dimension with the given index, without + * performing any error checks. + */ + Status add_range_unsafe(uint32_t dim_idx, const Range& range); + /** * @brief Set point ranges from an array * @@ -502,12 +530,6 @@ class Subarray { const void* end, uint64_t end_size); - /** - * Adds a range along the dimension with the given index, without - * performing any error checks. - */ - Status add_range_unsafe(uint32_t dim_idx, const Range& range); - /** * Adds a range to the (read/write) query on the input dimension by name, * in the form of (start, end, stride). @@ -1168,22 +1190,6 @@ class Subarray { */ Status set_ranges_for_dim(uint32_t dim_idx, const std::vector& ranges); - /** - * Directly sets the dimension label ranges for the given dimension index, - * making a deep copy. - * - * @param dim_idx Index of dimension to set - * @param name Name of the dimension label to set - * @param ranges `Range` vector that will be copied and set - * @return Status - * - * @note Intended for serialization only - */ - void set_label_ranges_for_dim( - const uint32_t dim_idx, - const std::string& name, - const std::vector& ranges); - /** * Splits the subarray along the splitting dimension and value into * two new subarrays `r1` and `r2`. @@ -1332,14 +1338,6 @@ class Subarray { /** Returns `stats_`. */ const stats::Stats& stats() const; - /** - * Populate the owned stats instance with data. - * To be removed when the class will get a C41 constructor. - * - * @param data Data to populate the stats with. - */ - void set_stats(const stats::StatsData& data); - /** Stores a vector of 1D ranges per dimension. */ std::vector> original_range_idx_;