Skip to content

Commit 7464c41

Browse files
committed
Add C.41 Subarray constructor
+ Unstatus subarray_from_capnp
1 parent 79030b1 commit 7464c41

File tree

5 files changed

+158
-64
lines changed

5 files changed

+158
-64
lines changed

test/support/src/serialization_wrappers.cc

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -204,11 +204,15 @@ void tiledb_subarray_serialize(
204204
.ok());
205205
// Deserialize
206206
tiledb_subarray_t* deserialized_subarray;
207+
auto layout = (*subarray)->subarray_->layout();
208+
auto stats = (*subarray)->subarray_->stats();
209+
shared_ptr<Logger> dummy_logger = make_shared<Logger>(HERE(), "");
210+
207211
tiledb::test::require_tiledb_ok(
208212
ctx, tiledb_subarray_alloc(ctx, array, &deserialized_subarray));
209-
REQUIRE(tiledb::sm::serialization::subarray_from_capnp(
210-
builder, deserialized_subarray->subarray_)
211-
.ok());
213+
*deserialized_subarray->subarray_ =
214+
tiledb::sm::serialization::subarray_from_capnp(
215+
builder, array->array_.get(), layout, stats, dummy_logger);
212216
*subarray = deserialized_subarray;
213217
#endif
214218
}

tiledb/sm/serialization/query.cc

Lines changed: 88 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -261,84 +261,113 @@ Status subarray_to_capnp(
261261
return Status::Ok();
262262
}
263263

264-
Status subarray_from_capnp(
265-
const capnp::Subarray::Reader& reader, Subarray* subarray) {
266-
RETURN_NOT_OK(subarray->set_coalesce_ranges(reader.getCoalesceRanges()));
264+
Subarray subarray_from_capnp(
265+
const capnp::Subarray::Reader& reader,
266+
const Array* array,
267+
Layout layout,
268+
stats::Stats* parent_stats,
269+
shared_ptr<Logger> logger) {
270+
bool coalesce_ranges = reader.getCoalesceRanges();
267271
auto ranges_reader = reader.getRanges();
272+
268273
uint32_t dim_num = ranges_reader.size();
274+
std::vector<RangeSetAndSuperset> range_subset(dim_num);
275+
std::vector<bool> is_default(dim_num, false);
269276
for (uint32_t i = 0; i < dim_num; i++) {
270277
auto range_reader = ranges_reader[i];
271-
272-
auto data = range_reader.getBuffer();
273-
auto data_ptr = data.asBytes();
278+
Datatype type = Datatype::UINT8;
279+
throw_if_not_ok(datatype_enum(range_reader.getType(), &type));
280+
auto dim = array->array_schema_latest().dimension_ptr(i);
281+
282+
bool implicitly_initialized = range_reader.getHasDefaultRange();
283+
range_subset[i] =
284+
RangeSetAndSuperset(dim->type(), dim->domain(), true, coalesce_ranges);
285+
is_default[i] = implicitly_initialized;
274286
if (range_reader.hasBufferSizes()) {
275287
auto ranges = range_buffers_from_capnp(range_reader);
276-
RETURN_NOT_OK(subarray->set_ranges_for_dim(i, ranges));
277-
278-
// Set default indicator
279-
subarray->set_is_default(i, range_reader.getHasDefaultRange());
288+
// If the range is implicitly initialized, the RangeSetAndSuperset
289+
// constructor will initialize the ranges to the domain.
290+
if (!implicitly_initialized) {
291+
// Edge case for dimension labels where there are only label ranges set.
292+
if (ranges.empty()) {
293+
range_subset[i] = RangeSetAndSuperset(
294+
dim->type(), dim->domain(), false, coalesce_ranges);
295+
}
296+
// Add custom ranges, clearing any implicit ranges previously set.
297+
for (const auto& range : ranges) {
298+
throw_if_not_ok(range_subset[i].add_range_unrestricted(range));
299+
}
300+
}
280301
} else {
281302
// Handle 1.7 style ranges where there is a single range with no sizes
303+
auto data = range_reader.getBuffer();
304+
auto data_ptr = data.asBytes();
282305
Range range(data_ptr.begin(), data.size());
283-
RETURN_NOT_OK(subarray->set_ranges_for_dim(i, {range}));
284-
subarray->set_is_default(i, range_reader.getHasDefaultRange());
306+
throw_if_not_ok(range_subset[i].add_range_unrestricted(range));
285307
}
286308
}
287309

310+
std::vector<optional<Subarray::LabelRangeSubset>> label_range_subset(
311+
dim_num, nullopt);
288312
if (reader.hasLabelRanges()) {
289-
subarray->add_default_label_ranges(dim_num);
290313
auto label_ranges_reader = reader.getLabelRanges();
291314
uint32_t label_num = label_ranges_reader.size();
292315
for (uint32_t i = 0; i < label_num; i++) {
293316
auto label_range_reader = label_ranges_reader[i];
294-
auto dim_id = label_range_reader.getDimensionId();
317+
auto dim_index = label_range_reader.getDimensionId();
318+
auto dim = array->array_schema_latest().dimension_ptr(dim_index);
295319
auto label_name = label_range_reader.getName();
296320

297321
// Deserialize ranges for this dim label
298322
auto range_reader = label_range_reader.getRanges();
299323
auto ranges = range_buffers_from_capnp(range_reader);
300324

301325
// Set ranges for this dim label on the subarray
302-
subarray->set_label_ranges_for_dim(dim_id, label_name, ranges);
326+
label_range_subset[dim_index] = {
327+
label_name, dim->type(), coalesce_ranges};
303328
}
304329
}
305330

331+
std::unordered_map<std::string, std::vector<Range>> attr_range_subset;
306332
if (reader.hasAttributeRanges()) {
307-
std::unordered_map<std::string, std::vector<Range>> attr_ranges;
308333
auto attr_ranges_reader = reader.getAttributeRanges();
309334
if (attr_ranges_reader.hasEntries()) {
310-
for (auto attr_ranges_entry : attr_ranges_reader.getEntries()) {
311-
auto range_reader = attr_ranges_entry.getValue();
312-
auto key = std::string_view{
313-
attr_ranges_entry.getKey().cStr(),
314-
attr_ranges_entry.getKey().size()};
315-
attr_ranges[std::string{key}] = range_buffers_from_capnp(range_reader);
335+
for (auto entry : attr_ranges_reader.getEntries()) {
336+
auto range_reader = entry.getValue();
337+
std::string key{entry.getKey().cStr(), entry.getKey().size()};
338+
attr_range_subset[key] = range_buffers_from_capnp(range_reader);
316339
}
317340
}
318-
319-
for (const auto& attr_range : attr_ranges)
320-
subarray->set_attribute_ranges(attr_range.first, attr_range.second);
321341
}
322342

323343
// If the Cap'n Proto object has stats, set them on the C++ object
344+
Subarray s(array, layout, parent_stats, logger, true);
324345
if (reader.hasStats()) {
325346
auto stats_data = stats_from_capnp(reader.getStats());
326347
subarray->set_stats(stats_data);
327348
}
328349

350+
std::vector<unsigned> relevant_fragments;
329351
if (reader.hasRelevantFragments()) {
330-
auto relevant_fragments = reader.getRelevantFragments();
331-
size_t count = relevant_fragments.size();
332-
std::vector<unsigned> rf;
333-
rf.reserve(count);
352+
auto reader_rf = reader.getRelevantFragments();
353+
size_t count = reader_rf.size();
354+
relevant_fragments.reserve(count);
334355
for (size_t i = 0; i < count; i++) {
335-
rf.emplace_back(relevant_fragments[i]);
356+
relevant_fragments.emplace_back(reader_rf[i]);
336357
}
337-
338-
subarray->relevant_fragments() = RelevantFragments(rf);
339358
}
340359

341-
return Status::Ok();
360+
return {
361+
array,
362+
layout,
363+
reader.hasStats() ? s.stats() : parent_stats,
364+
logger,
365+
range_subset,
366+
is_default,
367+
label_range_subset,
368+
attr_range_subset,
369+
relevant_fragments,
370+
coalesce_ranges};
342371
}
343372

344373
Status subarray_partitioner_to_capnp(
@@ -450,8 +479,8 @@ Status subarray_partitioner_from_capnp(
450479
RETURN_NOT_OK(layout_enum(subarray_reader.getLayout(), &layout));
451480

452481
// Subarray, which is used to initialize the partitioner.
453-
Subarray subarray(array, layout, query_stats, dummy_logger, true);
454-
RETURN_NOT_OK(subarray_from_capnp(reader.getSubarray(), &subarray));
482+
Subarray subarray = subarray_from_capnp(
483+
subarray_reader, array, layout, query_stats, dummy_logger);
455484
*partitioner = SubarrayPartitioner(
456485
&config,
457486
subarray,
@@ -508,10 +537,12 @@ Status subarray_partitioner_from_capnp(
508537
partition_info->end_ = partition_info_reader.getEnd();
509538
partition_info->split_multi_range_ =
510539
partition_info_reader.getSplitMultiRange();
511-
partition_info->partition_ =
512-
Subarray(array, layout, query_stats, dummy_logger, true);
513-
RETURN_NOT_OK(subarray_from_capnp(
514-
partition_info_reader.getSubarray(), &partition_info->partition_));
540+
partition_info->partition_ = subarray_from_capnp(
541+
partition_info_reader.getSubarray(),
542+
array,
543+
layout,
544+
query_stats,
545+
dummy_logger);
515546

516547
if (compute_current_tile_overlap) {
517548
throw_if_not_ok(partition_info->partition_.precompute_tile_overlap(
@@ -531,20 +562,18 @@ Status subarray_partitioner_from_capnp(
531562
auto sr_reader = state_reader.getSingleRange();
532563
const unsigned num_sr = sr_reader.size();
533564
for (unsigned i = 0; i < num_sr; i++) {
534-
auto subarray_reader_ = sr_reader[i];
535-
state->single_range_.emplace_back(
536-
array, layout, query_stats, dummy_logger, true);
537-
Subarray& subarray_ = state->single_range_.back();
538-
RETURN_NOT_OK(subarray_from_capnp(subarray_reader_, &subarray_));
565+
auto subarray_reader = sr_reader[i];
566+
Subarray subarray = subarray_from_capnp(
567+
subarray_reader, array, layout, query_stats, dummy_logger);
568+
state->single_range_.push_back(subarray);
539569
}
540570
auto m_reader = state_reader.getMultiRange();
541571
const unsigned num_m = m_reader.size();
542572
for (unsigned i = 0; i < num_m; i++) {
543-
auto subarray_reader_ = m_reader[i];
544-
state->multi_range_.emplace_back(
545-
array, layout, query_stats, dummy_logger, true);
546-
Subarray& subarray_ = state->multi_range_.back();
547-
RETURN_NOT_OK(subarray_from_capnp(subarray_reader_, &subarray_));
573+
auto subarray_reader = m_reader[i];
574+
Subarray subarray = subarray_from_capnp(
575+
subarray_reader, array, layout, query_stats, dummy_logger);
576+
state->multi_range_.push_back(subarray);
548577
}
549578

550579
// Overall mem budget
@@ -1107,9 +1136,9 @@ Status reader_from_capnp(
11071136
RETURN_NOT_OK(layout_enum(reader_reader.getLayout(), &layout));
11081137

11091138
// Subarray
1110-
Subarray subarray(array, layout, query->stats(), dummy_logger, true);
11111139
auto subarray_reader = reader_reader.getSubarray();
1112-
RETURN_NOT_OK(subarray_from_capnp(subarray_reader, &subarray));
1140+
Subarray subarray = subarray_from_capnp(
1141+
subarray_reader, array, layout, query->stats(), dummy_logger);
11131142
RETURN_NOT_OK(query->set_subarray_unsafe(subarray));
11141143

11151144
// Read state
@@ -1145,9 +1174,9 @@ Status index_reader_from_capnp(
11451174
RETURN_NOT_OK(layout_enum(reader_reader.getLayout(), &layout));
11461175

11471176
// Subarray
1148-
Subarray subarray(array, layout, query->stats(), dummy_logger, true);
11491177
auto subarray_reader = reader_reader.getSubarray();
1150-
RETURN_NOT_OK(subarray_from_capnp(subarray_reader, &subarray));
1178+
Subarray subarray = subarray_from_capnp(
1179+
subarray_reader, array, layout, query->stats(), dummy_logger);
11511180
RETURN_NOT_OK(query->set_subarray_unsafe(subarray));
11521181

11531182
// Read state
@@ -1184,9 +1213,9 @@ Status dense_reader_from_capnp(
11841213
RETURN_NOT_OK(layout_enum(reader_reader.getLayout(), &layout));
11851214

11861215
// Subarray
1187-
Subarray subarray(array, layout, query->stats(), dummy_logger, true);
11881216
auto subarray_reader = reader_reader.getSubarray();
1189-
RETURN_NOT_OK(subarray_from_capnp(subarray_reader, &subarray));
1217+
Subarray subarray = subarray_from_capnp(
1218+
subarray_reader, array, layout, query->stats(), dummy_logger);
11901219
RETURN_NOT_OK(query->set_subarray_unsafe(subarray));
11911220

11921221
// Read state
@@ -2210,9 +2239,9 @@ Status query_from_capnp(
22102239

22112240
// Subarray
22122241
if (writer_reader.hasSubarrayRanges()) {
2213-
Subarray subarray(array, layout, query->stats(), dummy_logger, true);
22142242
auto subarray_reader = writer_reader.getSubarrayRanges();
2215-
RETURN_NOT_OK(subarray_from_capnp(subarray_reader, &subarray));
2243+
Subarray subarray = subarray_from_capnp(
2244+
subarray_reader, array, layout, query->stats(), dummy_logger);
22162245
RETURN_NOT_OK(query->set_subarray_unsafe(subarray));
22172246
}
22182247
}
@@ -3164,9 +3193,9 @@ void ordered_dim_label_reader_from_capnp(
31643193
throw_if_not_ok(layout_enum(reader_reader.getLayout(), &layout));
31653194

31663195
// Subarray
3167-
Subarray subarray(array, layout, query->stats(), dummy_logger, false);
31683196
auto subarray_reader = reader_reader.getSubarray();
3169-
throw_if_not_ok(subarray_from_capnp(subarray_reader, &subarray));
3197+
Subarray subarray = subarray_from_capnp(
3198+
subarray_reader, array, layout, query->stats(), dummy_logger);
31703199
throw_if_not_ok(query->set_subarray_unsafe(subarray));
31713200

31723201
// OrderedDimLabelReader requires an initialized subarray for construction.

tiledb/sm/serialization/query.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -252,8 +252,12 @@ Status subarray_to_capnp(
252252
const Subarray* subarray,
253253
capnp::Subarray::Builder* builder);
254254

255-
Status subarray_from_capnp(
256-
const capnp::Subarray::Reader& reader, Subarray* subarray);
255+
Subarray subarray_from_capnp(
256+
const capnp::Subarray::Reader& reader,
257+
const Array* array,
258+
Layout layout,
259+
stats::Stats* parent_stats,
260+
shared_ptr<Logger> logger);
257261

258262
void ordered_dim_label_reader_to_capnp(
259263
const Query& query,

tiledb/sm/subarray/subarray.cc

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,32 @@ Subarray::Subarray(
150150
add_default_ranges();
151151
}
152152

153+
Subarray::Subarray(
154+
const Array* array,
155+
Layout layout,
156+
stats::Stats* stats,
157+
shared_ptr<Logger> logger,
158+
std::vector<RangeSetAndSuperset> range_subset,
159+
std::vector<bool> is_default,
160+
std::vector<optional<Subarray::LabelRangeSubset>> label_range_subset,
161+
std::unordered_map<std::string, std::vector<Range>> attr_range_subset,
162+
std::vector<unsigned int> relevant_fragments,
163+
bool coalesce_ranges)
164+
: stats_(stats)
165+
, logger_(std::move(logger))
166+
, array_(array->opened_array())
167+
, layout_(layout)
168+
, cell_order_(array_->array_schema_latest().cell_order())
169+
, range_subset_(std::move(range_subset))
170+
, label_range_subset_(std::move(label_range_subset))
171+
, attr_range_subset_(std::move(attr_range_subset))
172+
, is_default_(std::move(is_default))
173+
, est_result_size_computed_(false)
174+
, relevant_fragments_(relevant_fragments)
175+
, coalesce_ranges_(coalesce_ranges)
176+
, ranges_sorted_(false) {
177+
}
178+
153179
Subarray::Subarray(const Subarray& subarray)
154180
: Subarray() {
155181
// Make a deep-copy clone

tiledb/sm/subarray/subarray.h

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,37 @@ class Subarray {
322322
bool coalesce_ranges = true,
323323
StorageManager* storage_manager = nullptr);
324324

325+
/**
326+
* Constructor.
327+
*
328+
* @param array The array the subarray is associated with.
329+
* @param layout The layout of the values of the subarray (of the results
330+
* if the subarray is used for reads, or of the values provided
331+
* by the user for writes).
332+
* @param stats The parent stats to inherit from.
333+
* @param logger The parent logger to clone and use for logging
334+
* @param range_subset Vector of RangeSetAndSuperset for each dimension.
335+
* @param is_default Vector of booleans, one per dimension, indicating whether that dimension's range is the default.
336+
* @param label_range_subset Vector of optional<LabelRangeSubset> for each
337+
* dimension.
338+
* @param attr_range_subset Map of attribute name to a vector of Ranges, for
339+
* each attribute.
340+
* @param relevant_fragments Vector of relevant fragments for the subarray, given as unsigned ints.
341+
* @param coalesce_ranges When enabled, ranges will attempt to coalesce
342+
* with existing ranges as they are added
343+
*/
344+
Subarray(
345+
const Array* array,
346+
Layout layout,
347+
stats::Stats* stats,
348+
shared_ptr<tiledb::common::Logger> logger,
349+
std::vector<RangeSetAndSuperset> range_subset,
350+
std::vector<bool> is_default,
351+
std::vector<optional<LabelRangeSubset>> label_range_subset,
352+
std::unordered_map<std::string, std::vector<Range>> attr_range_subset,
353+
std::vector<unsigned int> relevant_fragments,
354+
bool coalesce_ranges = true);
355+
325356
/**
326357
* Copy constructor. This performs a deep copy (including memcpy of
327358
* underlying buffers).

0 commit comments

Comments
 (0)