Skip to content

Commit fd24448

Browse files
Use unordered layout for aggregate-only read queries on sparse arrays. (#5255)
[SC-53286](https://app.shortcut.com/tiledb-inc/story/53286/ignore-query-order-in-aggregate-only-queries) If the user has not set a query layout, it will default to row-major, which will use the legacy reader on sparse arrays, and fail if aggregates were specified. However, if only aggregates are specified and no regular data buffers, the layout doesn't matter and we can transparently switch to the much more efficient unordered layout. --- TYPE: IMPROVEMENT DESC: Fix read queries on sparse arrays where only aggregates are requested and no layout is specified.
1 parent b239ff5 commit fd24448

File tree

3 files changed

+74
-12
lines changed

3 files changed

+74
-12
lines changed

test/src/test-cppapi-aggregates.cc

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ void CppAggregatesFx<T>::generate_test_params() {
173173
nullable_ = GENERATE(true, false);
174174
allow_dups_ = GENERATE(true, false);
175175
set_qc_values_ = {false};
176-
layout_values_ = {TILEDB_UNORDERED};
176+
layout_values_ = {TILEDB_ROW_MAJOR, TILEDB_UNORDERED};
177177
use_dim_values_ = {true, false};
178178
if (nullable_ || !std::is_same<T, uint64_t>::value) {
179179
use_dim_values_ = {false};
@@ -195,6 +195,12 @@ void CppAggregatesFx<T>::run_all_combinations(std::function<void()> fn) {
195195
for (bool set_qc : set_qc_values_) {
196196
set_qc_ = set_qc;
197197
for (tiledb_layout_t layout : layout_values_) {
198+
// Filter invalid combination. The legacy reader does not support
199+
// aggregates, and we cannot automatically switch to unordered
200+
// reads if we are requesting both the aggregates and the data.
201+
if (request_data && layout != TILEDB_UNORDERED) {
202+
continue;
203+
}
198204
layout_ = layout;
199205
fn();
200206
}
@@ -1444,6 +1450,12 @@ TEST_CASE_METHOD(
14441450
for (bool set_qc : set_qc_values_) {
14451451
set_qc_ = set_qc;
14461452
for (tiledb_layout_t layout : layout_values_) {
1453+
// Filter invalid combination. The legacy reader does not support
1454+
// aggregates, and we cannot automatically switch to unordered
1455+
// reads if we are requesting both the aggregates and the data.
1456+
if (request_data && layout != TILEDB_UNORDERED) {
1457+
continue;
1458+
}
14471459
layout_ = layout;
14481460
Query query(ctx_, array, TILEDB_READ);
14491461

@@ -1824,6 +1836,11 @@ TEMPLATE_LIST_TEST_CASE(
18241836
for (bool set_qc : fx.set_qc_values_) {
18251837
fx.set_qc_ = set_qc;
18261838
for (tiledb_layout_t layout : fx.layout_values_) {
1839+
// Filter invalid combination. The legacy reader does not support
1840+
// aggregates, and we cannot automatically switch to unordered
1841+
// reads if we are requesting both the aggregates and the data.
1842+
if (!fx.dense_ && request_data && layout != TILEDB_UNORDERED)
1843+
continue;
18271844
fx.layout_ = layout;
18281845
Query query(fx.ctx_, array, TILEDB_READ);
18291846

@@ -2048,6 +2065,12 @@ TEST_CASE_METHOD(
20482065
for (bool set_qc : set_qc_values_) {
20492066
set_qc_ = set_qc;
20502067
for (tiledb_layout_t layout : layout_values_) {
2068+
// Filter invalid combination. The legacy reader does not support
2069+
// aggregates, and we cannot automatically switch to unordered
2070+
// reads if we are requesting both the aggregates and the data.
2071+
if (request_data && layout != TILEDB_UNORDERED) {
2072+
continue;
2073+
}
20512074
layout_ = layout;
20522075
Query query(ctx_, array, TILEDB_READ);
20532076

@@ -2146,6 +2169,12 @@ TEST_CASE_METHOD(
21462169
for (bool set_qc : set_qc_values_) {
21472170
set_qc_ = set_qc;
21482171
for (tiledb_layout_t layout : layout_values_) {
2172+
// Filter invalid combination. The legacy reader does not support
2173+
// aggregates, and we cannot automatically switch to unordered
2174+
// reads if we are requesting both the aggregates and the data.
2175+
if (layout != TILEDB_UNORDERED) {
2176+
continue;
2177+
}
21492178
layout_ = layout;
21502179
Query query(ctx_, array, TILEDB_READ);
21512180

@@ -2285,6 +2314,11 @@ TEST_CASE_METHOD(
22852314
for (bool set_qc : set_qc_values_) {
22862315
set_qc_ = set_qc;
22872316
for (tiledb_layout_t layout : layout_values_) {
2317+
// Filter invalid combination. The legacy reader does not support
2318+
// aggregates, and we cannot automatically switch to unordered
2319+
// reads if we are requesting both the aggregates and the data.
2320+
if (layout != TILEDB_UNORDERED)
2321+
continue;
22882322
layout_ = layout;
22892323
Query query(ctx_, array, TILEDB_READ);
22902324

@@ -2365,6 +2399,11 @@ TEMPLATE_LIST_TEST_CASE_METHOD(
23652399
for (bool set_qc : CppAggregatesFx<T>::set_qc_values_) {
23662400
CppAggregatesFx<T>::set_qc_ = set_qc;
23672401
for (tiledb_layout_t layout : CppAggregatesFx<T>::layout_values_) {
2402+
// Filter invalid combination. The legacy reader does not support
2403+
// aggregates, and we cannot automatically switch to unordered
2404+
// reads if we are requesting both the aggregates and the data.
2405+
if (!CppAggregatesFx<T>::dense_ && layout != TILEDB_UNORDERED)
2406+
continue;
23682407
CppAggregatesFx<T>::layout_ = layout;
23692408
Query query(CppAggregatesFx<T>::ctx_, array, TILEDB_READ);
23702409

tiledb/sm/query/query.cc

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1762,7 +1762,21 @@ bool Query::is_aggregate(std::string output_field_name) const {
17621762
/* PRIVATE METHODS */
17631763
/* ****************************** */
17641764

1765+
Layout Query::effective_layout() const {
1766+
// If the user has not set a layout, it will default to row-major, which will
1767+
// use the legacy reader on sparse arrays, and fail if aggregates were
1768+
// specified. However, if only aggregates are specified and no regular data
1769+
// buffers, the layout doesn't matter and we can transparently switch to the
1770+
// much more efficient unordered layout.
1771+
if (type_ == QueryType::READ && !array_schema_->dense() && has_aggregates() &&
1772+
buffers_.empty()) {
1773+
return Layout::UNORDERED;
1774+
}
1775+
return layout_;
1776+
}
1777+
17651778
Status Query::create_strategy(bool skip_checks_serialization) {
1779+
auto layout = effective_layout();
17661780
auto params = StrategyParams(
17671781
resources_,
17681782
array_->memory_tracker(),
@@ -1775,16 +1789,16 @@ Status Query::create_strategy(bool skip_checks_serialization) {
17751789
buffers_,
17761790
aggregate_buffers_,
17771791
subarray_,
1778-
layout_,
1792+
layout,
17791793
condition_,
17801794
default_channel_aggregates_,
17811795
skip_checks_serialization);
17821796
if (type_ == QueryType::WRITE || type_ == QueryType::MODIFY_EXCLUSIVE) {
1783-
if (layout_ == Layout::COL_MAJOR || layout_ == Layout::ROW_MAJOR) {
1797+
if (layout == Layout::COL_MAJOR || layout == Layout::ROW_MAJOR) {
17841798
if (!array_schema_->dense()) {
17851799
return Status_QueryError(
17861800
"Cannot create strategy; sparse writes do not support layout " +
1787-
layout_str(layout_));
1801+
layout_str(layout));
17881802
}
17891803
strategy_ = tdb_unique_ptr<IQueryStrategy>(tdb_new(
17901804
OrderedWriter,
@@ -1795,11 +1809,11 @@ Status Query::create_strategy(bool skip_checks_serialization) {
17951809
coords_info_,
17961810
remote_query_,
17971811
fragment_name_));
1798-
} else if (layout_ == Layout::UNORDERED) {
1812+
} else if (layout == Layout::UNORDERED) {
17991813
if (array_schema_->dense()) {
18001814
return Status_QueryError(
18011815
"Cannot create strategy; dense writes do not support layout " +
1802-
layout_str(layout_));
1816+
layout_str(layout));
18031817
}
18041818
strategy_ = tdb_unique_ptr<IQueryStrategy>(tdb_new(
18051819
UnorderedWriter,
@@ -1811,7 +1825,7 @@ Status Query::create_strategy(bool skip_checks_serialization) {
18111825
written_buffers_,
18121826
remote_query_,
18131827
fragment_name_));
1814-
} else if (layout_ == Layout::GLOBAL_ORDER) {
1828+
} else if (layout == Layout::GLOBAL_ORDER) {
18151829
strategy_ = tdb_unique_ptr<IQueryStrategy>(tdb_new(
18161830
GlobalOrderWriter,
18171831
stats_->create_child("Writer"),
@@ -1826,7 +1840,7 @@ Status Query::create_strategy(bool skip_checks_serialization) {
18261840
fragment_name_));
18271841
} else {
18281842
return Status_QueryError(
1829-
"Cannot create strategy; unsupported layout " + layout_str(layout_));
1843+
"Cannot create strategy; unsupported layout " + layout_str(layout));
18301844
}
18311845
} else if (type_ == QueryType::READ) {
18321846
bool all_dense = true;
@@ -1854,7 +1868,7 @@ Status Query::create_strategy(bool skip_checks_serialization) {
18541868
params,
18551869
dimension_label_increasing_));
18561870
} else if (use_refactored_sparse_unordered_with_dups_reader(
1857-
layout_, *array_schema_)) {
1871+
layout, *array_schema_)) {
18581872
if (Query::non_overlapping_ranges() || !subarray_.is_set() ||
18591873
subarray_.range_num() == 1) {
18601874
strategy_ = tdb_unique_ptr<IQueryStrategy>(tdb_new(
@@ -1870,9 +1884,9 @@ Status Query::create_strategy(bool skip_checks_serialization) {
18701884
params));
18711885
}
18721886
} else if (
1873-
use_refactored_sparse_global_order_reader(layout_, *array_schema_) &&
1887+
use_refactored_sparse_global_order_reader(layout, *array_schema_) &&
18741888
!array_schema_->dense() &&
1875-
(layout_ == Layout::GLOBAL_ORDER || layout_ == Layout::UNORDERED)) {
1889+
(layout == Layout::GLOBAL_ORDER || layout == Layout::UNORDERED)) {
18761890
// Using the reader for unordered queries to do deduplication.
18771891
if (Query::non_overlapping_ranges() || !subarray_.is_set() ||
18781892
subarray_.range_num() == 1) {

tiledb/sm/query/query.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -879,7 +879,7 @@ class Query {
879879
/**
880880
* Returns true if the query has any aggregates on any channels
881881
*/
882-
bool has_aggregates() {
882+
bool has_aggregates() const {
883883
// We only need to check the default channel for now
884884
return !default_channel_aggregates_.empty();
885885
}
@@ -1149,6 +1149,15 @@ class Query {
11491149
/* PRIVATE METHODS */
11501150
/* ********************************* */
11511151

1152+
/**
1153+
* Return the layout that will be used to execute the query.
1154+
*
1155+
* This is usually set by the user but can be overridden by TileDB in cases
1156+
* where the data order would not have an observable difference, like queries
1157+
* with only aggregates.
1158+
*/
1159+
Layout effective_layout() const;
1160+
11521161
/**
11531162
* Create the strategy.
11541163
*

0 commit comments

Comments
 (0)