Skip to content

Commit 66c50c0

Browse files
authored
feat: [sc-63548] [core] enable NULL test query conditions to run on any attribute (#5464)
Story details: https://app.shortcut.com/tiledb-inc/story/63548 Previously, adding a null test query condition on a non-nullable attribute would return an error. This is undesirable: users should not have to intimately know their target schema in order to write a query. There is no reason we can't evaluate the query condition - it is simply statically known to be always true or always false. As such, this pull request allows null test query conditions to run on non-nullable attributes by rewriting them into the `ALWAYS_TRUE` or `ALWAYS_FALSE` operators. This pull request also goes further, enabling the null test query condition to run on attributes of any datatype and any cell value count (`cell_val_num`). --- TYPE: IMPROVEMENT DESC: enable null test query conditions on non-nullable attributes
1 parent 86d00f9 commit 66c50c0

File tree

10 files changed

+567
-63
lines changed

10 files changed

+567
-63
lines changed

test/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,7 @@ if (TILEDB_CPP_API)
176176
src/unit-cppapi-nullable.cc
177177
src/unit-cppapi-partial-attribute-write.cc
178178
src/unit-cppapi-query.cc
179+
src/unit-cppapi-query-condition.cc
179180
src/unit-cppapi-query-condition-enumerations.cc
180181
src/unit-cppapi-query-condition-sets.cc
181182
src/cpp-integration-query-condition.cc

test/src/unit-cppapi-query-condition-enumerations.cc

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -547,7 +547,7 @@ TEST_CASE_METHOD(
547547
auto qc =
548548
QueryCondition::create(ctx_, "cell_type", std::string("fish"), TILEDB_NE);
549549
auto core_qc = qc.ptr().get()->query_condition_;
550-
core_qc->rewrite_enumeration_conditions(core_array->array_schema_latest());
550+
core_qc->rewrite_for_schema(core_array->array_schema_latest());
551551

552552
auto matcher = Catch::Matchers::ContainsSubstring(
553553
"Invalid negation of rewritten query.");
@@ -570,7 +570,7 @@ TEST_CASE_METHOD(
570570
auto qc =
571571
QueryCondition::create(ctx_, "cell_type", std::string("fish"), TILEDB_EQ);
572572
auto core_qc = qc.ptr().get()->query_condition_;
573-
core_qc->rewrite_enumeration_conditions(core_array->array_schema_latest());
573+
core_qc->rewrite_for_schema(core_array->array_schema_latest());
574574

575575
auto matcher = Catch::Matchers::ContainsSubstring(
576576
"Invalid negation of rewritten query.");
@@ -601,6 +601,22 @@ TEST_CASE(
601601
REQUIRE_THROWS_WITH(QueryCondition::create(ctx, "foo", 0, op), matcher);
602602
}
603603

604+
TEST_CASE_METHOD(
605+
CPPQueryConditionEnumerationFx,
606+
"Nullable Enumeration Non-Equality",
607+
"[query-condition][enumeration][logic]") {
608+
auto type = GENERATE(TILEDB_SPARSE, TILEDB_DENSE);
609+
auto serialize = GENERATE_SERIALIZATION();
610+
auto matcher = [](const EnmrQCCell& cell) { return cell.cycle_phase_valid; };
611+
612+
auto creator = [](Context& ctx) {
613+
return QueryCondition::create(
614+
ctx, "cycle_phase", std::string("fish"), TILEDB_NE);
615+
};
616+
617+
run_test(type, serialize, matcher, creator);
618+
}
619+
604620
/*
605621
* All code below here is test support implementation.
606622
*/
@@ -1148,7 +1164,7 @@ void CPPQueryConditionEnumerationFx::validate_query_condition(
11481164

11491165
auto qc = creator(ctx_);
11501166
auto core_qc = qc.ptr().get()->query_condition_;
1151-
core_qc->rewrite_enumeration_conditions(core_array->array_schema_latest());
1167+
core_qc->rewrite_for_schema(core_array->array_schema_latest());
11521168

11531169
REQUIRE(core_qc->check(core_array->array_schema_latest()).ok());
11541170
}
Lines changed: 329 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,329 @@
1+
/**
2+
* @file unit-cppapi-query-condition.cc
3+
*
4+
* @section LICENSE
5+
*
6+
* The MIT License
7+
*
8+
* @copyright Copyright (c) 2025 TileDB Inc.
9+
*
10+
* Permission is hereby granted, free of charge, to any person obtaining a copy
11+
* of this software and associated documentation files (the "Software"), to deal
12+
* in the Software without restriction, including without limitation the rights
13+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14+
* copies of the Software, and to permit persons to whom the Software is
15+
* furnished to do so, subject to the following conditions:
16+
*
17+
* The above copyright notice and this permission notice shall be included in
18+
* all copies or substantial portions of the Software.
19+
*
20+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26+
* THE SOFTWARE.
27+
*
28+
* @section DESCRIPTION
29+
*
30+
* Tests the C++ API for query condition related functions.
31+
*/
32+
33+
#include <test/support/catch/array_schema.h>
34+
#include <test/support/tdb_catch.h>
35+
36+
#include "test/support/src/array_helpers.h"
37+
#include "tiledb/sm/cpp_api/tiledb"
38+
#include "tiledb/sm/enums/layout.h"
39+
#include "tiledb/sm/misc/constants.h"
40+
41+
#include <numeric>
42+
43+
using namespace tiledb;
44+
45+
TEST_CASE("Query condition null test", "[query-condition]") {
46+
const auto array_type = GENERATE(TILEDB_SPARSE, TILEDB_DENSE);
47+
const auto attr_datatype = GENERATE_CPPAPI_ALL_DATATYPES();
48+
const uint32_t attr_cell_val_num =
49+
GENERATE(1, 4, tiledb::sm::constants::var_num);
50+
const bool attr_nullable = GENERATE(true, false);
51+
52+
const bool is_var = (attr_cell_val_num == tiledb::sm::constants::var_num);
53+
const size_t value_size = tiledb::sm::datatype_size(attr_datatype);
54+
if (attr_datatype == tiledb::sm::Datatype::ANY && !is_var) {
55+
// not supported
56+
return;
57+
}
58+
59+
Context ctx;
60+
std::string uri("query_condition_null_test");
61+
62+
DYNAMIC_SECTION(
63+
"Null test query condition: (array_type, datatype, cell_val_num, "
64+
"nullable) = (" +
65+
std::string(array_type == TILEDB_SPARSE ? "SPARSE" : "DENSE") + ", " +
66+
tiledb::sm::datatype_str(attr_datatype) + ", " +
67+
(is_var ? "VAR" : std::to_string(attr_cell_val_num)) + ", " +
68+
std::to_string(attr_nullable) + ")") {
69+
std::vector<uint8_t> fill_value;
70+
fill_value.resize(is_var ? value_size : attr_cell_val_num * value_size);
71+
std::iota(fill_value.begin(), fill_value.end(), '1');
72+
73+
// create array
74+
{
75+
ArraySchema schema(ctx, array_type);
76+
77+
auto dim = Dimension::create<uint32_t>(ctx, "id", {{1, 4}});
78+
auto dom = Domain(ctx);
79+
dom.add_dimension(dim);
80+
schema.set_domain(dom);
81+
82+
auto att = Attribute::create(
83+
ctx, "a", static_cast<tiledb_datatype_t>(attr_datatype))
84+
.set_cell_val_num(attr_cell_val_num)
85+
.set_fill_value(fill_value.data(), fill_value.size())
86+
.set_nullable(attr_nullable);
87+
schema.add_attribute(att);
88+
89+
Array::create(uri, schema);
90+
}
91+
92+
test::DeleteArrayGuard delguard(ctx.ptr().get(), uri.c_str());
93+
94+
// prepare data
95+
std::vector<uint32_t> w_dimension = {1, 2, 3};
96+
std::vector<uint64_t> w_offsets;
97+
std::vector<uint8_t> w_values;
98+
std::vector<uint8_t> w_validity = {1, 0, 1};
99+
if (is_var) {
100+
w_offsets = {0, value_size, value_size};
101+
w_values.resize(3 * 2 * value_size);
102+
std::iota(w_values.begin(), w_values.end(), 'B');
103+
} else {
104+
w_values.resize(3 * attr_cell_val_num * value_size);
105+
std::iota(w_values.begin(), w_values.end(), 'C');
106+
}
107+
108+
// insert data
109+
{
110+
Array array(ctx, uri, TILEDB_WRITE);
111+
Query query(ctx, array);
112+
113+
if (array_type == TILEDB_SPARSE) {
114+
query.set_data_buffer("id", w_dimension);
115+
} else {
116+
Subarray subarray(ctx, array);
117+
subarray.add_range<uint32_t>(0, 1, 3);
118+
query.set_subarray(subarray);
119+
}
120+
121+
if (is_var) {
122+
query.set_data_buffer("a", static_cast<void*>(w_values.data()), 3 * 2)
123+
.set_offsets_buffer("a", w_offsets);
124+
} else {
125+
query.set_data_buffer(
126+
"a", static_cast<void*>(w_values.data()), 3 * attr_cell_val_num);
127+
}
128+
if (attr_nullable) {
129+
query.set_validity_buffer("a", w_validity);
130+
}
131+
132+
REQUIRE(query.submit() == Query::Status::COMPLETE);
133+
}
134+
135+
// then read with query condition
136+
const auto eq_op = GENERATE(TILEDB_EQ, TILEDB_NE);
137+
const std::string qc_attr = GENERATE("id", "a");
138+
139+
std::set<tiledb::sm::Layout> layouts = {
140+
tiledb::sm::Layout::UNORDERED,
141+
tiledb::sm::Layout::ROW_MAJOR,
142+
tiledb::sm::Layout::COL_MAJOR,
143+
tiledb::sm::Layout::GLOBAL_ORDER};
144+
145+
if (!(attr_cell_val_num == 1 || is_var)) {
146+
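// FIXME: ROW_MAJOR and COL_MAJOR reads give wrong results when cells hold a fixed number (> 1) of values; skip those layouts until the cause is found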
147+
layouts.erase(tiledb::sm::Layout::ROW_MAJOR);
148+
layouts.erase(tiledb::sm::Layout::COL_MAJOR);
149+
}
150+
if (array_type == TILEDB_DENSE) {
151+
// FIXME: UNORDERED reads of a dense array hit an assertion failure; skip that layout
152+
layouts.erase(tiledb::sm::Layout::UNORDERED);
153+
}
154+
155+
const auto layout = GENERATE_COPY(from_range(layouts));
156+
157+
DYNAMIC_SECTION(
158+
tiledb::sm::layout_str(layout) + ": " + qc_attr +
159+
std::string(eq_op == TILEDB_EQ ? " IS" : " IS NOT") + " NULL") {
160+
Array array(ctx, uri, TILEDB_READ);
161+
Query query(ctx, array);
162+
query.set_layout(static_cast<tiledb_layout_t>(layout));
163+
164+
std::vector<uint32_t> r_dimension(3);
165+
166+
const size_t num_var_values_per_cell = 8;
167+
std::vector<uint8_t> r_values(
168+
3 * (is_var ? num_var_values_per_cell * value_size :
169+
attr_cell_val_num * value_size));
170+
std::vector<uint64_t> r_offsets(3);
171+
std::vector<uint8_t> r_validity(3);
172+
173+
QueryCondition qc(ctx);
174+
qc.init(qc_attr, nullptr, 0, eq_op);
175+
query.set_condition(qc).set_data_buffer("id", r_dimension);
176+
if (is_var) {
177+
query
178+
.set_data_buffer(
179+
"a",
180+
static_cast<void*>(r_values.data()),
181+
3 * num_var_values_per_cell)
182+
.set_offsets_buffer("a", r_offsets);
183+
} else {
184+
query.set_data_buffer(
185+
"a", static_cast<void*>(r_values.data()), 3 * attr_cell_val_num);
186+
}
187+
if (attr_nullable) {
188+
query.set_validity_buffer("a", r_validity);
189+
}
190+
191+
if (array_type == TILEDB_DENSE) {
192+
Subarray subarray(ctx, array);
193+
subarray.add_range<uint32_t>(0, 1, 3);
194+
query.set_subarray(subarray);
195+
}
196+
197+
REQUIRE(query.submit() == Query::Status::COMPLETE);
198+
199+
auto table = query.result_buffer_elements();
200+
r_dimension.resize(table["id"].second);
201+
202+
if (is_var) {
203+
r_validity.resize(table["a"].first);
204+
r_offsets.resize(table["a"].first);
205+
r_values.resize(table["a"].second * value_size);
206+
} else {
207+
r_validity.resize(table["a"].second / attr_cell_val_num);
208+
r_offsets.clear();
209+
r_values.resize(table["a"].second * value_size);
210+
}
211+
212+
std::vector<uint8_t> expect_values;
213+
auto expect_cell = [&](size_t cell) {
214+
if (is_var) {
215+
expect_values.insert(
216+
expect_values.end(),
217+
w_values.begin() + w_offsets[cell],
218+
w_values.begin() + (cell + 1 == w_offsets.size() ?
219+
w_values.size() :
220+
w_offsets[cell + 1]));
221+
} else {
222+
expect_values.insert(
223+
expect_values.end(),
224+
w_values.begin() + (cell + 0) * attr_cell_val_num * value_size,
225+
w_values.begin() + (cell + 1) * attr_cell_val_num * value_size);
226+
}
227+
};
228+
auto expect_fill = [&]() {
229+
expect_values.insert(
230+
expect_values.end(), fill_value.begin(), fill_value.end());
231+
};
232+
233+
if (qc_attr == "a" && attr_nullable) {
234+
// (value, NULL, value)
235+
if (array_type == TILEDB_SPARSE) {
236+
if (eq_op == TILEDB_NE) {
237+
// (value, value)
238+
CHECK(
239+
r_dimension ==
240+
std::vector<uint32_t>{w_dimension[0], w_dimension[2]});
241+
CHECK(r_validity == std::vector<uint8_t>{1, 1});
242+
243+
std::vector<uint8_t> expect;
244+
if (is_var) {
245+
CHECK(
246+
r_offsets ==
247+
std::vector<uint64_t>{w_offsets[0], w_offsets[2]});
248+
}
249+
expect_cell(0);
250+
expect_cell(2);
251+
CHECK(r_values == expect_values);
252+
} else {
253+
// (NULL)
254+
CHECK(r_dimension == std::vector<uint32_t>{w_dimension[1]});
255+
CHECK(r_validity == std::vector<uint8_t>{0});
256+
if (is_var) {
257+
CHECK(r_offsets == std::vector<uint64_t>{0});
258+
}
259+
expect_cell(1);
260+
CHECK(r_values == expect_values);
261+
}
262+
} else {
263+
// we always will have three values, the filtered ones are replaced
264+
// with fill value
265+
if (eq_op == TILEDB_NE) {
266+
// (value, fill, value)
267+
CHECK(r_validity == std::vector<uint8_t>{1, 0, 1});
268+
if (is_var) {
269+
CHECK(
270+
r_offsets ==
271+
std::vector<uint64_t>{0, value_size, 2 * value_size});
272+
}
273+
expect_cell(0);
274+
expect_fill();
275+
expect_cell(2);
276+
CHECK(r_values == expect_values);
277+
} else {
278+
// (fill, NULL, fill): only the NULL cell (cell 1) passes the condition
279+
CHECK(r_validity == std::vector<uint8_t>{0, 0, 0});
280+
if (is_var) {
281+
CHECK(
282+
r_offsets ==
283+
std::vector<uint64_t>{0, value_size, value_size});
284+
}
285+
expect_fill();
286+
expect_cell(1);
287+
expect_fill();
288+
CHECK(r_values == expect_values);
289+
}
290+
}
291+
} else {
292+
if (eq_op == TILEDB_NE) {
293+
// no NULLs, this is always true, we should see all cells
294+
CHECK(r_dimension == w_dimension);
295+
if (attr_nullable) {
296+
CHECK(r_validity == w_validity);
297+
}
298+
if (is_var) {
299+
CHECK(r_offsets == w_offsets);
300+
}
301+
CHECK(r_values == w_values);
302+
} else {
303+
// EQ NULL will filter all rows
304+
if (array_type == TILEDB_SPARSE) {
305+
// they actually will be filtered
306+
CHECK(r_dimension.empty());
307+
CHECK(r_validity.empty());
308+
CHECK(r_offsets.empty());
309+
CHECK(r_values.empty());
310+
} else {
311+
// they will be replaced with fill values
312+
if (attr_nullable) {
313+
CHECK(r_validity == std::vector<uint8_t>{0, 0, 0});
314+
}
315+
if (is_var) {
316+
CHECK(
317+
r_offsets ==
318+
std::vector<uint64_t>{0, value_size, 2 * value_size});
319+
}
320+
expect_fill();
321+
expect_fill();
322+
expect_fill();
323+
CHECK(r_values == expect_values);
324+
}
325+
}
326+
}
327+
}
328+
}
329+
}

0 commit comments

Comments
 (0)