Skip to content

Commit 8b15119

Browse files
rroelkeKiterLuc
andauthored
Regression test for SC-53334. (#5254)
This demonstrates an error running the MIN aggregate function on the `STRING_ASCII` datatype whose cell val num is 1. Expected behavior: this should behave the same way as using the `UINT8` data type, which has the same physical 1-byte data width. Or, if this expectation is against tiledb philosophy, at least there should be a clear error about the operation being unsupported. The added regression test demonstrates that we do not have the same behavior for these two types. Running MIN on `STRING_ASCII` throws an exception. --- TYPE: NO_HISTORY DESC: Regression test for SC-53334. --------- Co-authored-by: KiterLuc <[email protected]>
1 parent 9b4e5ea commit 8b15119

File tree

2 files changed

+198
-0
lines changed

2 files changed

+198
-0
lines changed

test/regression/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ if (TILEDB_CPP_API)
5151
list(APPEND SOURCES targets/sc-35424.cc)
5252
list(APPEND SOURCES targets/sc-36372.cc)
5353
list(APPEND SOURCES targets/sc-38300.cc)
54+
list(APPEND SOURCES targets/sc-53334.cc)
5455
endif()
5556

5657
add_executable(tiledb_regression
Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
/**
2+
* @file sc-53334.cc
3+
*
4+
* @section LICENSE
5+
*
6+
* The MIT License
7+
*
8+
* @copyright Copyright (c) 2024 TileDB, Inc.
9+
*
10+
* Permission is hereby granted, free of charge, to any person obtaining a copy
11+
* of this software and associated documentation files (the "Software"), to deal
12+
* in the Software without restriction, including without limitation the rights
13+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14+
* copies of the Software, and to permit persons to whom the Software is
15+
* furnished to do so, subject to the following conditions:
16+
*
17+
* The above copyright notice and this permission notice shall be included in
18+
* all copies or substantial portions of the Software.
19+
*
20+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26+
* THE SOFTWARE.
27+
*
28+
* @section DESCRIPTION
29+
*
30+
* When run, this program will create a simple 2D sparse array, write some data
31+
* to it, and read the data back with a MIN aggregate.
32+
*/
33+
34+
#include <stdio.h>
35+
#include <stdlib.h>
36+
#include <tiledb/tiledb.h>
37+
#include <tiledb/tiledb>
38+
#include <tiledb/tiledb_experimental>
39+
#include <vector>
40+
41+
#include <catch2/catch_template_test_macros.hpp>
42+
43+
using namespace tiledb;
44+
45+
/** Alias for the plain `char` type. */
using min_type = char;
46+
47+
template <tiledb_datatype_t attribute_type>
48+
struct AttributeTraits;
49+
50+
template <>
51+
struct AttributeTraits<TILEDB_UINT8> {
52+
typedef uint8_t value_type;
53+
};
54+
55+
template <>
56+
struct AttributeTraits<TILEDB_STRING_ASCII> {
57+
typedef char value_type;
58+
};
59+
60+
template <tiledb_datatype_t attribute_datatype>
61+
struct MyArray {
62+
using AttributeValueType =
63+
typename AttributeTraits<attribute_datatype>::value_type;
64+
65+
static void create_array(Context& ctx, const char* array_name) {
66+
// The array will be 4x4 with dimensions "rows" and "cols", with domain
67+
// [1,4].
68+
auto rows = Dimension::create<int32_t>(ctx, "rows", {{1, 4}}, 4);
69+
auto cols = Dimension::create<int32_t>(ctx, "columns", {{1, 4}}, 4);
70+
71+
// Create domain
72+
Domain domain(ctx);
73+
domain.add_dimension(rows);
74+
domain.add_dimension(cols);
75+
76+
// Create a single attribute "a" so each (i,j) cell can store a character
77+
Attribute a(ctx, "a", attribute_datatype);
78+
79+
// Create array schema
80+
ArraySchema schema(ctx, TILEDB_SPARSE);
81+
schema.set_domain(domain);
82+
schema.set_cell_order(TILEDB_ROW_MAJOR);
83+
schema.set_tile_order(TILEDB_ROW_MAJOR);
84+
schema.add_attribute(a);
85+
86+
// Create array
87+
Array::create(array_name, schema);
88+
}
89+
90+
static void write_array(Context& ctx, const char* array_name) {
91+
// Open array for writing
92+
Array array(ctx, array_name, TILEDB_WRITE);
93+
94+
// Data
95+
std::vector<int32_t> rows = {
96+
1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4};
97+
std::vector<int32_t> cols = {
98+
1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4};
99+
std::vector<AttributeValueType> atts = {
100+
'a',
101+
'b',
102+
'c',
103+
'd',
104+
'e',
105+
'f',
106+
'g',
107+
'h',
108+
'i',
109+
'j',
110+
'k',
111+
'l',
112+
'm',
113+
'n',
114+
'o',
115+
'p'};
116+
117+
Query query(ctx, array);
118+
query.set_data_buffer("rows", rows)
119+
.set_data_buffer("columns", cols)
120+
.set_data_buffer("a", atts);
121+
122+
query.submit();
123+
array.close();
124+
}
125+
126+
static AttributeValueType query_min(Context& ctx, const char* array_name) {
127+
// note, use C API because the CPP API doesn't seem to have Min yet
128+
Array array(ctx, array_name, TILEDB_READ);
129+
130+
Query query(ctx, array);
131+
query.set_layout(TILEDB_UNORDERED);
132+
133+
QueryChannel default_channel =
134+
QueryExperimental::get_default_channel(query);
135+
ChannelOperation operation =
136+
QueryExperimental::create_unary_aggregate<MinOperator>(query, "a");
137+
default_channel.apply_aggregate("Min", operation);
138+
139+
std::vector<AttributeValueType> min(1);
140+
query.set_data_buffer("Min", min);
141+
142+
query.submit();
143+
query.finalize();
144+
145+
return min[0];
146+
}
147+
};
148+
149+
/**
 * Reports whether the TileDB object found at `uri` is an array.
 *
 * @param ctx TileDB context.
 * @param uri Location to inspect.
 * @return true when the object's type is `Object::Type::Array`.
 */
bool array_exists(Context& ctx, const char* uri) {
  return tiledb::Object::object(ctx, uri).type() ==
         tiledb::Object::Type::Array;
}
153+
154+
// MIN over a single-value UINT8 attribute returns the smallest written value.
TEST_CASE("SC-53334 min single value UINT8 works", "[bug][sc-53334]") {
  using Uint8Array = MyArray<TILEDB_UINT8>;

  Context ctx;
  const std::string array_uri = "sc-53334-uint8";
  const char* array_name = array_uri.c_str();

  // Create and fill the array only if it is not already there.
  const bool have_array = array_exists(ctx, array_name);
  if (!have_array) {
    Uint8Array::create_array(ctx, array_name);
    Uint8Array::write_array(ctx, array_name);
  }

  const uint8_t min = Uint8Array::query_min(ctx, array_name);
  REQUIRE(min == 'a');
}
168+
169+
// MIN over a single-value STRING_ASCII attribute; currently expected to fail.
TEST_CASE(
    "SC-53334 min single value STRING_ASCII does not work",
    "[regression][bug][sc-53334][!shouldfail]") {
  using MyArray = MyArray<TILEDB_STRING_ASCII>;

  Context ctx;
  std::string uri = "sc-53334-string-ascii";

  // Create and fill the array only if it is not already there.
  const bool have_array = array_exists(ctx, uri.c_str());
  if (!have_array) {
    MyArray::create_array(ctx, uri.c_str());
    MyArray::write_array(ctx, uri.c_str());
  }

  /*
   * The query below throws an exception instead of returning the correct
   * result "a":
   *
   * "OutputBufferValidator: Aggregate fixed size buffer should be for one
   * element"
   *
   * This happens because the Min/Max ops are specialized to do std::string as
   * their internal result buffer, but we are looking for a single `char`
   * result.
   *
   * The `[!shouldfail]` tag makes the failing `CHECK_NOTHROW` the expected
   * outcome. When the bug is fixed, remove the tag, the `CHECK_NOTHROW` and
   * the `if (false)` guard so that the value check runs.
   */
  CHECK_NOTHROW(MyArray::query_min(ctx, uri.c_str()));
  if (false) {
    const char min = MyArray::query_min(ctx, uri.c_str());
    REQUIRE(min == 'a');
  }
}

0 commit comments

Comments
 (0)