Skip to content

Commit cd36940

Browse files
authored
Users should be able to create an enumeration of which the sole value is the empty string (#5460)
Users should be able to create an enumeration of which the sole value is the empty string Context: * [[sc-53027]](https://app.shortcut.com/tiledb-inc/story/53027/users-should-be-able-to-create-an-enumeration-of-which-the-sole-value-is-the-empty-string) * single-cell-data/TileDB-SOMA#2822 * single-cell-data/TileDB-SOMA#2859 * single-cell-data/TileDB-SOMA#2858 --- TYPE: IMPROVEMENT DESC: Users should be able to create an enumeration of which the sole value is the empty string
1 parent ce93f0b commit cd36940

File tree

3 files changed

+197
-12
lines changed

3 files changed

+197
-12
lines changed

test/src/unit-cppapi-enumerations.cc

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,22 @@ TEST_CASE_METHOD(
112112
REQUIRE(data == values);
113113
}
114114

115+
TEST_CASE_METHOD(
116+
CPPEnumerationFx,
117+
"CPP: Enumeration API - Create All-Empty-String",
118+
"[enumeration][create][all-empty]") {
119+
std::vector<std::string> values = {""};
120+
auto enmr = Enumeration::create(ctx_, enmr_name, values);
121+
REQUIRE(enmr.ptr() != nullptr);
122+
REQUIRE(enmr.name() == enmr_name);
123+
REQUIRE(enmr.type() == TILEDB_STRING_ASCII);
124+
REQUIRE(enmr.cell_val_num() == TILEDB_VAR_NUM);
125+
REQUIRE(enmr.ordered() == false);
126+
127+
auto data = enmr.as_vector<std::string>();
128+
REQUIRE(data == values);
129+
}
130+
115131
TEST_CASE_METHOD(
116132
CPPEnumerationFx,
117133
"CPP: Enumeration API - Create Ordered",
@@ -160,6 +176,38 @@ TEST_CASE_METHOD(
160176
REQUIRE(data == final_values);
161177
}
162178

179+
TEST_CASE_METHOD(
180+
CPPEnumerationFx,
181+
"CPP: Enumeration API - Extend Non-Empty With All-Empty",
182+
"[enumeration][extend][all-empty]") {
183+
std::vector<std::string> init_values = {"fred", "wilma"};
184+
std::vector<std::string> add_values = {""};
185+
std::vector<std::string> final_values = {"fred", "wilma", ""};
186+
auto enmr1 = Enumeration::create(ctx_, enmr_name, init_values, true);
187+
auto enmr2 = enmr1.extend(add_values);
188+
189+
REQUIRE(enmr2.ptr()->is_extension_of(enmr1.ptr().get()));
190+
191+
auto data = enmr2.as_vector<std::string>();
192+
REQUIRE(data == final_values);
193+
}
194+
195+
TEST_CASE_METHOD(
196+
CPPEnumerationFx,
197+
"CPP: Enumeration API - Extend All-Empty With Non-Empty",
198+
"[enumeration][extend][all-empty]") {
199+
std::vector<std::string> init_values = {""};
200+
std::vector<std::string> add_values = {"abc", "def"};
201+
std::vector<std::string> final_values = {"", "abc", "def"};
202+
auto enmr1 = Enumeration::create(ctx_, enmr_name, init_values, true);
203+
auto enmr2 = enmr1.extend(add_values);
204+
205+
REQUIRE(enmr2.ptr()->is_extension_of(enmr1.ptr().get()));
206+
207+
auto data = enmr2.as_vector<std::string>();
208+
REQUIRE(data == final_values);
209+
}
210+
163211
TEST_CASE_METHOD(
164212
CPPEnumerationFx,
165213
"CPP: Enumeration API - Fixed Size Empty Vector extension",

tiledb/api/c_api/enumeration/test/unit_capi_enumeration.cc

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,58 @@ struct VarSizeEnumeration {
9191
tiledb_enumeration_t* enumeration_;
9292
};
9393

94+
struct NotAllEmptyStringEnumeration {
95+
NotAllEmptyStringEnumeration() {
96+
const char* values = "foobarbingobango";
97+
uint64_t offsets[5] = {0, 3, 3, 6, 11};
98+
auto rc = tiledb_enumeration_alloc(
99+
ctx_.context,
100+
"an_enumeration",
101+
TILEDB_STRING_UTF8,
102+
TILEDB_VAR_NUM,
103+
0,
104+
values,
105+
strlen(values),
106+
offsets,
107+
5 * sizeof(uint64_t),
108+
&enumeration_);
109+
REQUIRE(rc == TILEDB_OK);
110+
}
111+
112+
~NotAllEmptyStringEnumeration() {
113+
tiledb_enumeration_free(&enumeration_);
114+
}
115+
116+
ordinary_context ctx_;
117+
tiledb_enumeration_t* enumeration_;
118+
};
119+
120+
struct AllEmptyStringEnumeration {
121+
AllEmptyStringEnumeration() {
122+
const char* values = "";
123+
uint64_t offsets[1] = {0};
124+
auto rc = tiledb_enumeration_alloc(
125+
ctx_.context,
126+
"an_enumeration",
127+
TILEDB_STRING_UTF8,
128+
TILEDB_VAR_NUM,
129+
0,
130+
values,
131+
strlen(values),
132+
offsets,
133+
1 * sizeof(uint64_t),
134+
&enumeration_);
135+
REQUIRE(rc == TILEDB_OK);
136+
}
137+
138+
~AllEmptyStringEnumeration() {
139+
tiledb_enumeration_free(&enumeration_);
140+
}
141+
142+
ordinary_context ctx_;
143+
tiledb_enumeration_t* enumeration_;
144+
};
145+
94146
TEST_CASE(
95147
"C API: tiledb_enumeration_alloc argument validation",
96148
"[capi][enumeration]") {
@@ -273,6 +325,8 @@ TEST_CASE(
273325
"[capi][enumeration]") {
274326
FixedSizeEnumeration fe;
275327
VarSizeEnumeration ve;
328+
NotAllEmptyStringEnumeration ne;
329+
AllEmptyStringEnumeration ae;
276330
tiledb_datatype_t dt;
277331

278332
SECTION("success") {
@@ -284,6 +338,14 @@ TEST_CASE(
284338
rc = tiledb_enumeration_get_type(ve.ctx_.context, ve.enumeration_, &dt);
285339
REQUIRE(rc == TILEDB_OK);
286340
REQUIRE(dt == TILEDB_STRING_UTF8);
341+
342+
rc = tiledb_enumeration_get_type(ne.ctx_.context, ne.enumeration_, &dt);
343+
REQUIRE(rc == TILEDB_OK);
344+
REQUIRE(dt == TILEDB_STRING_UTF8);
345+
346+
rc = tiledb_enumeration_get_type(ae.ctx_.context, ae.enumeration_, &dt);
347+
REQUIRE(rc == TILEDB_OK);
348+
REQUIRE(dt == TILEDB_STRING_UTF8);
287349
}
288350

289351
SECTION("failure - invalid context") {
@@ -308,6 +370,8 @@ TEST_CASE(
308370
"[capi][enumeration]") {
309371
FixedSizeEnumeration fe;
310372
VarSizeEnumeration ve;
373+
NotAllEmptyStringEnumeration ne;
374+
AllEmptyStringEnumeration ae;
311375
uint32_t cvn;
312376

313377
SECTION("success") {
@@ -320,6 +384,16 @@ TEST_CASE(
320384
ve.ctx_.context, ve.enumeration_, &cvn);
321385
REQUIRE(rc == TILEDB_OK);
322386
REQUIRE(cvn == TILEDB_VAR_NUM);
387+
388+
rc = tiledb_enumeration_get_cell_val_num(
389+
ne.ctx_.context, ne.enumeration_, &cvn);
390+
REQUIRE(rc == TILEDB_OK);
391+
REQUIRE(cvn == TILEDB_VAR_NUM);
392+
393+
rc = tiledb_enumeration_get_cell_val_num(
394+
ae.ctx_.context, ae.enumeration_, &cvn);
395+
REQUIRE(rc == TILEDB_OK);
396+
REQUIRE(cvn == TILEDB_VAR_NUM);
323397
}
324398

325399
SECTION("failure - invalid context") {
@@ -346,6 +420,8 @@ TEST_CASE(
346420
"[capi][enumeration]") {
347421
FixedSizeEnumeration fe;
348422
VarSizeEnumeration ve;
423+
NotAllEmptyStringEnumeration ne;
424+
AllEmptyStringEnumeration ae;
349425
int o;
350426

351427
SECTION("success") {
@@ -357,6 +433,14 @@ TEST_CASE(
357433
rc = tiledb_enumeration_get_ordered(ve.ctx_.context, ve.enumeration_, &o);
358434
REQUIRE(rc == TILEDB_OK);
359435
REQUIRE(!o);
436+
437+
rc = tiledb_enumeration_get_ordered(ne.ctx_.context, ne.enumeration_, &o);
438+
REQUIRE(rc == TILEDB_OK);
439+
REQUIRE(!o);
440+
441+
rc = tiledb_enumeration_get_ordered(ae.ctx_.context, ae.enumeration_, &o);
442+
REQUIRE(rc == TILEDB_OK);
443+
REQUIRE(!o);
360444
}
361445

362446
SECTION("failure - invalid context") {
@@ -381,11 +465,15 @@ TEST_CASE(
381465
"[capi][enumeration]") {
382466
FixedSizeEnumeration fe;
383467
VarSizeEnumeration ve;
468+
NotAllEmptyStringEnumeration ne;
469+
AllEmptyStringEnumeration ae;
384470
const void* d;
385471
uint64_t ds;
386472

387473
uint32_t fixed_expect[5] = {1, 2, 3, 4, 5};
388474
const char* var_expect = "foobarbazbingobango";
475+
const char* not_all_empty_expect = "foobarbingobango";
476+
const char* all_empty_expect = "";
389477

390478
SECTION("success") {
391479
auto rc =
@@ -398,6 +486,18 @@ TEST_CASE(
398486
REQUIRE(rc == TILEDB_OK);
399487
REQUIRE(std::memcmp(var_expect, d, strlen(var_expect)) == 0);
400488
REQUIRE(ds == strlen(var_expect));
489+
490+
rc = tiledb_enumeration_get_data(ne.ctx_.context, ne.enumeration_, &d, &ds);
491+
REQUIRE(rc == TILEDB_OK);
492+
REQUIRE(
493+
std::memcmp(not_all_empty_expect, d, strlen(not_all_empty_expect)) ==
494+
0);
495+
REQUIRE(ds == strlen(not_all_empty_expect));
496+
497+
rc = tiledb_enumeration_get_data(ae.ctx_.context, ae.enumeration_, &d, &ds);
498+
REQUIRE(rc == TILEDB_OK);
499+
REQUIRE(std::memcmp(all_empty_expect, d, strlen(all_empty_expect)) == 0);
500+
REQUIRE(ds == strlen(all_empty_expect));
401501
}
402502

403503
SECTION("failure - invalid context") {
@@ -428,10 +528,14 @@ TEST_CASE(
428528
"[capi][enumeration]") {
429529
FixedSizeEnumeration fe;
430530
VarSizeEnumeration ve;
531+
NotAllEmptyStringEnumeration ne;
532+
AllEmptyStringEnumeration ae;
431533
const void* o;
432534
uint64_t os;
433535

434536
uint64_t var_expect[5] = {0, 3, 6, 9, 14};
537+
uint64_t ne_expect[5] = {0, 3, 3, 6, 11};
538+
uint64_t ae_expect[5] = {0};
435539

436540
SECTION("success") {
437541
auto rc = tiledb_enumeration_get_offsets(
@@ -445,6 +549,18 @@ TEST_CASE(
445549
REQUIRE(rc == TILEDB_OK);
446550
REQUIRE(std::memcmp(var_expect, o, sizeof(uint64_t) * 5) == 0);
447551
REQUIRE(os == sizeof(uint64_t) * 5);
552+
553+
rc = tiledb_enumeration_get_offsets(
554+
ne.ctx_.context, ne.enumeration_, &o, &os);
555+
REQUIRE(rc == TILEDB_OK);
556+
REQUIRE(std::memcmp(ne_expect, o, sizeof(uint64_t) * 5) == 0);
557+
REQUIRE(os == sizeof(uint64_t) * 5);
558+
559+
rc = tiledb_enumeration_get_offsets(
560+
ae.ctx_.context, ae.enumeration_, &o, &os);
561+
REQUIRE(rc == TILEDB_OK);
562+
REQUIRE(std::memcmp(ae_expect, o, sizeof(uint64_t) * 1) == 0);
563+
REQUIRE(os == sizeof(uint64_t) * 1);
448564
}
449565

450566
SECTION("failure - invalid context") {

tiledb/sm/array_schema/enumeration.cc

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -136,9 +136,17 @@ Enumeration::Enumeration(
136136
// non-zero data size that is greater than or equal to the last
137137
// offset provided.
138138
if (data == nullptr) {
139-
throw EnumerationException(
140-
"Invalid data input, nullptr provided when the provided offsets "
141-
"require data.");
139+
// Users need to be able to create an enumeration containing one value
140+
// which is the empty string.
141+
//
142+
// The std::vector<uint8_t> foo(0, 0) constructor at our callsite
143+
// results in a foo.data() which is nullptr and foo.size() which is
144+
// zero. So if data is nullptr, we only fail if data_size is not zero.
145+
if (data_size != 0) {
146+
throw EnumerationException(
147+
"Invalid data input, nullptr provided when the provided offsets "
148+
"require data.");
149+
}
142150
}
143151

144152
if (data_size < offset_values[num_offsets - 1]) {
@@ -235,14 +243,24 @@ shared_ptr<const Enumeration> Enumeration::extend(
235243
uint64_t data_size,
236244
const void* offsets,
237245
uint64_t offsets_size) const {
238-
if (data == nullptr) {
239-
throw EnumerationException(
240-
"Unable to extend an enumeration without a data buffer.");
241-
}
246+
if (data == nullptr && data_size == 0) {
247+
// This is OK
248+
//
249+
// Users need to be able to create an enumeration containing
250+
// one value which is the empty string.
251+
//
252+
// The std::vector<uint8_t> foo(0, 0) constructor at our callsite results
253+
// in a foo.data() which is nullptr and foo.size() which is zero.
254+
} else {
255+
if (data == nullptr) {
256+
throw EnumerationException(
257+
"Unable to extend an enumeration without a data buffer.");
258+
}
242259

243-
if (data_size == 0) {
244-
throw EnumerationException(
245-
"Unable to extend an enumeration with a zero sized data buffer.");
260+
if (data_size == 0) {
261+
throw EnumerationException(
262+
"Unable to extend an enumeration with a zero sized data buffer.");
263+
}
246264
}
247265

248266
if (var_size()) {
@@ -274,7 +292,9 @@ shared_ptr<const Enumeration> Enumeration::extend(
274292

275293
Buffer new_data(data_.size() + data_size);
276294
throw_if_not_ok(new_data.write(data_.data(), data_.size()));
277-
throw_if_not_ok(new_data.write(data, data_size));
295+
if (data_size > 0) {
296+
throw_if_not_ok(new_data.write(data, data_size));
297+
}
278298

279299
const void* new_offsets_ptr = nullptr;
280300
uint64_t new_offsets_size = 0;
@@ -329,7 +349,8 @@ bool Enumeration::is_extension_of(shared_ptr<const Enumeration> other) const {
329349
}
330350

331351
auto other_data = other->data();
332-
if (data_.size() <= other_data.size()) {
352+
// Not <=, since a single empty string can be added as an extension.
353+
if (data_.size() < other_data.size()) {
333354
return false;
334355
}
335356

0 commit comments

Comments
 (0)