Skip to content

Commit cdfa6ff

Browse files
authored
Current domain: disallow reading outside of current domain. (#5168)
This change makes sure a user cannot read data outside of the current domain. This can happen in array where the current domain was not initially set and later added through schema evolution. --- TYPE: FEATURE DESC: Current domain: disallow reading outside of current domain.
1 parent 4c76857 commit cdfa6ff

File tree

4 files changed

+194
-6
lines changed

4 files changed

+194
-6
lines changed

test/src/test-cppapi-current-domain.cc

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,3 +354,130 @@ TEST_CASE_METHOD(
354354
vfs.remove_dir(array_name);
355355
}
356356
}
357+
358+
TEST_CASE_METHOD(
359+
CurrentDomainFx,
360+
"C++ API: CurrentDomain - Read cells written outside of shape",
361+
"[cppapi][ArraySchema][currentDomain][luc]") {
362+
const std::string array_name = "test_current_domain_read";
363+
364+
tiledb::VFS vfs(ctx_);
365+
if (vfs.is_dir(array_name)) {
366+
vfs.remove_dir(array_name);
367+
}
368+
369+
// Create domain
370+
tiledb::Domain domain(ctx_);
371+
auto d1 = tiledb::Dimension::create<int32_t>(ctx_, "dim1", {{0, 100}}, 10);
372+
auto d2 = tiledb::Dimension::create(
373+
ctx_, "dim2", TILEDB_STRING_ASCII, nullptr, nullptr);
374+
domain.add_dimension(d1);
375+
domain.add_dimension(d2);
376+
377+
// Create array schema.
378+
tiledb::ArraySchema schema(ctx_, TILEDB_SPARSE);
379+
schema.set_domain(domain);
380+
schema.add_attribute(tiledb::Attribute::create<int>(ctx_, "a"));
381+
382+
// Create array
383+
tiledb::Array::create(array_name, schema);
384+
385+
tiledb::Array array(ctx_, array_name, TILEDB_WRITE);
386+
387+
// Some of the data here is outside of the current domain we will set later.
388+
std::vector<int32_t> dim1 = {12, 14, 16, 18};
389+
std::vector<char> dim2 = {'b', 'a', 'b', 'c'};
390+
std::vector<uint64_t> dim2_offsets = {0, 1, 2, 3};
391+
std::vector<int> a1 = {1, 2, 3, 4};
392+
393+
// All data in current domain.
394+
tiledb::Query query(ctx_, array, TILEDB_WRITE);
395+
query.set_layout(TILEDB_UNORDERED)
396+
.set_data_buffer("a", a1)
397+
.set_data_buffer("dim1", dim1)
398+
.set_offsets_buffer("dim2", dim2_offsets)
399+
.set_data_buffer("dim2", dim2);
400+
query.submit();
401+
array.close();
402+
403+
// Create new currentDomain to allow all data
404+
tiledb::CurrentDomain current_domain(ctx_);
405+
int range[] = {10, 16};
406+
tiledb::NDRectangle ndrect(ctx_, domain);
407+
ndrect.set_range(0, range[0], range[1]);
408+
ndrect.set_range(1, std::string("b"), std::string("c"));
409+
current_domain.set_ndrectangle(ndrect);
410+
411+
// Schema evolution
412+
tiledb::ArraySchemaEvolution se(ctx_);
413+
se.expand_current_domain(current_domain);
414+
se.array_evolve(array_name);
415+
416+
std::vector<int32_t> dim1_read(100);
417+
std::vector<char> dim2_read(100);
418+
std::vector<uint64_t> dim2_offsets_read(100);
419+
std::vector<int> a1_read(100);
420+
421+
// Now try to read data.
422+
tiledb::Array array_read(ctx_, array_name, TILEDB_READ);
423+
tiledb::Query query_read(ctx_, array_read, TILEDB_READ);
424+
query_read.set_layout(TILEDB_UNORDERED)
425+
.set_data_buffer("a", a1_read)
426+
.set_data_buffer("dim1", dim1_read)
427+
.set_offsets_buffer("dim2", dim2_offsets_read)
428+
.set_data_buffer("dim2", dim2_read);
429+
query_read.submit();
430+
431+
// Validate we got two results.
432+
auto res = query_read.result_buffer_elements();
433+
CHECK(res["a"].second == 2);
434+
CHECK(res["dim1"].second == 2);
435+
CHECK(res["dim2"].first == 2);
436+
CHECK(res["dim2"].second == 2);
437+
CHECK(a1_read[0] == 1);
438+
CHECK(a1_read[1] == 3);
439+
CHECK(dim1_read[0] == 12);
440+
CHECK(dim1_read[1] == 16);
441+
CHECK(dim2_offsets_read[0] == 0);
442+
CHECK(dim2_offsets_read[1] == 1);
443+
CHECK(dim2_read[0] == 'b');
444+
CHECK(dim2_read[1] == 'b');
445+
446+
// Now try to read data with ranges outside of current domain
447+
tiledb::Query query_read2(ctx_, array_read, TILEDB_READ);
448+
tiledb::Subarray subarray_read2(ctx_, array_read);
449+
450+
subarray_read2.add_range<int32_t>(0, 12, 16).add_range(0, 17, 20);
451+
query_read2.set_layout(TILEDB_UNORDERED)
452+
.set_data_buffer("a", a1_read)
453+
.set_data_buffer("dim1", dim1_read)
454+
.set_offsets_buffer("dim2", dim2_offsets_read)
455+
.set_data_buffer("dim2", dim2_read)
456+
.set_subarray(subarray_read2);
457+
CHECK_THROWS_WITH(
458+
query_read2.submit(),
459+
Catch::Matchers::ContainsSubstring(
460+
"A range was set outside of the current domain."));
461+
462+
tiledb::Query query_read3(ctx_, array_read, TILEDB_READ);
463+
tiledb::Subarray subarray_read3(ctx_, array_read);
464+
465+
subarray_read3.add_range(1, std::string("a"), std::string("b"));
466+
query_read3.set_layout(TILEDB_UNORDERED)
467+
.set_data_buffer("a", a1_read)
468+
.set_data_buffer("dim1", dim1_read)
469+
.set_offsets_buffer("dim2", dim2_offsets_read)
470+
.set_data_buffer("dim2", dim2_read)
471+
.set_subarray(subarray_read3);
472+
CHECK_THROWS_WITH(
473+
query_read3.submit(),
474+
Catch::Matchers::ContainsSubstring(
475+
"A range was set outside of the current domain."));
476+
477+
array_read.close();
478+
479+
// Clean up.
480+
if (vfs.is_dir(array_name)) {
481+
vfs.remove_dir(array_name);
482+
}
483+
}

tiledb/sm/array_schema/current_domain.cc

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -202,19 +202,29 @@ bool CurrentDomain::covered(const NDRange& ndranges) const {
202202
}
203203

204204
bool CurrentDomain::includes(const NDRange& non_empty_domain) const {
205+
for (dimension_size_type d = 0; d < non_empty_domain.size(); ++d) {
206+
if (!includes(d, non_empty_domain[d])) {
207+
return false;
208+
}
209+
}
210+
211+
return true;
212+
}
213+
214+
bool CurrentDomain::includes(
215+
const dimension_size_type d, const Range& range) const {
205216
switch (type_) {
206217
case CurrentDomainType::NDRECTANGLE: {
207-
for (unsigned d = 0; d < non_empty_domain.size(); ++d) {
208-
auto dim = ndrectangle_->domain()->dimension_ptr(d);
209-
if (!dim->covered(non_empty_domain[d], ndrectangle_->get_range(d))) {
210-
return false;
211-
}
218+
auto dim = ndrectangle_->domain()->dimension_ptr(d);
219+
if (!dim->covered(range, ndrectangle_->get_range(d))) {
220+
return false;
212221
}
213222
return true;
214223
}
215224
default: {
216225
throw std::runtime_error(
217-
"Unable to execute this current domain operation because one of the "
226+
"Unable to execute this current domain operation because one of "
227+
"the "
218228
"current domains passed has an unsupported type " +
219229
current_domain_type_str(type_));
220230
}

tiledb/sm/array_schema/current_domain.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,15 @@ class Domain;
5050
/** Defines an array current domain */
5151
class CurrentDomain {
5252
public:
53+
/**
54+
* Size type for the number of dimensions of an array and for dimension
55+
* indices.
56+
*
57+
* Note: This should be the same as `Domain::dimension_size_type`. We're
58+
* not including `domain.h`, otherwise we'd use that definition here.
59+
*/
60+
using dimension_size_type = unsigned int;
61+
5362
/* ********************************* */
5463
/* CONSTRUCTORS & DESTRUCTORS */
5564
/* ********************************* */
@@ -187,6 +196,16 @@ class CurrentDomain {
187196
*/
188197
bool includes(const NDRange& non_empty_domain) const;
189198

199+
/**
200+
* Checks if this current domain fully contains the range for a specific
201+
* dimension.
202+
*
203+
* @param d Dimension index to check for.
204+
* @param range Range to validate.
205+
* @return True if the range is included for the specific dimension
206+
*/
207+
bool includes(const dimension_size_type d, const Range& range) const;
208+
190209
/**
191210
* Perform various checks to ensure the current domain is coherent with the
192211
* array schema

tiledb/sm/query/query.cc

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#include "tiledb/common/memory.h"
3838
#include "tiledb/common/memory_tracker.h"
3939
#include "tiledb/sm/array/array.h"
40+
#include "tiledb/sm/array_schema/current_domain.h"
4041
#include "tiledb/sm/array_schema/dimension_label.h"
4142
#include "tiledb/sm/enums/query_status.h"
4243
#include "tiledb/sm/enums/query_type.h"
@@ -835,6 +836,37 @@ Status Query::process() {
835836
condition_->rewrite_enumeration_conditions(array_schema());
836837
}
837838

839+
if (type_ == QueryType::READ) {
840+
auto cd = array_schema_->get_current_domain();
841+
if (!cd->empty()) {
842+
// See if any data was written outside of the current domain.
843+
bool all_ned_contained_in_current_domain = true;
844+
for (auto& meta : fragment_metadata_) {
845+
if (!cd->includes(meta->non_empty_domain())) {
846+
all_ned_contained_in_current_domain = false;
847+
}
848+
}
849+
850+
for (Domain::dimension_size_type d = 0; d < array_schema_->dim_num();
851+
d++) {
852+
if (subarray_.is_set(d)) {
853+
// Make sure all ranges are contained in the current domain.
854+
for (auto& range : subarray_.ranges_for_dim(d)) {
855+
if (!cd->includes(d, range)) {
856+
throw QueryException(
857+
"A range was set outside of the current domain.");
858+
}
859+
}
860+
} else if (!all_ned_contained_in_current_domain) {
861+
// Add ranges to make sure all data read is contained in the current
862+
// domain.
863+
auto range_copy = cd->ndrectangle()->get_range(d);
864+
subarray_.add_range(d, std::move(range_copy));
865+
}
866+
}
867+
}
868+
}
869+
838870
// Update query status.
839871
status_ = QueryStatus::INPROGRESS;
840872

0 commit comments

Comments
 (0)