Skip to content

Commit 4857d60

Browse files
fix support for support multiple cells per tile
1 parent 94de2d8 commit 4857d60

File tree

3 files changed

+113
-8
lines changed

3 files changed

+113
-8
lines changed

test/src/unit-cppapi-max-fragment-size.cc

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -596,3 +596,94 @@ TEST_CASE(
596596
CHECK(a2[i] == i + 1);
597597
}
598598
}
599+
600+
TEST_CASE(
601+
"Setting max_fragment_size in Dense consolidation one dim",
602+
"[global-order-writer]") {
603+
std::string array_name = "cpp_max_fragment_size_bug";
604+
Context ctx;
605+
606+
auto cleanup = [&]() {
607+
auto obj = Object::object(ctx, array_name);
608+
if (obj.type() == Object::Type::Array) {
609+
Object::remove(ctx, array_name);
610+
}
611+
};
612+
613+
cleanup();
614+
615+
// Remove the array at the end of this test.
616+
// ScopedExecutor deferred(cleanup);
617+
618+
// Create an array with exactly 9 tiles and tile extend 1
619+
Domain domain(ctx);
620+
ArraySchema schema(ctx, TILEDB_DENSE);
621+
auto d1 = tiledb::Dimension::create<int32_t>(ctx, "d1", {{0, 8}}, 3);
622+
domain.add_dimension(d1);
623+
624+
auto a1 = tiledb::Attribute::create<int32_t>(ctx, "a");
625+
schema.add_attribute(a1);
626+
627+
schema.set_order({{TILEDB_ROW_MAJOR, TILEDB_ROW_MAJOR}});
628+
schema.set_domain(domain);
629+
630+
Array::create(array_name, schema);
631+
632+
// Populate array with data from 1 to 9
633+
int value = 0;
634+
for (int i = 0; i < 9; i+=3) {
635+
Array array(ctx, array_name, TILEDB_WRITE);
636+
Query query(ctx, array);
637+
query.set_layout(TILEDB_ROW_MAJOR);
638+
tiledb::Subarray sub(ctx, array);
639+
sub.set_subarray({i, i + 2});
640+
query.set_subarray(sub);
641+
std::vector<int32_t> data = {++value, ++value, ++value};
642+
query.set_data_buffer("a", data);
643+
query.submit();
644+
array.close();
645+
}
646+
647+
// Read data to validate write and num of fragments.
648+
// CHECK(tiledb::test::num_fragments(array_name) == 3);
649+
650+
Array array(ctx, array_name, TILEDB_READ);
651+
tiledb::Subarray sub(ctx, array);
652+
sub.set_subarray({0, 8});
653+
std::vector<int32_t> a(9);
654+
Query query(ctx, array, TILEDB_READ);
655+
query.set_subarray(sub).set_layout(TILEDB_ROW_MAJOR).set_data_buffer("a", a);
656+
query.submit();
657+
array.close();
658+
659+
for (int i = 0; i < 9; i++) {
660+
CHECK(a[i] == i + 1);
661+
}
662+
663+
// Consolidate with a size limitation for the fragment. This will result in
664+
// the creation of two new fragments.
665+
tiledb::Config cfg;
666+
cfg.set("sm.consolidation.max_fragment_size", "80");
667+
ctx = Context(cfg);
668+
Array::consolidate(ctx, array_name);
669+
Array::vacuum(ctx, array_name);
670+
671+
// Check that we now have 2 fragments instead of 3
672+
CHECK(tiledb::test::num_fragments(array_name) == 2);
673+
674+
// Read data to validate correctness
675+
Array array2(ctx, array_name, TILEDB_READ);
676+
tiledb::Subarray sub2(ctx, array2);
677+
sub2.set_subarray({0, 8});
678+
std::vector<int32_t> a2(9);
679+
Query query2(ctx, array2, TILEDB_READ);
680+
query2.set_subarray(sub2)
681+
.set_layout(TILEDB_ROW_MAJOR)
682+
.set_data_buffer("a", a2);
683+
query2.submit();
684+
array2.close();
685+
686+
for (int i = 0; i < 9; i++) {
687+
CHECK(a2[i] == i + 1);
688+
}
689+
}

tiledb/sm/query/writers/global_order_writer.cc

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1434,8 +1434,11 @@ uint64_t GlobalOrderWriter::num_tiles_to_write(
14341434
uint64_t GlobalOrderWriter::num_tiles_per_row() {
14351435
auto dim_num = array_schema_.dim_num();
14361436
uint64_t ret = 1;
1437+
14371438
for (unsigned d = 1; d < dim_num; ++d) {
1438-
// skip first dim. todo Explain
1439+
// Skip first dim. We want to calculate how many tiles can fit in one row.
1440+
// To do that we skip the first dim and multiply the range / extend of the
1441+
// other dimensions
14391442
auto dim{array_schema_.dimension_ptr(d)};
14401443
auto dim_dom = dim->domain();
14411444
ret *= dim->domain_range(dim_dom) / dim->tile_extent().rvalue_as<int32_t>();
@@ -1448,20 +1451,24 @@ uint64_t GlobalOrderWriter::num_tiles_per_row() {
14481451
NDRange GlobalOrderWriter::ndranges_after_split(uint64_t num) {
14491452
uint64_t tiles_per_row = num_tiles_per_row();
14501453
auto dim_num = array_schema_.dim_num();
1454+
uint64_t cells_in_tile = array_schema_.domain().cell_num_per_tile();
14511455
NDRange nd;
14521456
nd.reserve(dim_num);
14531457

14541458
if (num % tiles_per_row != 0) {
14551459
throw GlobalOrderWriterException(
1456-
"This fragment target size is not possible please try something else "); // todo fix
1460+
"The target fragment size cannot be achieved. Please try using a "
1461+
"different size, or there might be a misconfiguration in the array "
1462+
"schema.");
14571463
}
14581464

14591465
// Calculate how many rows we will write in the current fragment
1460-
uint64_t rows_to_write = num / tiles_per_row;
1466+
uint64_t rows_of_tiles_to_write = num / tiles_per_row;
14611467

14621468
// Create the range for the index dim (first).
1463-
int start = rows_written_;
1464-
int end = start + rows_to_write - 1;
1469+
int start =
1470+
rows_written_ * cells_in_tile; // todo start from the dim_dom start
1471+
int end = start + (rows_of_tiles_to_write * cells_in_tile) - 1;
14651472
Range range(&start, &end, sizeof(int));
14661473
nd.emplace_back(range);
14671474

@@ -1474,7 +1481,7 @@ NDRange GlobalOrderWriter::ndranges_after_split(uint64_t num) {
14741481
}
14751482

14761483
// add rows written to the cache
1477-
rows_written_ += rows_to_write;
1484+
rows_written_ += rows_of_tiles_to_write;
14781485

14791486
return nd;
14801487
}

tiledb/sm/query/writers/global_order_writer.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -392,12 +392,19 @@ class GlobalOrderWriter : public WriterBase {
392392
tdb::pmr::unordered_map<std::string, WriterTileTupleVector>& tiles);
393393

394394
/**
395-
* Return the number of tiles a single row can hold
395+
* Create new ndranges by splitting the first dimension based on the number of
396+
* tiles we need to write
397+
* @param num The number of tiles we need to write.
396398
*
397-
* @return Number of tiles.
398399
*/
399400
NDRange ndranges_after_split(uint64_t num);
400401

402+
/**
403+
* Return the number of tiles a single row can hold. More specifically, the
404+
* number of tiles all dimensions except the first can hold.
405+
*
406+
* @return Number of tiles.
407+
*/
401408
uint64_t num_tiles_per_row();
402409

403410
/**

0 commit comments

Comments
 (0)