Skip to content

Commit 26b6af3

Browse files
fix support for support multiple cells per tile
1 parent 7f51ff1 commit 26b6af3

File tree

3 files changed

+113
-8
lines changed

3 files changed

+113
-8
lines changed

test/src/unit-cppapi-max-fragment-size.cc

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -597,3 +597,94 @@ TEST_CASE(
597597
CHECK(a2[i] == i + 1);
598598
}
599599
}
600+
601+
TEST_CASE(
602+
"Setting max_fragment_size in Dense consolidation one dim",
603+
"[global-order-writer]") {
604+
std::string array_name = "cpp_max_fragment_size_bug";
605+
Context ctx;
606+
607+
auto cleanup = [&]() {
608+
auto obj = Object::object(ctx, array_name);
609+
if (obj.type() == Object::Type::Array) {
610+
Object::remove(ctx, array_name);
611+
}
612+
};
613+
614+
cleanup();
615+
616+
// Remove the array at the end of this test.
617+
// ScopedExecutor deferred(cleanup);
618+
619+
// Create an array with exactly 9 tiles and tile extend 1
620+
Domain domain(ctx);
621+
ArraySchema schema(ctx, TILEDB_DENSE);
622+
auto d1 = tiledb::Dimension::create<int32_t>(ctx, "d1", {{0, 8}}, 3);
623+
domain.add_dimension(d1);
624+
625+
auto a1 = tiledb::Attribute::create<int32_t>(ctx, "a");
626+
schema.add_attribute(a1);
627+
628+
schema.set_order({{TILEDB_ROW_MAJOR, TILEDB_ROW_MAJOR}});
629+
schema.set_domain(domain);
630+
631+
Array::create(array_name, schema);
632+
633+
// Populate array with data from 1 to 9
634+
int value = 0;
635+
for (int i = 0; i < 9; i+=3) {
636+
Array array(ctx, array_name, TILEDB_WRITE);
637+
Query query(ctx, array);
638+
query.set_layout(TILEDB_ROW_MAJOR);
639+
tiledb::Subarray sub(ctx, array);
640+
sub.set_subarray({i, i + 2});
641+
query.set_subarray(sub);
642+
std::vector<int32_t> data = {++value, ++value, ++value};
643+
query.set_data_buffer("a", data);
644+
query.submit();
645+
array.close();
646+
}
647+
648+
// Read data to validate write and num of fragments.
649+
// CHECK(tiledb::test::num_fragments(array_name) == 3);
650+
651+
Array array(ctx, array_name, TILEDB_READ);
652+
tiledb::Subarray sub(ctx, array);
653+
sub.set_subarray({0, 8});
654+
std::vector<int32_t> a(9);
655+
Query query(ctx, array, TILEDB_READ);
656+
query.set_subarray(sub).set_layout(TILEDB_ROW_MAJOR).set_data_buffer("a", a);
657+
query.submit();
658+
array.close();
659+
660+
for (int i = 0; i < 9; i++) {
661+
CHECK(a[i] == i + 1);
662+
}
663+
664+
// Consolidate with a size limitation for the fragment. This will result in
665+
// the creation of two new fragments.
666+
tiledb::Config cfg;
667+
cfg.set("sm.consolidation.max_fragment_size", "80");
668+
ctx = Context(cfg);
669+
Array::consolidate(ctx, array_name);
670+
Array::vacuum(ctx, array_name);
671+
672+
// Check that we now have 2 fragments instead of 3
673+
CHECK(tiledb::test::num_fragments(array_name) == 2);
674+
675+
// Read data to validate correctness
676+
Array array2(ctx, array_name, TILEDB_READ);
677+
tiledb::Subarray sub2(ctx, array2);
678+
sub2.set_subarray({0, 8});
679+
std::vector<int32_t> a2(9);
680+
Query query2(ctx, array2, TILEDB_READ);
681+
query2.set_subarray(sub2)
682+
.set_layout(TILEDB_ROW_MAJOR)
683+
.set_data_buffer("a", a2);
684+
query2.submit();
685+
array2.close();
686+
687+
for (int i = 0; i < 9; i++) {
688+
CHECK(a2[i] == i + 1);
689+
}
690+
}

tiledb/sm/query/writers/global_order_writer.cc

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1435,8 +1435,11 @@ uint64_t GlobalOrderWriter::num_tiles_to_write(
14351435
uint64_t GlobalOrderWriter::num_tiles_per_row() {
14361436
auto dim_num = array_schema_.dim_num();
14371437
uint64_t ret = 1;
1438+
14381439
for (unsigned d = 1; d < dim_num; ++d) {
1439-
// skip first dim. todo Explain
1440+
// Skip first dim. We want to calculate how many tiles can fit in one row.
1441+
// To do that we skip the first dim and multiply the range / extend of the
1442+
// other dimensions
14401443
auto dim{array_schema_.dimension_ptr(d)};
14411444
auto dim_dom = dim->domain();
14421445
ret *= dim->domain_range(dim_dom) / dim->tile_extent().rvalue_as<int32_t>();
@@ -1449,20 +1452,24 @@ uint64_t GlobalOrderWriter::num_tiles_per_row() {
14491452
NDRange GlobalOrderWriter::ndranges_after_split(uint64_t num) {
14501453
uint64_t tiles_per_row = num_tiles_per_row();
14511454
auto dim_num = array_schema_.dim_num();
1455+
uint64_t cells_in_tile = array_schema_.domain().cell_num_per_tile();
14521456
NDRange nd;
14531457
nd.reserve(dim_num);
14541458

14551459
if (num % tiles_per_row != 0) {
14561460
throw GlobalOrderWriterException(
1457-
"This fragment target size is not possible please try something else "); // todo fix
1461+
"The target fragment size cannot be achieved. Please try using a "
1462+
"different size, or there might be a misconfiguration in the array "
1463+
"schema.");
14581464
}
14591465

14601466
// Calculate how many rows we will write in the current fragment
1461-
uint64_t rows_to_write = num / tiles_per_row;
1467+
uint64_t rows_of_tiles_to_write = num / tiles_per_row;
14621468

14631469
// Create the range for the index dim (first).
1464-
int start = rows_written_;
1465-
int end = start + rows_to_write - 1;
1470+
int start =
1471+
rows_written_ * cells_in_tile; // todo start from the dim_dom start
1472+
int end = start + (rows_of_tiles_to_write * cells_in_tile) - 1;
14661473
Range range(&start, &end, sizeof(int));
14671474
nd.emplace_back(range);
14681475

@@ -1475,7 +1482,7 @@ NDRange GlobalOrderWriter::ndranges_after_split(uint64_t num) {
14751482
}
14761483

14771484
// add rows written to the cache
1478-
rows_written_ += rows_to_write;
1485+
rows_written_ += rows_of_tiles_to_write;
14791486

14801487
return nd;
14811488
}

tiledb/sm/query/writers/global_order_writer.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -392,12 +392,19 @@ class GlobalOrderWriter : public WriterBase {
392392
tdb::pmr::unordered_map<std::string, WriterTileTupleVector>& tiles);
393393

394394
/**
395-
* Return the number of tiles a single row can hold
395+
* Create new ndranges by splitting the first dimension based on the number of
396+
* tiles we need to write
397+
* @param num The number of tiles we need to write.
396398
*
397-
* @return Number of tiles.
398399
*/
399400
NDRange ndranges_after_split(uint64_t num);
400401

402+
/**
403+
* Return the number of tiles a single row can hold. More specifically, the
404+
* number of tiles all dimensions except the first can hold.
405+
*
406+
* @return Number of tiles.
407+
*/
401408
uint64_t num_tiles_per_row();
402409

403410
/**

0 commit comments

Comments
 (0)