Skip to content

Commit a7e2a07

Browse files
authored
Fix fragment consolidation to allow using absolute URIs. (#5135)
This fixes fragment consolidation to allow using absolute URIs. I ran into this while adding a TileDB-Go binding for `tiledb_array_consolidate_fragments` in [SC-49723](https://app.shortcut.com/tiledb-inc/story/49723/add-tiledb-go-binding-for-tiledb-array-consolidate-fragments) by passing URIs directly from the [FragmentInfo APIs.](https://github.com/TileDB-Inc/TileDB-Go/pull/322/files#diff-c37e5f4dd452918ab7c467ff243d69310dbd1b238b7b717a98871f80cf0fab70R156) --- TYPE: BUG DESC: Fix fragment consolidation to allow using absolute URIs.
1 parent 64359b4 commit a7e2a07

File tree

4 files changed

+359
-8
lines changed

4 files changed

+359
-8
lines changed

test/src/unit-capi-consolidation.cc

Lines changed: 160 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ struct ConsolidationFx {
167167
static int get_wrt_num(const char* path, void* data);
168168
static int get_ignore_num(const char* path, void* data);
169169
static int get_ok_num(const char* path, void* data);
170+
static int get_vac_num(const char* path, void* data);
170171
static int get_array_meta_files_callback(const char* path, void* data);
171172
static int get_array_meta_vac_files_callback(const char* path, void* data);
172173
static int get_vac_files_callback(const char* path, void* data);
@@ -4863,6 +4864,16 @@ int ConsolidationFx::get_ok_num(const char* path, void* data) {
48634864
return 1;
48644865
}
48654866

4867+
int ConsolidationFx::get_vac_num(const char* path, void* data) {
4868+
auto data_struct = (ConsolidationFx::get_num_struct*)data;
4869+
if (tiledb::sm::utils::parse::ends_with(
4870+
path, tiledb::sm::constants::vacuum_file_suffix)) {
4871+
++data_struct->num;
4872+
}
4873+
4874+
return 1;
4875+
}
4876+
48664877
int ConsolidationFx::get_array_meta_files_callback(
48674878
const char* path, void* data) {
48684879
auto vec = static_cast<std::vector<std::string>*>(data);
@@ -7197,9 +7208,17 @@ TEST_CASE_METHOD(
71977208
REQUIRE(err == nullptr);
71987209

71997210
// Consolidate
7200-
const char* uris[2] = {strrchr(uri, '/') + 1, strrchr(uri2, '/') + 1};
7201-
rc = tiledb_array_consolidate_fragments(
7202-
ctx_, dense_array_uri_.c_str(), uris, 2, cfg);
7211+
SECTION("Relative URIs") {
7212+
const char* uris[2] = {strrchr(uri, '/') + 1, strrchr(uri2, '/') + 1};
7213+
rc = tiledb_array_consolidate_fragments(
7214+
ctx_, dense_array_uri_.c_str(), uris, 2, cfg);
7215+
}
7216+
7217+
SECTION("Absolute URIs") {
7218+
const char* uris[2] = {uri, uri2};
7219+
rc = tiledb_array_consolidate_fragments(
7220+
ctx_, dense_array_uri_.c_str(), uris, 2, cfg);
7221+
}
72037222
CHECK(rc == TILEDB_OK);
72047223
tiledb_config_free(&cfg);
72057224

@@ -7273,9 +7292,28 @@ TEST_CASE_METHOD(
72737292
REQUIRE(err == nullptr);
72747293

72757294
// Consolidate
7276-
const char* uris[2] = {strrchr(uri, '/') + 1, strrchr(uri2, '/') + 1};
7277-
rc = tiledb_array_consolidate_fragments(
7278-
ctx_, sparse_array_uri_.c_str(), uris, 2, cfg);
7295+
SECTION("Relative URIs") {
7296+
const char* uris[2] = {strrchr(uri, '/') + 1, strrchr(uri2, '/') + 1};
7297+
rc = tiledb_array_consolidate_fragments(
7298+
ctx_, sparse_array_uri_.c_str(), uris, 2, cfg);
7299+
}
7300+
7301+
SECTION("Absolute URIs") {
7302+
const char* uris[2] = {uri, uri2};
7303+
rc = tiledb_array_consolidate_fragments(
7304+
ctx_, sparse_array_uri_.c_str(), uris, 2, cfg);
7305+
}
7306+
7307+
SECTION("Invalid URIs") {
7308+
std::string frag1(strrchr(uri, '/') + 1), frag2(strrchr(uri, '/') + 1);
7309+
frag1 = "/some/array/__fragments/" + frag1;
7310+
frag2 = "/some/array/__fragments/" + frag2;
7311+
const char* uris[2] = {frag1.c_str(), frag2.c_str()};
7312+
rc = tiledb_array_consolidate_fragments(
7313+
ctx_, sparse_array_uri_.c_str(), uris, 2, cfg);
7314+
CHECK(rc == TILEDB_ERR);
7315+
return;
7316+
}
72797317
CHECK(rc == TILEDB_OK);
72807318
tiledb_config_free(&cfg);
72817319

@@ -7307,6 +7345,122 @@ TEST_CASE_METHOD(
73077345
remove_sparse_array();
73087346
}
73097347

7348+
#ifndef _WIN32
7349+
TEST_CASE_METHOD(
7350+
ConsolidationFx,
7351+
"C API: Test consolidation v11 array, split fragments",
7352+
"[capi][consolidation][split-fragments][non-rest]") {
7353+
// vfs_copy_dir is only supported on Posix and S3.
7354+
// Experimental builds throw when attempting to write to an array with
7355+
// previous format version.
7356+
if (!vfs_test_setup_.is_local() || is_experimental_build) {
7357+
return;
7358+
}
7359+
7360+
remove_sparse_array();
7361+
create_sparse_array_v11(ctx_, sparse_array_uri_);
7362+
write_sparse_v11(ctx_, sparse_array_uri_, 0);
7363+
write_sparse_v11(ctx_, sparse_array_uri_, 1);
7364+
write_sparse_v11(ctx_, sparse_array_uri_, 2);
7365+
write_sparse_v11(ctx_, sparse_array_uri_, 3);
7366+
7367+
// Create fragment info object
7368+
tiledb_fragment_info_t* fragment_info = nullptr;
7369+
int rc = tiledb_fragment_info_alloc(
7370+
ctx_, sparse_array_uri_.c_str(), &fragment_info);
7371+
CHECK(rc == TILEDB_OK);
7372+
7373+
// Load fragment info
7374+
rc = tiledb_fragment_info_load(ctx_, fragment_info);
7375+
CHECK(rc == TILEDB_OK);
7376+
7377+
// Get fragment URIs
7378+
const char* uri;
7379+
rc = tiledb_fragment_info_get_fragment_uri(ctx_, fragment_info, 1, &uri);
7380+
CHECK(rc == TILEDB_OK);
7381+
const char* uri2;
7382+
rc = tiledb_fragment_info_get_fragment_uri(ctx_, fragment_info, 3, &uri2);
7383+
CHECK(rc == TILEDB_OK);
7384+
7385+
// Set consolidation buffer size
7386+
tiledb_config_t* cfg;
7387+
tiledb_error_t* err = nullptr;
7388+
7389+
rc = tiledb_config_alloc(&cfg, &err);
7390+
REQUIRE(rc == TILEDB_OK);
7391+
REQUIRE(err == nullptr);
7392+
7393+
rc = tiledb_config_set(cfg, "sm.consolidation.buffer_size", "10000", &err);
7394+
REQUIRE(rc == TILEDB_OK);
7395+
REQUIRE(err == nullptr);
7396+
7397+
// Consolidate
7398+
SECTION("Relative URIs") {
7399+
const char* uris[2] = {strrchr(uri, '/') + 1, strrchr(uri2, '/') + 1};
7400+
rc = tiledb_array_consolidate_fragments(
7401+
ctx_, sparse_array_uri_.c_str(), uris, 2, cfg);
7402+
}
7403+
7404+
SECTION("Absolute URIs") {
7405+
const char* uris[2] = {uri, uri2};
7406+
rc = tiledb_array_consolidate_fragments(
7407+
ctx_, sparse_array_uri_.c_str(), uris, 2, cfg);
7408+
}
7409+
7410+
SECTION("Invalid URIs") {
7411+
std::string frag1(strrchr(uri, '/') + 1), frag2(strrchr(uri, '/') + 1);
7412+
frag1 = "/some/array/" + frag1;
7413+
frag2 = "/some/array/" + frag2;
7414+
const char* uris[2] = {frag1.c_str(), frag2.c_str()};
7415+
rc = tiledb_array_consolidate_fragments(
7416+
ctx_, sparse_array_uri_.c_str(), uris, 2, cfg);
7417+
CHECK(rc == TILEDB_ERR);
7418+
return;
7419+
}
7420+
CHECK(rc == TILEDB_OK);
7421+
tiledb_config_free(&cfg);
7422+
7423+
tiledb_fragment_info_free(&fragment_info);
7424+
7425+
// Check for 1 vacuum file after consolidation.
7426+
get_num_struct data = {ctx_, vfs_, 0};
7427+
rc =
7428+
tiledb_vfs_ls(ctx_, vfs_, sparse_array_uri_.c_str(), &get_vac_num, &data);
7429+
CHECK(rc == TILEDB_OK);
7430+
CHECK(data.num == 1);
7431+
7432+
// Check for 5 fragments comitted: 4 writes and 1 consolidation.
7433+
data = {ctx_, vfs_, 0};
7434+
rc = tiledb_vfs_ls(ctx_, vfs_, sparse_array_uri_.c_str(), &get_ok_num, &data);
7435+
CHECK(rc == TILEDB_OK);
7436+
CHECK(data.num == 5);
7437+
7438+
// Check reading after consolidation
7439+
read_sparse_v11(ctx_, sparse_array_uri_, UINT64_MAX);
7440+
7441+
// Vacuum
7442+
rc = tiledb_array_vacuum(ctx_, sparse_array_uri_.c_str(), NULL);
7443+
CHECK(rc == TILEDB_OK);
7444+
read_sparse_v11(ctx_, sparse_array_uri_, UINT64_MAX);
7445+
7446+
// Check for 3 comitted fragments after vacuum.
7447+
data = {ctx_, vfs_, 0};
7448+
rc = tiledb_vfs_ls(ctx_, vfs_, sparse_array_uri_.c_str(), &get_ok_num, &data);
7449+
CHECK(rc == TILEDB_OK);
7450+
CHECK(data.num == 3);
7451+
7452+
// Check for no vacuum file after vacuum.
7453+
data = {ctx_, vfs_, 0};
7454+
rc =
7455+
tiledb_vfs_ls(ctx_, vfs_, sparse_array_uri_.c_str(), &get_vac_num, &data);
7456+
CHECK(rc == TILEDB_OK);
7457+
CHECK(data.num == 0);
7458+
7459+
// Clean up
7460+
remove_sparse_array();
7461+
}
7462+
#endif
7463+
73107464
TEST_CASE_METHOD(
73117465
ConsolidationFx,
73127466
"C API: Test consolidation, empty array",

test/support/src/helpers.cc

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1472,6 +1472,155 @@ sm::URI generate_fragment_uri(sm::Array* array) {
14721472
return frag_dir_uri.join_path(new_fragment_str);
14731473
}
14741474

1475+
void create_sparse_array_v11(tiledb_ctx_t* ctx, const std::string& array_name) {
1476+
tiledb_config_t* config;
1477+
REQUIRE(tiledb_ctx_get_config(ctx, &config) == TILEDB_OK);
1478+
tiledb_vfs_t* vfs;
1479+
REQUIRE(tiledb_vfs_alloc(ctx, config, &vfs) == TILEDB_OK);
1480+
// Get the v11 sparse array.
1481+
std::string v11_arrays_dir =
1482+
std::string(TILEDB_TEST_INPUTS_DIR) + "/arrays/sparse_array_v11";
1483+
REQUIRE(
1484+
tiledb_vfs_copy_dir(
1485+
ctx, vfs, v11_arrays_dir.c_str(), array_name.c_str()) == TILEDB_OK);
1486+
}
1487+
1488+
void write_sparse_v11(
1489+
tiledb_ctx_t* ctx, const std::string& array_name, uint64_t timestamp) {
1490+
// Prepare cell buffers.
1491+
std::vector<int> buffer_a1{0, 1, 2, 3};
1492+
std::vector<uint64_t> buffer_a2{0, 1, 3, 6};
1493+
std::string buffer_var_a2("abbcccdddd");
1494+
std::vector<float> buffer_a3{0.1f, 0.2f, 1.1f, 1.2f, 2.1f, 2.2f, 3.1f, 3.2f};
1495+
std::vector<uint64_t> buffer_coords_dim1{1, 1, 1, 2};
1496+
std::vector<uint64_t> buffer_coords_dim2{1, 2, 4, 3};
1497+
1498+
// Open array.
1499+
tiledb_array_t* array;
1500+
REQUIRE(tiledb_array_alloc(ctx, array_name.c_str(), &array) == TILEDB_OK);
1501+
REQUIRE(
1502+
tiledb_array_set_open_timestamp_end(ctx, array, timestamp) == TILEDB_OK);
1503+
REQUIRE(tiledb_array_open(ctx, array, TILEDB_WRITE) == TILEDB_OK);
1504+
1505+
// Create query.
1506+
tiledb_query_t* query;
1507+
REQUIRE(tiledb_query_alloc(ctx, array, TILEDB_WRITE, &query) == TILEDB_OK);
1508+
REQUIRE(
1509+
tiledb_query_set_layout(ctx, query, TILEDB_GLOBAL_ORDER) == TILEDB_OK);
1510+
uint64_t a1_size = buffer_a1.size() * sizeof(int);
1511+
REQUIRE(
1512+
tiledb_query_set_data_buffer(
1513+
ctx, query, "a1", buffer_a1.data(), &a1_size) == TILEDB_OK);
1514+
uint64_t a2_var_size = buffer_var_a2.size() * sizeof(char);
1515+
REQUIRE(
1516+
tiledb_query_set_data_buffer(
1517+
ctx, query, "a2", (void*)buffer_var_a2.c_str(), &a2_var_size) ==
1518+
TILEDB_OK);
1519+
uint64_t a2_size = buffer_a2.size() * sizeof(uint64_t);
1520+
REQUIRE(
1521+
tiledb_query_set_offsets_buffer(
1522+
ctx, query, "a2", buffer_a2.data(), &a2_size) == TILEDB_OK);
1523+
uint64_t a3_size = buffer_a3.size() * sizeof(float);
1524+
REQUIRE(
1525+
tiledb_query_set_data_buffer(
1526+
ctx, query, "a3", buffer_a3.data(), &a3_size) == TILEDB_OK);
1527+
1528+
uint64_t d1_size = buffer_coords_dim1.size() * sizeof(uint64_t);
1529+
REQUIRE(
1530+
tiledb_query_set_data_buffer(
1531+
ctx, query, "d1", buffer_coords_dim1.data(), &d1_size) == TILEDB_OK);
1532+
uint64_t d2_size = buffer_coords_dim2.size() * sizeof(uint64_t);
1533+
REQUIRE(
1534+
tiledb_query_set_data_buffer(
1535+
ctx, query, "d2", buffer_coords_dim2.data(), &d2_size) == TILEDB_OK);
1536+
1537+
// Submit/finalize the query.
1538+
REQUIRE(tiledb_query_submit_and_finalize(ctx, query) == TILEDB_OK);
1539+
// Close array.
1540+
REQUIRE(tiledb_array_close(ctx, array) == TILEDB_OK);
1541+
1542+
tiledb_array_free(&array);
1543+
tiledb_query_free(&query);
1544+
}
1545+
1546+
void read_sparse_v11(
1547+
tiledb_ctx_t* ctx, const std::string& array_name, uint64_t timestamp) {
1548+
// Prepare expected results for cell buffers.
1549+
std::vector<int> buffer_a1{0, 1, 2, 3};
1550+
std::vector<uint64_t> buffer_a2{0, 1, 3, 6};
1551+
std::string buffer_var_a2("abbcccdddd");
1552+
std::vector<float> buffer_a3{0.1f, 0.2f, 1.1f, 1.2f, 2.1f, 2.2f, 3.1f, 3.2f};
1553+
std::vector<uint64_t> buffer_coords_dim1{1, 1, 1, 2};
1554+
std::vector<uint64_t> buffer_coords_dim2{1, 2, 4, 3};
1555+
1556+
int buffer_a1_read[4];
1557+
uint64_t buffer_a2_read[4];
1558+
char buffer_var_a2_read[10];
1559+
float buffer_a3_read[8];
1560+
uint64_t buffer_coords_dim1_read[4];
1561+
uint64_t buffer_coords_dim2_read[4];
1562+
1563+
// Open array.
1564+
tiledb_array_t* array;
1565+
REQUIRE(tiledb_array_alloc(ctx, array_name.c_str(), &array) == TILEDB_OK);
1566+
REQUIRE(
1567+
tiledb_array_set_open_timestamp_end(ctx, array, timestamp) == TILEDB_OK);
1568+
REQUIRE(tiledb_array_open(ctx, array, TILEDB_READ) == TILEDB_OK);
1569+
1570+
// Create query.
1571+
tiledb_query_t* query;
1572+
REQUIRE(tiledb_query_alloc(ctx, array, TILEDB_READ, &query) == TILEDB_OK);
1573+
REQUIRE(
1574+
tiledb_query_set_layout(ctx, query, TILEDB_GLOBAL_ORDER) == TILEDB_OK);
1575+
uint64_t a1_size = buffer_a1.size() * sizeof(int);
1576+
REQUIRE(
1577+
tiledb_query_set_data_buffer(
1578+
ctx, query, "a1", buffer_a1_read, &a1_size) == TILEDB_OK);
1579+
uint64_t a2_var_size = buffer_var_a2.size() * sizeof(char);
1580+
REQUIRE(
1581+
tiledb_query_set_data_buffer(
1582+
ctx, query, "a2", buffer_var_a2_read, &a2_var_size) == TILEDB_OK);
1583+
uint64_t a2_size = buffer_a2.size() * sizeof(uint64_t);
1584+
REQUIRE(
1585+
tiledb_query_set_offsets_buffer(
1586+
ctx, query, "a2", buffer_a2_read, &a2_size) == TILEDB_OK);
1587+
uint64_t a3_size = buffer_a3.size() * sizeof(float);
1588+
REQUIRE(
1589+
tiledb_query_set_data_buffer(
1590+
ctx, query, "a3", buffer_a3_read, &a3_size) == TILEDB_OK);
1591+
1592+
uint64_t d1_size = buffer_coords_dim1.size() * sizeof(uint64_t);
1593+
REQUIRE(
1594+
tiledb_query_set_data_buffer(
1595+
ctx, query, "d1", buffer_coords_dim1_read, &d1_size) == TILEDB_OK);
1596+
uint64_t d2_size = buffer_coords_dim2.size() * sizeof(uint64_t);
1597+
REQUIRE(
1598+
tiledb_query_set_data_buffer(
1599+
ctx, query, "d2", buffer_coords_dim2_read, &d2_size) == TILEDB_OK);
1600+
1601+
// Submit the query.
1602+
REQUIRE(tiledb_query_submit(ctx, query) == TILEDB_OK);
1603+
// Close array.
1604+
REQUIRE(tiledb_array_close(ctx, array) == TILEDB_OK);
1605+
1606+
CHECK(!memcmp(buffer_a1.data(), buffer_a1_read, sizeof(buffer_a1_read)));
1607+
CHECK(!memcmp(
1608+
buffer_var_a2.data(), buffer_var_a2_read, sizeof(buffer_var_a2_read)));
1609+
CHECK(!memcmp(buffer_a2.data(), buffer_a2_read, sizeof(buffer_a2_read)));
1610+
CHECK(!memcmp(buffer_a3.data(), buffer_a3_read, sizeof(buffer_a3_read)));
1611+
CHECK(!memcmp(
1612+
buffer_coords_dim1.data(),
1613+
buffer_coords_dim1_read,
1614+
sizeof(buffer_coords_dim1_read)));
1615+
CHECK(!memcmp(
1616+
buffer_coords_dim2.data(),
1617+
buffer_coords_dim2_read,
1618+
sizeof(buffer_coords_dim2_read)));
1619+
1620+
tiledb_array_free(&array);
1621+
tiledb_query_free(&query);
1622+
}
1623+
14751624
template void check_subarray<int8_t>(
14761625
tiledb::sm::Subarray& subarray, const SubarrayRanges<int8_t>& ranges);
14771626
template void check_subarray<uint8_t>(

test/support/src/helpers.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -929,6 +929,34 @@ int deserialize_array_and_query(
929929
* @return A test fragment uri
930930
*/
931931
sm::URI generate_fragment_uri(sm::Array* array);
932+
933+
/**
934+
* Helper function to create a sparse array using format version 11.
935+
*
936+
* @param ctx TileDB context.
937+
* @param array_name The name of the new array to create.
938+
*/
939+
void create_sparse_array_v11(tiledb_ctx_t* ctx, const std::string& array_name);
940+
941+
/**
942+
* Helper function to write data to a format version 11 sparse array.
943+
*
944+
* @param ctx TileDB context.
945+
* @param array_name The name of the array to write to.
946+
* @param timestamp The timestamp to open the array for writing.
947+
*/
948+
void write_sparse_v11(
949+
tiledb_ctx_t* ctx, const std::string& array_name, uint64_t timestamp);
950+
951+
/**
952+
* Helper function to validate data read from a format version 11 sparse array.
953+
*
954+
* @param ctx TileDB context.
955+
* @param array_name The name of the array to read from.
956+
* @param timestamp The timestamp to open the array for reading.
957+
*/
958+
void read_sparse_v11(
959+
tiledb_ctx_t* ctx, const std::string& array_name, uint64_t timestamp);
932960
} // namespace tiledb::test
933961

934962
#endif

0 commit comments

Comments
 (0)