Skip to content

Commit 7902e8b

Browse files
authored
Don't reallocate buffers in fragment consolidation (#4614)
Avoid needless buffer reallocations when consolidating fragments. --- TYPE: IMPROVEMENT DESC: Avoid needless buffer reallocations when consolidating fragments.
1 parent 1579cfc commit 7902e8b

File tree

5 files changed

+350
-177
lines changed

5 files changed

+350
-177
lines changed

test/regression/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ if (TILEDB_CPP_API)
4848
list(APPEND SOURCES targets/sc-29682.cc)
4949
list(APPEND SOURCES targets/sc-33480.cc)
5050
list(APPEND SOURCES targets/sc-35424.cc)
51+
list(APPEND SOURCES targets/sc-36372.cc)
5152
list(APPEND SOURCES targets/sc-38300.cc)
5253
endif()
5354

Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
#include <limits>
2+
#include <string>
3+
4+
#include <tiledb/tiledb>
5+
6+
#include <test/support/tdb_catch.h>
7+
8+
// Regression test for sc-36372: consolidating a sparse array that has many
// var-sized attributes and many single-cell fragments.
//
// The test writes 196 fragments (one cell each) into a 6-dimensional sparse
// array with 15 var-sized attributes, then consolidates and vacuums it with
// statistics enabled so the consolidation cost can be inspected in the dump.
// It passes as long as no TileDB call throws.
//
// NOTE: the comma after the test name is required. Without it the two string
// literals are concatenated, the tags become part of the test name, and the
// test cannot be selected by tag (e.g. "[sc36372]").
TEST_CASE(
    "C++ API: Consolidation slowness in create_buffer with large number of "
    "attributes",
    "[cppapi][consolidation][sc36372]") {
  const std::string array_name = "cpp_unit_array_36372";

  // Consolidate between 2 and 4 fragments per step.
  tiledb::Config cfg;
  cfg["sm.consolidation.step_min_frags"] = 2;
  cfg["sm.consolidation.step_max_frags"] = 4;
  tiledb::Context ctx(cfg);
  tiledb::VFS vfs(ctx);

  // Start from a clean slate in case a previous run left the array behind.
  if (vfs.is_dir(array_name)) {
    vfs.remove_dir(array_name);
  }

  tiledb::Domain domain(ctx);
  const auto domain_lo = std::numeric_limits<unsigned int>::min();
  const auto domain_hi = std::numeric_limits<unsigned int>::max() - 1;

  // Create and initialize the dimensions; they share one domain and differ
  // only in tile extent.
  auto d0 = tiledb::Dimension::create<unsigned int>(
      ctx, "d0", {{domain_lo, domain_hi}}, 2);
  auto d1 = tiledb::Dimension::create<unsigned int>(
      ctx, "d1", {{domain_lo, domain_hi}}, 4);
  auto d2 = tiledb::Dimension::create<unsigned int>(
      ctx, "d2", {{domain_lo, domain_hi}}, 50);
  auto d3 = tiledb::Dimension::create<unsigned int>(
      ctx, "d3", {{domain_lo, domain_hi}}, 200);
  auto d4 = tiledb::Dimension::create<unsigned int>(
      ctx, "d4", {{domain_lo, domain_hi}}, 2);
  auto d5 = tiledb::Dimension::create<unsigned int>(
      ctx, "d5", {{domain_lo, domain_hi}}, 2);

  domain.add_dimensions(d0, d1, d2, d3, d4, d5);

  // Every attribute is var-sized, so each one needs both a data buffer and an
  // offsets buffer in the write queries below.
  auto a0 = tiledb::Attribute::create<double>(ctx, "a0").set_cell_val_num(
      TILEDB_VAR_NUM);
  auto a1 = tiledb::Attribute::create<double>(ctx, "a1").set_cell_val_num(
      TILEDB_VAR_NUM);
  auto a2 = tiledb::Attribute::create<double>(ctx, "a2").set_cell_val_num(
      TILEDB_VAR_NUM);
  auto a3 = tiledb::Attribute::create<double>(ctx, "a3").set_cell_val_num(
      TILEDB_VAR_NUM);
  auto a4 = tiledb::Attribute::create<double>(ctx, "a4").set_cell_val_num(
      TILEDB_VAR_NUM);
  auto a5 = tiledb::Attribute::create<double>(ctx, "a5").set_cell_val_num(
      TILEDB_VAR_NUM);
  auto a6 = tiledb::Attribute::create<double>(ctx, "a6").set_cell_val_num(
      TILEDB_VAR_NUM);
  auto a7 = tiledb::Attribute::create<double>(ctx, "a7").set_cell_val_num(
      TILEDB_VAR_NUM);
  auto a8 = tiledb::Attribute::create<double>(ctx, "a8").set_cell_val_num(
      TILEDB_VAR_NUM);
  auto a9 = tiledb::Attribute::create<double>(ctx, "a9").set_cell_val_num(
      TILEDB_VAR_NUM);
  auto a10 = tiledb::Attribute::create<unsigned int>(ctx, "a10")
                 .set_cell_val_num(TILEDB_VAR_NUM);
  auto a11 = tiledb::Attribute::create<unsigned int>(ctx, "a11")
                 .set_cell_val_num(TILEDB_VAR_NUM);
  auto a12 = tiledb::Attribute::create<unsigned int>(ctx, "a12")
                 .set_cell_val_num(TILEDB_VAR_NUM);
  auto a13 = tiledb::Attribute::create<float>(ctx, "a13")
                 .set_cell_val_num(TILEDB_VAR_NUM);
  auto a14 = tiledb::Attribute::create<unsigned int>(ctx, "a14")
                 .set_cell_val_num(TILEDB_VAR_NUM);

  tiledb::ArraySchema schema(ctx, TILEDB_SPARSE);
  schema.set_domain(domain);
  schema.add_attributes(
      a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14);
  schema.set_capacity(10000000);
  schema.set_cell_order(TILEDB_ROW_MAJOR);
  schema.set_tile_order(TILEDB_ROW_MAJOR);
  CHECK_NOTHROW(tiledb::Array::create(array_name, schema));

  // Write buffers: exactly one cell per write, with a single value per
  // var-sized attribute (hence one offset, 0, per attribute).
  std::vector<unsigned int> d0_data(1, 0);
  std::vector<unsigned int> d1_data(1, 0);
  std::vector<unsigned int> d2_data(1, 0);
  std::vector<unsigned int> d3_data(1, 0);
  std::vector<unsigned int> d4_data(1, 0);
  std::vector<unsigned int> d5_data(1, 0);
  std::vector<double> a0_data(1, 0);
  std::vector<uint64_t> a0_offsets(1, 0);
  std::vector<double> a1_data(1, 0);
  std::vector<uint64_t> a1_offsets(1, 0);
  std::vector<double> a2_data(1, 0);
  std::vector<uint64_t> a2_offsets(1, 0);
  std::vector<double> a3_data(1, 0);
  std::vector<uint64_t> a3_offsets(1, 0);
  std::vector<double> a4_data(1, 0);
  std::vector<uint64_t> a4_offsets(1, 0);
  std::vector<double> a5_data(1, 0);
  std::vector<uint64_t> a5_offsets(1, 0);
  std::vector<double> a6_data(1, 0);
  std::vector<uint64_t> a6_offsets(1, 0);
  std::vector<double> a7_data(1, 0);
  std::vector<uint64_t> a7_offsets(1, 0);
  std::vector<double> a8_data(1, 0);
  std::vector<uint64_t> a8_offsets(1, 0);
  std::vector<double> a9_data(1, 0);
  std::vector<uint64_t> a9_offsets(1, 0);
  std::vector<unsigned int> a10_data(1, 0);
  std::vector<uint64_t> a10_offsets(1, 0);
  std::vector<unsigned int> a11_data(1, 0);
  std::vector<uint64_t> a11_offsets(1, 0);
  std::vector<unsigned int> a12_data(1, 0);
  std::vector<uint64_t> a12_offsets(1, 0);
  std::vector<float> a13_data(1, 0);
  std::vector<uint64_t> a13_offsets(1, 0);
  std::vector<unsigned int> a14_data(1, 0);
  std::vector<uint64_t> a14_offsets(1, 0);

  // Brace-initialized so that a value that does not fit in uint8_t is a
  // compile error rather than a silent wrap (the loop counter is uint8_t too).
  constexpr uint8_t fragments_to_create{196};
  tiledb::Array array(ctx, array_name, TILEDB_WRITE);
  for (uint8_t i = 0; i < fragments_to_create; i++) {
    // Each iteration writes one cell at coordinates (i, i, i, i, i, i) with
    // every attribute set to i; each submitted query is a separate write.
    d0_data[0] = i;
    d1_data[0] = i;
    d2_data[0] = i;
    d3_data[0] = i;
    d4_data[0] = i;
    d5_data[0] = i;
    a0_data[0] = i;
    a1_data[0] = i;
    a2_data[0] = i;
    a3_data[0] = i;
    a4_data[0] = i;
    a5_data[0] = i;
    a6_data[0] = i;
    a7_data[0] = i;
    a8_data[0] = i;
    a9_data[0] = i;
    a10_data[0] = i;
    a11_data[0] = i;
    a12_data[0] = i;
    a13_data[0] = i;
    a14_data[0] = i;

    tiledb::Query query(ctx, array);
    query.set_data_buffer("d0", d0_data);
    query.set_data_buffer("d1", d1_data);
    query.set_data_buffer("d2", d2_data);
    query.set_data_buffer("d3", d3_data);
    query.set_data_buffer("d4", d4_data);
    query.set_data_buffer("d5", d5_data);

    query.set_data_buffer("a0", a0_data).set_offsets_buffer("a0", a0_offsets);
    query.set_data_buffer("a1", a1_data).set_offsets_buffer("a1", a1_offsets);
    query.set_data_buffer("a2", a2_data).set_offsets_buffer("a2", a2_offsets);
    query.set_data_buffer("a3", a3_data).set_offsets_buffer("a3", a3_offsets);
    query.set_data_buffer("a4", a4_data).set_offsets_buffer("a4", a4_offsets);
    query.set_data_buffer("a5", a5_data).set_offsets_buffer("a5", a5_offsets);
    query.set_data_buffer("a6", a6_data).set_offsets_buffer("a6", a6_offsets);
    query.set_data_buffer("a7", a7_data).set_offsets_buffer("a7", a7_offsets);
    query.set_data_buffer("a8", a8_data).set_offsets_buffer("a8", a8_offsets);
    query.set_data_buffer("a9", a9_data).set_offsets_buffer("a9", a9_offsets);
    query.set_data_buffer("a10", a10_data)
        .set_offsets_buffer("a10", a10_offsets);
    query.set_data_buffer("a11", a11_data)
        .set_offsets_buffer("a11", a11_offsets);
    query.set_data_buffer("a12", a12_data)
        .set_offsets_buffer("a12", a12_offsets);
    query.set_data_buffer("a13", a13_data)
        .set_offsets_buffer("a13", a13_offsets);
    query.set_data_buffer("a14", a14_data)
        .set_offsets_buffer("a14", a14_offsets);

    query.submit();
  }

  // Close the write handle explicitly so the array is not still open for
  // writing while it is consolidated and vacuumed below.
  array.close();

  // Consolidate with statistics enabled so the dump shows where time went.
  tiledb::Stats::enable();
  tiledb::Array::consolidate(ctx, array_name);
  tiledb::Stats::dump();
  // Turn statistics back off so collection does not stay enabled after this
  // test case finishes.
  tiledb::Stats::disable();

  // Vacuum
  tiledb::Array::vacuum(ctx, array_name);

  // Cleanup.
  if (vfs.is_dir(array_name)) {
    vfs.remove_dir(array_name);
  }
}

0 commit comments

Comments
 (0)