Skip to content

Commit 738f238

Browse files
authored
Set increased traversal limit in enumeration deserialization (#5365)
Until now, we only applied an increased traversal limit when deserializing certain objects, such as Query, groups, and metadata of all sorts. For everything else, such as array schema evolution, we used the default Cap'n Proto traversal limit, which is [64MB](https://capnproto.org/encoding.html#:~:text=We%20call%20this%20limit%20the,a%20different%20limit%20if%20desired.). We hit a real-life scenario in which evolving the array schema exceeded the traversal limit and failed. This was most probably caused by the addition of many enumerations in a new schema, and large enumerations appear to be common in certain scientific use cases. To cope with such cases, this PR sets an increased traversal limit, read from the `rest.capnp_traversal_limit` config parameter, for every deserialization that includes enumerations: schema evolution, array schema, and load enumerations response. TYPE: BUG DESC: Set increased traversal limit in enumeration deserialization
1 parent 08acabc commit 738f238

File tree

10 files changed

+51
-13
lines changed

10 files changed

+51
-13
lines changed

test/src/unit-enumerations.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2940,7 +2940,7 @@ shared_ptr<ArraySchemaEvolution> EnumerationFx::ser_des_array_schema_evolution(
29402940

29412941
ArraySchemaEvolution* ret;
29422942
throw_if_not_ok(serialization::array_schema_evolution_deserialize(
2943-
&ret, stype, buf, memory_tracker_));
2943+
&ret, cfg_, stype, buf, memory_tracker_));
29442944

29452945
return shared_ptr<ArraySchemaEvolution>(ret);
29462946
}

test/src/unit-request-handlers.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -546,7 +546,7 @@ HandleLoadArraySchemaRequestFx::call_handler(
546546
REQUIRE(rval == TILEDB_OK);
547547

548548
return serialization::deserialize_load_array_schema_response(
549-
uri_, stype, resp_buf->buffer(), memory_tracker_);
549+
uri_, cfg_, stype, resp_buf->buffer(), memory_tracker_);
550550
}
551551

552552
shared_ptr<ArraySchema> HandleQueryPlanRequestFx::create_schema() {

tiledb/sm/c_api/tiledb.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1572,6 +1572,7 @@ int32_t tiledb_deserialize_array_schema_evolution(
15721572
ctx,
15731573
tiledb::sm::serialization::array_schema_evolution_deserialize(
15741574
&((*array_schema_evolution)->array_schema_evolution_),
1575+
ctx->config(),
15751576
(tiledb::sm::SerializationType)serialize_type,
15761577
buffer->buffer(),
15771578
memory_tracker))) {

tiledb/sm/rest/rest_client_remote.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,7 @@ RestClientRemote::post_array_schema_from_rest(
274274
// Ensure data has a null delimiter for cap'n proto if using JSON
275275
throw_if_not_ok(ensure_json_null_delimited_string(&returned_data));
276276
return serialization::deserialize_load_array_schema_response(
277-
uri, serialization_type_, returned_data, memory_tracker_);
277+
uri, config, serialization_type_, returned_data, memory_tracker_);
278278
}
279279

280280
Status RestClientRemote::post_array_schema_to_rest(
@@ -616,7 +616,7 @@ RestClientRemote::post_enumerations_from_rest(
616616
// Ensure data has a null delimiter for cap'n proto if using JSON
617617
throw_if_not_ok(ensure_json_null_delimited_string(&returned_data));
618618
return serialization::deserialize_load_enumerations_response(
619-
array_schema, serialization_type_, returned_data, memory_tracker);
619+
array_schema, config, serialization_type_, returned_data, memory_tracker);
620620
}
621621

622622
void RestClientRemote::post_query_plan_from_rest(

tiledb/sm/serialization/array_schema.cc

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1824,6 +1824,7 @@ std::tuple<
18241824
std::unordered_map<std::string, shared_ptr<ArraySchema>>>
18251825
deserialize_load_array_schema_response(
18261826
const URI& uri,
1827+
const Config& config,
18271828
SerializationType serialization_type,
18281829
span<const char> data,
18291830
shared_ptr<MemoryTracker> memory_tracker) {
@@ -1840,10 +1841,19 @@ deserialize_load_array_schema_response(
18401841
uri, reader, memory_tracker);
18411842
}
18421843
case SerializationType::CAPNP: {
1844+
// Set traversal limit from config
1845+
uint64_t limit =
1846+
config.get<uint64_t>("rest.capnp_traversal_limit").value();
1847+
::capnp::ReaderOptions readerOptions;
1848+
// capnp uses the limit in words
1849+
readerOptions.traversalLimitInWords = limit / sizeof(::capnp::word);
1850+
18431851
const auto mBytes = reinterpret_cast<const kj::byte*>(data.data());
1844-
::capnp::FlatArrayMessageReader array_reader(kj::arrayPtr(
1845-
reinterpret_cast<const ::capnp::word*>(mBytes),
1846-
data.size() / sizeof(::capnp::word)));
1852+
::capnp::FlatArrayMessageReader array_reader(
1853+
kj::arrayPtr(
1854+
reinterpret_cast<const ::capnp::word*>(mBytes),
1855+
data.size() / sizeof(::capnp::word)),
1856+
readerOptions);
18471857
auto reader = array_reader.getRoot<capnp::LoadArraySchemaResponse>();
18481858
return load_array_schema_response_from_capnp(
18491859
uri, reader, memory_tracker);
@@ -1926,6 +1936,7 @@ std::tuple<
19261936
std::unordered_map<std::string, shared_ptr<ArraySchema>>>
19271937
deserialize_load_array_schema_response(
19281938
const URI&,
1939+
const Config&,
19291940
SerializationType,
19301941
span<const char>,
19311942
shared_ptr<MemoryTracker>) {

tiledb/sm/serialization/array_schema.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,7 @@ std::tuple<
208208
std::unordered_map<std::string, shared_ptr<ArraySchema>>>
209209
deserialize_load_array_schema_response(
210210
const URI& uri,
211+
const Config& config,
211212
SerializationType serialization_type,
212213
span<const char> data,
213214
shared_ptr<MemoryTracker> memory_tracker);

tiledb/sm/serialization/array_schema_evolution.cc

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,7 @@ Status array_schema_evolution_serialize(
289289

290290
Status array_schema_evolution_deserialize(
291291
ArraySchemaEvolution** array_schema_evolution,
292+
const Config& config,
292293
SerializationType serialize_type,
293294
span<const char> serialized_buffer,
294295
shared_ptr<MemoryTracker> memory_tracker) {
@@ -312,11 +313,20 @@ Status array_schema_evolution_deserialize(
312313
break;
313314
}
314315
case SerializationType::CAPNP: {
316+
// Set traversal limit from config
317+
uint64_t limit =
318+
config.get<uint64_t>("rest.capnp_traversal_limit").value();
319+
::capnp::ReaderOptions readerOptions;
320+
// capnp uses the limit in words
321+
readerOptions.traversalLimitInWords = limit / sizeof(::capnp::word);
322+
315323
const auto mBytes =
316324
reinterpret_cast<const kj::byte*>(serialized_buffer.data());
317-
::capnp::FlatArrayMessageReader reader(kj::arrayPtr(
318-
reinterpret_cast<const ::capnp::word*>(mBytes),
319-
serialized_buffer.size() / sizeof(::capnp::word)));
325+
::capnp::FlatArrayMessageReader reader(
326+
kj::arrayPtr(
327+
reinterpret_cast<const ::capnp::word*>(mBytes),
328+
serialized_buffer.size() / sizeof(::capnp::word)),
329+
readerOptions);
320330
capnp::ArraySchemaEvolution::Reader array_schema_evolution_reader =
321331
reader.getRoot<capnp::ArraySchemaEvolution>();
322332
decoded_array_schema_evolution = array_schema_evolution_from_capnp(
@@ -363,6 +373,7 @@ Status array_schema_evolution_serialize(
363373

364374
Status array_schema_evolution_deserialize(
365375
ArraySchemaEvolution**,
376+
const Config&,
366377
SerializationType,
367378
span<const char>,
368379
shared_ptr<MemoryTracker>) {

tiledb/sm/serialization/array_schema_evolution.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,13 +70,15 @@ Status array_schema_evolution_serialize(
7070
/**
7171
* Deserialize an array schema evolution via Cap'n Proto
7272
* @param array_schema_evolution pointer to store evolution object in
73+
* @param config associated config object
7374
* @param serialize_type format to serialize into Cap'n Proto or JSON
7475
* @param serialized_buffer buffer where serialized bytes are stored
7576
* @param memory_tracker memory tracker associated with the evolution object
7677
* @return
7778
*/
7879
Status array_schema_evolution_deserialize(
7980
ArraySchemaEvolution** array_schema_evolution,
81+
const Config& config,
8082
SerializationType serialize_type,
8183
span<const char> serialized_buffer,
8284
shared_ptr<MemoryTracker> memory_tracker);

tiledb/sm/serialization/enumeration.cc

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,7 @@ void serialize_load_enumerations_response(
346346
std::unordered_map<std::string, std::vector<shared_ptr<const Enumeration>>>
347347
deserialize_load_enumerations_response(
348348
const ArraySchema& array_schema,
349+
const Config& config,
349350
SerializationType serialize_type,
350351
span<const char> response,
351352
shared_ptr<MemoryTracker> memory_tracker) {
@@ -362,10 +363,19 @@ deserialize_load_enumerations_response(
362363
reader, array_schema, memory_tracker);
363364
}
364365
case SerializationType::CAPNP: {
366+
// Set traversal limit from config
367+
uint64_t limit =
368+
config.get<uint64_t>("rest.capnp_traversal_limit").value();
369+
::capnp::ReaderOptions readerOptions;
370+
// capnp uses the limit in words
371+
readerOptions.traversalLimitInWords = limit / sizeof(::capnp::word);
372+
365373
const auto mBytes = reinterpret_cast<const kj::byte*>(response.data());
366-
::capnp::FlatArrayMessageReader array_reader(kj::arrayPtr(
367-
reinterpret_cast<const ::capnp::word*>(mBytes),
368-
response.size() / sizeof(::capnp::word)));
374+
::capnp::FlatArrayMessageReader array_reader(
375+
kj::arrayPtr(
376+
reinterpret_cast<const ::capnp::word*>(mBytes),
377+
response.size() / sizeof(::capnp::word)),
378+
readerOptions);
369379
capnp::LoadEnumerationsResponse::Reader reader =
370380
array_reader.getRoot<capnp::LoadEnumerationsResponse>();
371381
return load_enumerations_response_from_capnp(
@@ -414,6 +424,7 @@ void serialize_load_enumerations_response(
414424
std::unordered_map<std::string, std::vector<shared_ptr<const Enumeration>>>
415425
deserialize_load_enumerations_response(
416426
const Array&,
427+
const Config&,
417428
SerializationType,
418429
span<const char>,
419430
shared_ptr<MemoryTracker>) {

tiledb/sm/serialization/enumeration.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ void serialize_load_enumerations_response(
9393
std::unordered_map<std::string, std::vector<shared_ptr<const Enumeration>>>
9494
deserialize_load_enumerations_response(
9595
const ArraySchema& array_schema,
96+
const Config& config,
9697
SerializationType serialization_type,
9798
span<const char> response,
9899
shared_ptr<MemoryTracker> memory_tracker);

0 commit comments

Comments
 (0)