Skip to content

Commit b10f270

Browse files
Frank Qin and facebook-github-bot
authored and committed
Add size budget
Summary: We encountered several situations where the populator tried to create an unreasonably large data structure, especially for complex and nested thrift types (e.g. `facebook::tupperware::federation::FedUpdateRequest`). This resulted in either timeouts or OOMs. This diff adds a populator option that specifies the target size limit for the populated data structure. The size limit does not account for container overhead and union tags, so the results may be slightly larger than the specified limit.
Reviewed By: Mizuchi
Differential Revision: D73199010
fbshipit-source-id: 4c2ff359e54a84136fb4dae110ba28fd0492296d
1 parent d22aea9 commit b10f270

File tree

1 file changed

+33
-10
lines changed
  • third-party/thrift/src/thrift/lib/cpp2/reflection

1 file changed

+33
-10
lines changed

third-party/thrift/src/thrift/lib/cpp2/reflection/populator.h

Lines changed: 33 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,11 @@ struct populator_opts {
5050
// Probability to use for populating optional fields.
5151
float optional_field_prob = 0.0;
5252
size_t recursion_limit = 0;
53+
// A budget for the total size of the generated object.
54+
// The populator will stop generating more list/set/map elements if the budget
55+
// is reached. Note that the budget is not counting container overhead, so the
56+
// generated object may be slightly larger than the budget.
57+
size_t size_budget = 100 * 1024 * 1024;
5358
};
5459

5560
namespace detail {
@@ -151,6 +156,7 @@ struct State {
151156
Rng& rng;
152157
const populator_opts& opts;
153158
std::map<std::type_index, size_t> tag_counts;
159+
size_t size{};
154160
};
155161

156162
// Using the type_info of the tags to check if we are populating recursively
@@ -208,11 +214,13 @@ struct populator_methods<
208214
static void populate(
209215
detail::State<Rng>& state, std::vector<bool>::reference out) {
210216
out = next_value(state.rng);
217+
state.size += sizeof(Int);
211218
}
212219

213220
template <typename Rng>
214221
static void populate(detail::State<Rng>& state, Int& out) {
215222
out = next_value(state.rng);
223+
state.size += sizeof(Int);
216224
}
217225
};
218226

@@ -226,6 +234,7 @@ struct populator_methods<
226234
static void populate(detail::State<Rng>& state, Fp& out) {
227235
std::uniform_real_distribution<Fp> gen;
228236
out = gen(state.rng);
237+
state.size += sizeof(Fp);
229238
DVLOG(4) << "generated real: " << out;
230239
}
231240
};
@@ -247,6 +256,7 @@ struct populator_methods<type::string_t, std::string> {
247256
std::generate_n(str.begin(), length, [&]() {
248257
return static_cast<char>(char_gen(state.rng));
249258
});
259+
state.size += sizeof(char) * length;
250260

251261
DVLOG(4) << "generated string of len" << length;
252262
}
@@ -282,6 +292,7 @@ struct populator_methods<type::binary_t, std::string> {
282292
bin = std::string(length, 0);
283293
auto iter = bin.begin();
284294
generate_bytes(state.rng, bin, length, [&](uint8_t c) { *iter++ = c; });
295+
state.size += length;
285296
}
286297
};
287298

@@ -309,6 +320,7 @@ struct populator_methods<
309320
folly::io::RWUnshareCursor range(&bin);
310321
generate_bytes(
311322
state.rng, range, length, [&](uint8_t c) { range.write<uint8_t>(c); });
323+
state.size += length;
312324
}
313325
};
314326

@@ -355,6 +367,7 @@ struct populator_methods<
355367
int_type tmp;
356368
int_methods::populate(state, tmp);
357369
out = static_cast<Type>(tmp);
370+
state.size += sizeof(Type);
358371
}
359372
};
360373

@@ -377,9 +390,11 @@ struct populator_methods<type::list<ElemTag>, Type> {
377390

378391
DVLOG(3) << "populating list size " << list_size;
379392

380-
out.resize(list_size);
381-
for (decltype(list_size) i = 0; i < list_size; i++) {
382-
elem_methods::populate(state, out[i]);
393+
out.reserve(list_size);
394+
for (decltype(list_size) i = 0;
395+
i < list_size && state.size < state.opts.size_budget;
396+
i++) {
397+
elem_methods::populate(state, out.emplace_back());
383398
}
384399
}
385400
};
@@ -405,7 +420,9 @@ struct populator_methods<type::set<ElemTag>, Type> {
405420
DVLOG(3) << "populating set size " << set_size;
406421
out = Type();
407422

408-
for (decltype(set_size) i = 0; i < set_size; i++) {
423+
for (decltype(set_size) i = 0;
424+
i < set_size && state.size < state.opts.size_budget;
425+
i++) {
409426
elem_type tmp;
410427
elem_methods::populate(state, tmp);
411428
out.insert(std::move(tmp));
@@ -436,7 +453,9 @@ struct populator_methods<type::map<KeyTag, MappedTag>, Type> {
436453
DVLOG(3) << "populating map size " << map_size;
437454
out = Type();
438455

439-
for (decltype(map_size) i = 0; i < map_size; i++) {
456+
for (decltype(map_size) i = 0;
457+
i < map_size && state.size < state.opts.size_budget;
458+
i++) {
440459
key_type key_tmp;
441460
key_methods::populate(state, key_tmp);
442461
mapped_methods::populate(state, out[std::move(key_tmp)]);
@@ -449,7 +468,7 @@ template <typename Union>
449468
struct populator_methods<type::union_t<Union>, Union> {
450469
template <typename Rng>
451470
static void populate(detail::State<Rng>& state, Union& out) {
452-
DVLOG(3) << "begin writing union: "
471+
DVLOG(0) << "begin writing union: "
453472
<< op::get_class_name_v<Union> << ", type: "
454473
<< folly::to_underlying(out.getType());
455474

@@ -465,14 +484,14 @@ struct populator_methods<type::union_t<Union>, Union> {
465484
op::get_type_tag<Union, Ord>,
466485
op::get_native_type<Union, Ord>>;
467486

468-
DVLOG(3) << "writing union field "
487+
DVLOG(0) << "writing union field "
469488
<< op::get_name_v<Union, Ord> << ", fid: "
470489
<< folly::to_underlying(op::get_field_id_v<Union, Ord>);
471490

472491
methods::populate(state, op::get<Ord>(out).ensure());
473492
});
474493

475-
DVLOG(3) << "end writing union";
494+
DVLOG(0) << "end writing union";
476495
}
477496
};
478497

@@ -484,6 +503,7 @@ struct populator_methods<type::struct_t<Struct>, Struct> {
484503
public:
485504
template <typename Ord, typename Rng>
486505
void operator()(Ord, detail::State<Rng>& state, Struct& out) {
506+
DVLOG(0) << "begin writing union: " << op::get_class_name_v<Struct>;
487507
using methods = populator_methods<
488508
op::get_type_tag<Struct, Ord>,
489509
op::get_native_type<Struct, Ord>>;
@@ -494,7 +514,8 @@ struct populator_methods<type::struct_t<Struct>, Struct> {
494514
// Populate optional fields with `optional_field_prob` probability.
495515
const auto skip = //
496516
::apache::thrift::detail::is_optional_field_ref_v<field_ref_type> &&
497-
!detail::get_bernoulli(state.rng, state.opts.optional_field_prob);
517+
(state.size >= state.opts.size_budget ||
518+
!detail::get_bernoulli(state.rng, state.opts.optional_field_prob));
498519
if (skip) {
499520
return;
500521
}
@@ -504,6 +525,7 @@ struct populator_methods<type::struct_t<Struct>, Struct> {
504525
op::ensure<Ord>(out);
505526
methods::populate(
506527
state, detail::deref<field_ref_type>::clear_and_get(got));
528+
DVLOG(0) << "end writing union";
507529
}
508530
};
509531

@@ -560,7 +582,8 @@ struct populator_methods<
560582

561583
template <typename Type, typename Rng, typename Tag = detail::infer_tag<Type>>
562584
void populate(Type& out, const populator_opts& opts, Rng& rng) {
563-
detail::State<Rng> state{rng, opts, {}};
585+
detail::State<Rng> state{
586+
.rng = rng, .opts = opts, .tag_counts = {}, .size = 0};
564587
return populator_methods<Tag, Type>::populate(state, out);
565588
}
566589

0 commit comments

Comments
 (0)