Skip to content

Commit b38cea9

Browse files
committed
Add util for estimating capacity needed for flatbuffer-encoding functioncall
Signed-off-by: Ludvig Liljenberg <[email protected]>
1 parent c35c080 commit b38cea9

File tree

1 file changed

+354
-0
lines changed
  • src/hyperlight_common/src/flatbuffer_wrappers

1 file changed

+354
-0
lines changed

src/hyperlight_common/src/flatbuffer_wrappers/util.rs

Lines changed: 354 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ use alloc::vec::Vec;
1818

1919
use flatbuffers::FlatBufferBuilder;
2020

21+
use crate::flatbuffer_wrappers::function_types::ParameterValue;
2122
use crate::flatbuffers::hyperlight::generated::{
2223
FunctionCallResult as FbFunctionCallResult, FunctionCallResultArgs as FbFunctionCallResultArgs,
2324
ReturnValue as FbReturnValue, hlbool as Fbhlbool, hlboolArgs as FbhlboolArgs,
@@ -169,3 +170,356 @@ impl FlatbufferSerializable for bool {
169170
}
170171
}
171172
}
173+
174+
/// Estimates the required buffer capacity for encoding a FunctionCall with the given parameters.
175+
/// This helps avoid reallocation during FlatBuffer encoding when passing large slices and strings.
176+
///
177+
/// The function aims to be lightweight and fast and run in O(1) as long as the number of parameters is limited
178+
/// (which it is since hyperlight only currently supports up to 12).
179+
///
180+
/// Note: This estimates the capacity needed for the inner vec inside a FlatBufferBuilder. It does not
181+
/// necessarily match the size of the final encoded buffer. The estimation always rounds up to the
182+
/// nearest power of two to match FlatBufferBuilder's allocation strategy.
183+
///
184+
/// The estimations are numbers used are empirically derived based on the tests below and vaguely based
185+
/// on https://flatbuffers.dev/internals/ and https://github.com/dvidelabs/flatcc/blob/master/doc/binary-format.md#flatbuffers-binary-format
186+
#[inline] // allow cross-crate inlining (for hyperlight-host calls)
187+
pub fn estimate_flatbuffer_capacity(function_name: &str, args: &[ParameterValue]) -> usize {
188+
let mut estimated_capacity = 20;
189+
190+
// Function name overhead
191+
estimated_capacity += function_name.len() + 12;
192+
193+
// Parameters vector overhead
194+
estimated_capacity += 12 + args.len() * 6;
195+
196+
// Per-parameter overhead
197+
for arg in args {
198+
estimated_capacity += 16; // Base parameter structure
199+
estimated_capacity += match arg {
200+
ParameterValue::String(s) => s.len() + 20,
201+
ParameterValue::VecBytes(v) => v.len() + 20,
202+
ParameterValue::Int(_) | ParameterValue::UInt(_) => 16,
203+
ParameterValue::Long(_) | ParameterValue::ULong(_) => 20,
204+
ParameterValue::Float(_) => 16,
205+
ParameterValue::Double(_) => 20,
206+
ParameterValue::Bool(_) => 12,
207+
};
208+
}
209+
210+
// match how vec grows
211+
estimated_capacity.next_power_of_two()
212+
}
213+
214+
#[cfg(test)]
215+
mod tests {
216+
use super::*;
217+
use crate::flatbuffer_wrappers::function_call::{FunctionCall, FunctionCallType};
218+
use crate::flatbuffer_wrappers::function_types::{ParameterValue, ReturnType};
219+
use alloc::string::ToString;
220+
use alloc::vec;
221+
use alloc::vec::Vec;
222+
223+
/// Helper function to check that estimation is within reasonable bounds (±25%)
224+
fn assert_estimation_accuracy(
225+
function_name: &str,
226+
args: Vec<ParameterValue>,
227+
call_type: FunctionCallType,
228+
return_type: ReturnType,
229+
) {
230+
let estimated = estimate_flatbuffer_capacity(function_name, &args);
231+
232+
let fc = FunctionCall::new(
233+
function_name.to_string(),
234+
Some(args),
235+
call_type.clone(),
236+
return_type,
237+
);
238+
// Important that this FlatBufferBuilder is created with capacity 0 so it grows to its needed capacity
239+
let mut builder = FlatBufferBuilder::new();
240+
let _buffer = fc.encode(&mut builder);
241+
let actual = builder.collapse().0.capacity();
242+
243+
let lower_bound = (actual as f64 * 0.75) as usize;
244+
let upper_bound = (actual as f64 * 1.25) as usize;
245+
246+
assert!(
247+
estimated >= lower_bound && estimated <= upper_bound,
248+
"Estimation {} outside bounds [{}, {}] for actual size {} (function: {}, call_type: {:?}, return_type: {:?})",
249+
estimated,
250+
lower_bound,
251+
upper_bound,
252+
actual,
253+
function_name,
254+
call_type,
255+
return_type
256+
);
257+
}
258+
259+
/// Estimation accuracy for a call that passes no arguments at all.
#[test]
fn test_estimate_no_parameters() {
    let no_args: Vec<ParameterValue> = Vec::new();

    assert_estimation_accuracy(
        "simple_function",
        no_args,
        FunctionCallType::Guest,
        ReturnType::Void,
    );
}
268+
269+
/// Estimation accuracy for a call with exactly one `Int` argument.
#[test]
fn test_estimate_single_int_parameter() {
    let single_int = vec![ParameterValue::Int(42)];

    assert_estimation_accuracy(
        "add_one",
        single_int,
        FunctionCallType::Guest,
        ReturnType::Int,
    );
}
278+
279+
/// Estimation accuracy for a call that passes one argument of every scalar kind.
#[test]
fn test_estimate_multiple_scalar_parameters() {
    let scalars = vec![
        ParameterValue::Int(10),
        ParameterValue::UInt(20),
        ParameterValue::Long(30),
        ParameterValue::ULong(40),
        ParameterValue::Float(1.5),
        ParameterValue::Double(2.5),
        ParameterValue::Bool(true),
    ];

    assert_estimation_accuracy(
        "calculate",
        scalars,
        FunctionCallType::Guest,
        ReturnType::Double,
    );
}
296+
297+
/// Estimation accuracy for a call whose arguments are all short-to-medium strings.
#[test]
fn test_estimate_string_parameters() {
    let short_a = "hello".to_string();
    let short_b = "world".to_string();
    let longer = "this is a longer string for testing".to_string();

    assert_estimation_accuracy(
        "process_strings",
        vec![
            ParameterValue::String(short_a),
            ParameterValue::String(short_b),
            ParameterValue::String(longer),
        ],
        FunctionCallType::Host,
        ReturnType::String,
    );
}
310+
311+
/// Estimation accuracy for a single 1000-byte string argument.
#[test]
fn test_estimate_very_long_string() {
    let params = vec![ParameterValue::String("a".repeat(1000))];

    assert_estimation_accuracy(
        "process_long_string",
        params,
        FunctionCallType::Guest,
        ReturnType::String,
    );
}
321+
322+
/// Estimation accuracy for byte-vector arguments: a small one, an empty one and a 100-byte one.
#[test]
fn test_estimate_vector_parameters() {
    let small = ParameterValue::VecBytes(vec![1, 2, 3, 4, 5]);
    let empty = ParameterValue::VecBytes(vec![]);
    let hundred_zeroes = ParameterValue::VecBytes(vec![0; 100]);

    assert_estimation_accuracy(
        "process_vectors",
        vec![small, empty, hundred_zeroes],
        FunctionCallType::Host,
        ReturnType::VecBytes,
    );
}
335+
336+
/// Estimation accuracy for a call that interleaves strings, byte vectors and scalars.
#[test]
fn test_estimate_mixed_parameters() {
    let mixed = vec![
        ParameterValue::String("test".to_string()),
        ParameterValue::Int(42),
        ParameterValue::VecBytes(vec![1, 2, 3, 4, 5]),
        ParameterValue::Bool(true),
        ParameterValue::Double(553.14159),
        ParameterValue::String("another string".to_string()),
        ParameterValue::Long(9223372036854775807),
    ];

    assert_estimation_accuracy(
        "complex_function",
        mixed,
        FunctionCallType::Guest,
        ReturnType::VecBytes,
    );
}
353+
354+
/// Estimation accuracy when the function name itself is unusually long.
#[test]
fn test_estimate_large_function_name() {
    const LONG_NAME: &str =
        "very_long_function_name_that_exceeds_normal_lengths_for_testing_purposes";

    let params = vec![ParameterValue::Int(1)];

    assert_estimation_accuracy(LONG_NAME, params, FunctionCallType::Host, ReturnType::Long);
}
364+
365+
/// Estimation accuracy for a single 10000-byte vector argument.
#[test]
fn test_estimate_large_vector() {
    let params = vec![ParameterValue::VecBytes(vec![42u8; 10000])];

    assert_estimation_accuracy(
        "process_large_data",
        params,
        FunctionCallType::Guest,
        ReturnType::Bool,
    );
}
375+
376+
/// Estimation accuracy for a call that uses every `ParameterValue` variant once, with
/// extreme values for the numeric ones.
#[test]
fn test_estimate_all_parameter_types() {
    let every_variant = vec![
        ParameterValue::Int(i32::MIN),
        ParameterValue::UInt(u32::MAX),
        ParameterValue::Long(i64::MIN),
        ParameterValue::ULong(u64::MAX),
        ParameterValue::Float(f32::MIN),
        ParameterValue::Double(f64::MAX),
        ParameterValue::Bool(false),
        ParameterValue::String("test string".to_string()),
        ParameterValue::VecBytes(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
    ];

    assert_estimation_accuracy(
        "comprehensive_test",
        every_variant,
        FunctionCallType::Host,
        ReturnType::ULong,
    );
}
395+
396+
/// Estimation accuracy for both call directions: a guest call first, then a host call.
#[test]
fn test_different_function_call_types() {
    let cases = [
        ("guest_function", "guest call", FunctionCallType::Guest),
        ("host_function", "host call", FunctionCallType::Host),
    ];

    for (function_name, text, call_type) in cases {
        assert_estimation_accuracy(
            function_name,
            vec![ParameterValue::String(text.to_string())],
            call_type,
            ReturnType::String,
        );
    }
}
412+
413+
/// The estimator takes only the function name and the arguments, so calls that share the
/// same arguments must get near-identical estimates, and the estimate must stay accurate
/// whatever return type the encoded call declares.
#[test]
fn test_different_return_types() {
    let args = vec![
        ParameterValue::Int(42),
        ParameterValue::String("test".to_string()),
    ];

    // Only the length of the function name differs between these three estimates.
    let void_est = estimate_flatbuffer_capacity("test_void", &args);
    let int_est = estimate_flatbuffer_capacity("test_int", &args);
    let string_est = estimate_flatbuffer_capacity("test_string", &args);

    // Estimations should be very close (within a few bytes) since they have same parameters.
    // `abs_diff` compares the `usize` values directly instead of truncating them to `i32`.
    assert!(void_est.abs_diff(int_est) < 10);
    assert!(int_est.abs_diff(string_est) < 10);

    // The real encoded calls must also match the estimate for each return type.
    let cases = [
        ("test_void", ReturnType::Void),
        ("test_int", ReturnType::Int),
        ("test_string", ReturnType::String),
    ];
    for (function_name, return_type) in cases {
        assert_estimation_accuracy(
            function_name,
            args.clone(),
            FunctionCallType::Guest,
            return_type,
        );
    }
}
450+
451+
/// Stress test: several multi-kilobyte strings and byte vectors mixed with two scalars.
#[test]
fn test_estimate_many_large_vectors_and_strings() {
    let first_string = "Large string data: ".to_string() + &"x".repeat(2000);
    let second_string =
        "Another large string with lots of content ".to_string() + &"y".repeat(3000);
    let third_string = "Third massive string parameter ".to_string() + &"z".repeat(1500);
    let final_string = "Final large string ".to_string() + &"a".repeat(4000);

    let bulk = vec![
        ParameterValue::String(first_string),
        ParameterValue::VecBytes(vec![1u8; 5000]),
        ParameterValue::String(second_string),
        ParameterValue::VecBytes(vec![255u8; 7500]),
        ParameterValue::String(third_string),
        ParameterValue::VecBytes(vec![128u8; 10000]),
        ParameterValue::Int(42),
        ParameterValue::String(final_string),
        ParameterValue::VecBytes(vec![64u8; 2500]),
        ParameterValue::Bool(true),
    ];

    assert_estimation_accuracy(
        "process_bulk_data",
        bulk,
        FunctionCallType::Host,
        ReturnType::VecBytes,
    );
}
476+
477+
/// Stress test for parameter-count handling: twenty small parameters of mixed types.
#[test]
fn test_estimate_twenty_parameters() {
    // The float/double values are arbitrary; they deliberately avoid literals close to
    // well-known constants (3.14, 2.718), which clippy's `approx_constant` lint rejects.
    let twenty_params = vec![
        ParameterValue::Int(1),
        ParameterValue::String("param2".to_string()),
        ParameterValue::Bool(true),
        ParameterValue::Float(3.5),
        ParameterValue::VecBytes(vec![1, 2, 3]),
        ParameterValue::Long(1000000),
        ParameterValue::Double(2.75),
        ParameterValue::UInt(42),
        ParameterValue::String("param9".to_string()),
        ParameterValue::Bool(false),
        ParameterValue::ULong(9999999999),
        ParameterValue::VecBytes(vec![4, 5, 6, 7, 8]),
        ParameterValue::Int(-100),
        ParameterValue::Float(1.414),
        ParameterValue::String("param15".to_string()),
        ParameterValue::Double(1.732),
        ParameterValue::Bool(true),
        ParameterValue::VecBytes(vec![9, 10]),
        ParameterValue::Long(-5000000),
        ParameterValue::UInt(12345),
    ];

    assert_estimation_accuracy(
        "function_with_many_parameters",
        twenty_params,
        FunctionCallType::Guest,
        ReturnType::Int,
    );
}
508+
509+
/// Stress test with several megabyte-sized strings and byte vectors.
#[test]
fn test_estimate_megabyte_parameters() {
    let string_1mb = "MB String 1: ".to_string() + &"x".repeat(1_048_576);
    let bytes_2mb = vec![42u8; 2_097_152];
    let string_1_5mb = "MB String 2: ".to_string() + &"y".repeat(1_572_864);
    let bytes_3mb = vec![128u8; 3_145_728];
    let string_2mb = "MB String 3: ".to_string() + &"z".repeat(2_097_152);

    assert_estimation_accuracy(
        "process_megabyte_data",
        vec![
            ParameterValue::String(string_1mb),
            ParameterValue::VecBytes(bytes_2mb),
            ParameterValue::String(string_1_5mb),
            ParameterValue::VecBytes(bytes_3mb),
            ParameterValue::String(string_2mb),
        ],
        FunctionCallType::Host,
        ReturnType::VecBytes,
    );
}
525+
}

0 commit comments

Comments
 (0)