Skip to content

Commit 8eb10c9

Browse files
authored
feat(estimated-size): add method to compute estimated size for value (#708)
1 parent 3e19f7a commit 8eb10c9

File tree

1 file changed

+325
-0
lines changed

1 file changed

+325
-0
lines changed

src/base/value.rs

Lines changed: 325 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,24 @@ impl KeyValue {
360360
_ => 1,
361361
}
362362
}
363+
364+
pub fn estimated_detached_byte_size(&self) -> usize {
365+
match self {
366+
KeyValue::Bytes(v) => v.len(),
367+
KeyValue::Str(v) => v.len(),
368+
KeyValue::Struct(v) => {
369+
v.iter()
370+
.map(KeyValue::estimated_detached_byte_size)
371+
.sum::<usize>()
372+
+ v.len() * std::mem::size_of::<KeyValue>()
373+
}
374+
KeyValue::Bool(_)
375+
| KeyValue::Int64(_)
376+
| KeyValue::Range(_)
377+
| KeyValue::Uuid(_)
378+
| KeyValue::Date(_) => 0,
379+
}
380+
}
363381
}
364382

365383
#[derive(Debug, Clone, PartialEq, Deserialize)]
@@ -548,6 +566,57 @@ impl BasicValue {
548566
BasicValue::UnionVariant { .. } => "union",
549567
}
550568
}
569+
570+
/// Returns the estimated byte size of the value, for detached data (i.e. allocated on heap).
571+
pub fn estimated_detached_byte_size(&self) -> usize {
572+
fn json_estimated_detached_byte_size(val: &serde_json::Value) -> usize {
573+
match val {
574+
serde_json::Value::String(s) => s.len(),
575+
serde_json::Value::Array(arr) => {
576+
arr.iter()
577+
.map(json_estimated_detached_byte_size)
578+
.sum::<usize>()
579+
+ arr.len() * std::mem::size_of::<serde_json::Value>()
580+
}
581+
serde_json::Value::Object(map) => map
582+
.iter()
583+
.map(|(k, v)| {
584+
std::mem::size_of::<serde_json::map::Entry>()
585+
+ k.len()
586+
+ json_estimated_detached_byte_size(v)
587+
})
588+
.sum(),
589+
serde_json::Value::Null
590+
| serde_json::Value::Bool(_)
591+
| serde_json::Value::Number(_) => 0,
592+
}
593+
}
594+
match self {
595+
BasicValue::Bytes(v) => v.len(),
596+
BasicValue::Str(v) => v.len(),
597+
BasicValue::Json(v) => json_estimated_detached_byte_size(v),
598+
BasicValue::Vector(v) => {
599+
v.iter()
600+
.map(BasicValue::estimated_detached_byte_size)
601+
.sum::<usize>()
602+
+ v.len() * std::mem::size_of::<BasicValue>()
603+
}
604+
BasicValue::UnionVariant { value, .. } => {
605+
value.estimated_detached_byte_size() + std::mem::size_of::<BasicValue>()
606+
}
607+
BasicValue::Bool(_)
608+
| BasicValue::Int64(_)
609+
| BasicValue::Float32(_)
610+
| BasicValue::Float64(_)
611+
| BasicValue::Range(_)
612+
| BasicValue::Uuid(_)
613+
| BasicValue::Date(_)
614+
| BasicValue::Time(_)
615+
| BasicValue::LocalDateTime(_)
616+
| BasicValue::OffsetDateTime(_)
617+
| BasicValue::TimeDelta(_) => 0,
618+
}
619+
}
551620
}
552621

553622
#[derive(Debug, Clone, Default, PartialEq, Deserialize)]
@@ -786,6 +855,31 @@ impl<VS> Value<VS> {
786855
}
787856
}
788857

858+
impl Value<ScopeValue> {
859+
pub fn estimated_byte_size(&self) -> usize {
860+
std::mem::size_of::<Self>()
861+
+ match self {
862+
Value::Null => 0,
863+
Value::Basic(v) => v.estimated_detached_byte_size(),
864+
Value::Struct(v) => v.estimated_detached_byte_size(),
865+
(Value::UTable(v) | Value::LTable(v)) => {
866+
v.iter()
867+
.map(|v| v.estimated_detached_byte_size())
868+
.sum::<usize>()
869+
+ v.len() * std::mem::size_of::<ScopeValue>()
870+
}
871+
Value::KTable(v) => {
872+
v.iter()
873+
.map(|(k, v)| {
874+
k.estimated_detached_byte_size() + v.estimated_detached_byte_size()
875+
})
876+
.sum::<usize>()
877+
+ v.len() * std::mem::size_of::<(String, ScopeValue)>()
878+
}
879+
}
880+
}
881+
}
882+
789883
#[derive(Debug, Clone, PartialEq, Deserialize)]
790884
pub struct FieldValues<VS = ScopeValue> {
791885
pub fields: Vec<Value<VS>>,
@@ -860,6 +954,16 @@ where
860954
}
861955
}
862956

957+
impl FieldValues<ScopeValue> {
958+
pub fn estimated_detached_byte_size(&self) -> usize {
959+
self.fields
960+
.iter()
961+
.map(Value::estimated_byte_size)
962+
.sum::<usize>()
963+
+ self.fields.len() * std::mem::size_of::<Value<ScopeValue>>()
964+
}
965+
}
966+
863967
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
864968
/// Newtype wrapper around a [`FieldValues`] using its default type parameter.
pub struct ScopeValue(pub FieldValues);
865969

@@ -1211,3 +1315,224 @@ pub mod test_util {
12111315
Ok(roundtrip_value)
12121316
}
12131317
}
1318+
1319+
#[cfg(test)]
1320+
mod tests {
1321+
use super::*;
1322+
use std::collections::BTreeMap;
1323+
1324+
#[test]
fn test_estimated_byte_size_null() {
    // `Null` carries no payload, so only the inline footprint is reported.
    let inline_size = std::mem::size_of::<Value<ScopeValue>>();
    let null_value: Value<ScopeValue> = Value::Null;
    assert_eq!(null_value.estimated_byte_size(), inline_size);
}
1330+
1331+
#[test]
fn test_estimated_byte_size_basic_primitive() {
    // Primitives report no detached bytes, so the estimate equals the inline
    // footprint of the enclosing `Value`.
    let inline_size = std::mem::size_of::<Value<ScopeValue>>();
    let size_of_basic =
        |basic: BasicValue| Value::<ScopeValue>::Basic(basic).estimated_byte_size();

    assert_eq!(size_of_basic(BasicValue::Bool(true)), inline_size);
    assert_eq!(size_of_basic(BasicValue::Int64(42)), inline_size);
    // Any float works here; the literal is chosen so that it does not look like
    // an approximation of a named constant such as PI.
    assert_eq!(size_of_basic(BasicValue::Float64(2.5)), inline_size);
}
1346+
1347+
#[test]
fn test_estimated_byte_size_basic_string() {
    // A string adds exactly its byte length on top of the inline footprint.
    let text = "hello world";
    let expected_size = std::mem::size_of::<Value<ScopeValue>>() + text.len();

    let size =
        Value::<ScopeValue>::Basic(BasicValue::Str(Arc::from(text))).estimated_byte_size();

    assert_eq!(size, expected_size);
}
1356+
1357+
#[test]
fn test_estimated_byte_size_basic_bytes() {
    // A byte buffer adds exactly its length on top of the inline footprint.
    let payload: &[u8] = b"hello world";
    let expected_size = std::mem::size_of::<Value<ScopeValue>>() + payload.len();

    let buffer = Bytes::from(payload.to_vec());
    let size = Value::<ScopeValue>::Basic(BasicValue::Bytes(buffer)).estimated_byte_size();

    assert_eq!(size, expected_size);
}
1366+
1367+
#[test]
fn test_estimated_byte_size_basic_json() {
    let document = serde_json::json!({"key": "value", "number": 42});
    let size = Value::<ScopeValue>::Basic(BasicValue::Json(Arc::from(document)))
        .estimated_byte_size();

    // The exact figure depends on how the JSON is represented internally, so
    // only require that the document contributes something beyond the inline
    // footprint.
    assert!(size > std::mem::size_of::<Value<ScopeValue>>());
}
1377+
1378+
#[test]
fn test_estimated_byte_size_basic_vector() {
    let words = ["hello", "world"];

    // Two string elements followed by one integer element.
    let mut elements: Vec<BasicValue> = words
        .iter()
        .map(|word| BasicValue::Str(Arc::from(*word)))
        .collect();
    elements.push(BasicValue::Int64(42));
    let element_count = elements.len();

    let size = Value::<ScopeValue>::Basic(BasicValue::Vector(Arc::from(elements)))
        .estimated_byte_size();

    // Lower bound: inline footprint, the string payloads, and one inline
    // `BasicValue` per element.
    let text_bytes: usize = words.iter().map(|word| word.len()).sum();
    let expected_min_size = std::mem::size_of::<Value<ScopeValue>>()
        + text_bytes
        + element_count * std::mem::size_of::<BasicValue>();
    assert!(size >= expected_min_size);
}
1395+
1396+
#[test]
fn test_estimated_byte_size_struct() {
    let label = "test";
    let value = Value::<ScopeValue>::Struct(FieldValues {
        fields: vec![
            Value::<ScopeValue>::Basic(BasicValue::Str(Arc::from(label))),
            Value::<ScopeValue>::Basic(BasicValue::Int64(123)),
        ],
    });
    let size = value.estimated_byte_size();

    // Lower bound: the struct's own inline footprint, the string payload, and
    // one inline `Value` per field.
    let inline_size = std::mem::size_of::<Value<ScopeValue>>();
    let expected_min_size = inline_size + label.len() + 2 * inline_size;
    assert!(size >= expected_min_size);
}
1411+
1412+
#[test]
fn test_estimated_byte_size_utable() {
    let labels = ["item1", "item2"];

    // One single-field scope per label.
    let scopes: Vec<ScopeValue> = labels
        .iter()
        .map(|label| {
            ScopeValue(FieldValues {
                fields: vec![Value::<ScopeValue>::Basic(BasicValue::Str(Arc::from(
                    *label,
                )))],
            })
        })
        .collect();
    let scope_count = scopes.len();

    let size = Value::<ScopeValue>::UTable(scopes).estimated_byte_size();

    // Lower bound: inline footprint, the string payloads, and one inline
    // `ScopeValue` per element.
    let text_bytes: usize = labels.iter().map(|label| label.len()).sum();
    let expected_min_size = std::mem::size_of::<Value<ScopeValue>>()
        + text_bytes
        + scope_count * std::mem::size_of::<ScopeValue>();
    assert!(size >= expected_min_size);
}
1435+
1436+
#[test]
fn test_estimated_byte_size_ltable() {
    let labels = ["list1", "list2"];

    // One single-field scope per label.
    let scopes: Vec<ScopeValue> = labels
        .iter()
        .map(|label| {
            ScopeValue(FieldValues {
                fields: vec![Value::<ScopeValue>::Basic(BasicValue::Str(Arc::from(
                    *label,
                )))],
            })
        })
        .collect();
    let scope_count = scopes.len();

    let size = Value::<ScopeValue>::LTable(scopes).estimated_byte_size();

    // Lower bound: inline footprint, the string payloads, and one inline
    // `ScopeValue` per element.
    let text_bytes: usize = labels.iter().map(|label| label.len()).sum();
    let expected_min_size = std::mem::size_of::<Value<ScopeValue>>()
        + text_bytes
        + scope_count * std::mem::size_of::<ScopeValue>();
    assert!(size >= expected_min_size);
}
1459+
1460+
#[test]
fn test_estimated_byte_size_ktable() {
    let pairs = [("key1", "value1"), ("key2", "value2")];

    // One string-keyed entry per pair, each holding a single string field.
    let table: BTreeMap<KeyValue, ScopeValue> = pairs
        .iter()
        .map(|(key, text)| {
            (
                KeyValue::Str(Arc::from(*key)),
                ScopeValue(FieldValues {
                    fields: vec![Value::<ScopeValue>::Basic(BasicValue::Str(Arc::from(
                        *text,
                    )))],
                }),
            )
        })
        .collect();
    let entry_count = table.len();

    let size = Value::<ScopeValue>::KTable(table).estimated_byte_size();

    // Lower bound: inline footprint, key and value string payloads, and a
    // per-entry inline term.
    let payload_bytes: usize = pairs
        .iter()
        .map(|(key, text)| key.len() + text.len())
        .sum();
    let expected_min_size = std::mem::size_of::<Value<ScopeValue>>()
        + payload_bytes
        + entry_count * std::mem::size_of::<(String, ScopeValue)>();
    assert!(size >= expected_min_size);
}
1490+
1491+
#[test]
fn test_estimated_byte_size_nested_struct() {
    let (outer_label, inner_label) = ("outer", "inner");

    // Two-field struct that is embedded as the second field of the outer one.
    let nested = Value::<ScopeValue>::Struct(FieldValues {
        fields: vec![
            Value::<ScopeValue>::Basic(BasicValue::Str(Arc::from(inner_label))),
            Value::<ScopeValue>::Basic(BasicValue::Int64(456)),
        ],
    });
    let outer_struct = Value::<ScopeValue>::Struct(FieldValues {
        fields: vec![
            Value::<ScopeValue>::Basic(BasicValue::Str(Arc::from(outer_label))),
            nested,
        ],
    });

    let size = outer_struct.estimated_byte_size();

    // Lower bound: the outer inline footprint, both string payloads, and one
    // inline `Value` for each of the four fields across both levels.
    let inline_size = std::mem::size_of::<Value<ScopeValue>>();
    let expected_min_size =
        inline_size + outer_label.len() + inner_label.len() + 4 * inline_size;
    assert!(size >= expected_min_size);
}
1515+
1516+
#[test]
fn test_estimated_byte_size_empty_collections() {
    // An empty container holds nothing out of line, so every case below must
    // report exactly the inline footprint.
    let inline_size = std::mem::size_of::<Value<ScopeValue>>();

    // Empty UTable
    let utable = Value::<ScopeValue>::UTable(Vec::new());
    assert_eq!(utable.estimated_byte_size(), inline_size);

    // Empty LTable
    let ltable = Value::<ScopeValue>::LTable(Vec::new());
    assert_eq!(ltable.estimated_byte_size(), inline_size);

    // Empty KTable
    let ktable = Value::<ScopeValue>::KTable(BTreeMap::new());
    assert_eq!(ktable.estimated_byte_size(), inline_size);

    // Empty Struct
    let empty_struct = Value::<ScopeValue>::Struct(FieldValues { fields: Vec::new() });
    assert_eq!(empty_struct.estimated_byte_size(), inline_size);
}
1538+
}

0 commit comments

Comments
 (0)