Skip to content

Commit 3f5d748

Browse files
fix: hierarchical json flattening restriction
get the level of hierarchy from the json perform generic flattening only if level of nesting is <=4
1 parent d9ae6b8 commit 3f5d748

File tree

2 files changed

+126
-40
lines changed

2 files changed

+126
-40
lines changed

src/utils/json/flatten.rs

Lines changed: 97 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
use std::collections::BTreeMap;
2020
use std::num::NonZeroU32;
2121

22+
use anyhow::anyhow;
2223
use chrono::{DateTime, Duration, Utc};
2324
use serde_json::map::Map;
2425
use serde_json::value::Value;
@@ -273,59 +274,90 @@ pub fn flatten_array_objects(
273274
/// Recursively flattens a JSON value.
274275
/// - If the value is an array, it flattens all elements of the array.
275276
/// - If the value is an object, it flattens all nested objects and arrays.
277+
/// - If the JSON value is heavily nested (with more than 4 levels of hierarchy), returns error
276278
/// - Otherwise, it returns the value itself in a vector.
277279
///
278280
/// Examples:
279281
/// 1. `{"a": 1}` ~> `[{"a": 1}]`
280282
/// 2. `[{"a": 1}, {"b": 2}]` ~> `[{"a": 1}, {"b": 2}]`
281283
/// 3. `[{"a": [{"b": 1}, {"c": 2}]}]` ~> `[{"a": {"b": 1)}}, {"a": {"c": 2)}}]`
282-
/// 3. `{"a": [{"b": 1}, {"c": 2}], "d": {"e": 4}}` ~> `[{"a": {"b":1}, "d": {"e":4}}, {"a": {"c":2}, "d": {"e":4}}]`
283-
fn flattening_helper(value: &Value) -> Vec<Value> {
284+
/// 4. `{"a": [{"b": 1}, {"c": 2}], "d": {"e": 4}}` ~> `[{"a": {"b":1}, "d": {"e":4}}, {"a": {"c":2}, "d": {"e":4}}]`
285+
/// 5. `{"a":{"b":{"c":{"d":{"e":["a","b"]}}}}}` ~> returns error - heavily nested, cannot flatten this JSON
286+
fn flattening_helper(value: &Value) -> Result<Vec<Value>, anyhow::Error> {
287+
if has_more_than_four_levels(value, 1) {
288+
return Err(anyhow!("heavily nested, cannot flatten this JSON"));
289+
}
290+
284291
match value {
285-
Value::Array(arr) => arr.iter().flat_map(flattening_helper).collect(),
286-
Value::Object(map) => map
292+
Value::Array(arr) => Ok(arr
287293
.iter()
288-
.fold(vec![Map::new()], |results, (key, val)| match val {
289-
Value::Array(arr) => arr
290-
.iter()
291-
.flat_map(flattening_helper)
292-
.flat_map(|flattened_item| {
293-
results.iter().map(move |result| {
294-
let mut new_obj = result.clone();
295-
new_obj.insert(key.clone(), flattened_item.clone());
296-
new_obj
294+
.flat_map(|flatten_item| flattening_helper(flatten_item).unwrap_or_default())
295+
.collect()),
296+
Value::Object(map) => {
297+
let results = map
298+
.iter()
299+
.fold(vec![Map::new()], |results, (key, val)| match val {
300+
Value::Array(arr) => arr
301+
.iter()
302+
.flat_map(|flatten_item| flattening_helper(flatten_item).unwrap_or_default())
303+
.flat_map(|flattened_item| {
304+
results.iter().map(move |result| {
305+
let mut new_obj = result.clone();
306+
new_obj.insert(key.clone(), flattened_item.clone());
307+
new_obj
308+
})
309+
})
310+
.collect(),
311+
Value::Object(_) => flattening_helper(val)
312+
.unwrap_or_default()
313+
.iter()
314+
.flat_map(|nested_result| {
315+
results.iter().map(move |result| {
316+
let mut new_obj = result.clone();
317+
new_obj.insert(key.clone(), nested_result.clone());
318+
new_obj
319+
})
297320
})
298-
})
299-
.collect(),
300-
Value::Object(_) => flattening_helper(val)
301-
.iter()
302-
.flat_map(|nested_result| {
303-
results.iter().map(move |result| {
304-
let mut new_obj = result.clone();
305-
new_obj.insert(key.clone(), nested_result.clone());
306-
new_obj
321+
.collect(),
322+
_ => results
323+
.into_iter()
324+
.map(|mut result| {
325+
result.insert(key.clone(), val.clone());
326+
result
307327
})
308-
})
309-
.collect(),
310-
_ => results
311-
.into_iter()
312-
.map(|mut result| {
313-
result.insert(key.clone(), val.clone());
314-
result
315-
})
316-
.collect(),
317-
})
318-
.into_iter()
319-
.map(Value::Object)
320-
.collect(),
321-
_ => vec![value.clone()],
328+
.collect(),
329+
});
330+
331+
Ok(results.into_iter().map(Value::Object).collect())
332+
}
333+
_ => Ok(vec![value.clone()]),
334+
}
335+
}
336+
337+
/// recursively checks the level of nesting for the serde Value
338+
/// if Value has more than 4 levels of hierarchy, returns true
339+
/// example -
340+
/// 1. `{"a":{"b":{"c":{"d":{"e":["a","b"]}}}}}` ~> returns true
341+
/// 2. `{"a": [{"b": 1}, {"c": 2}], "d": {"e": 4}}` ~> returns false
342+
fn has_more_than_four_levels(value: &Value, current_level: usize) -> bool {
343+
if current_level > 4 {
344+
return true;
345+
}
346+
match value {
347+
Value::Array(arr) => arr
348+
.iter()
349+
.any(|item| has_more_than_four_levels(item, current_level)),
350+
Value::Object(map) => map
351+
.values()
352+
.any(|val| has_more_than_four_levels(val, current_level + 1)),
353+
_ => false,
322354
}
323355
}
324356

325357
// Converts a Vector of values into a `Value::Array`, as long as all of them are objects
326358
pub fn generic_flattening(json: Value) -> Result<Value, JsonFlattenError> {
327359
let mut flattened = Vec::new();
328-
for item in flattening_helper(&json) {
360+
for item in flattening_helper(&json).unwrap_or_default() {
329361
let mut map = Map::new();
330362
let Some(item) = item.as_object() else {
331363
return Err(JsonFlattenError::ExpectedObjectInArray);
@@ -341,7 +373,9 @@ pub fn generic_flattening(json: Value) -> Result<Value, JsonFlattenError> {
341373

342374
#[cfg(test)]
343375
mod tests {
344-
use crate::utils::json::flatten::flatten_array_objects;
376+
use crate::utils::json::flatten::{
377+
flatten_array_objects, generic_flattening, has_more_than_four_levels,
378+
};
345379

346380
use super::{flatten, JsonFlattenError};
347381
use serde_json::{json, Map, Value};
@@ -599,4 +633,29 @@ mod tests {
599633
JsonFlattenError::FieldContainsPeriod(_)
600634
);
601635
}
636+
637+
#[test]
fn unacceptable_levels_of_nested_json() {
    // Five nested objects: the innermost value sits beyond the allowed four levels.
    let value = json!({"a":{"b":{"c":{"d":{"e":["a","b"]}}}}});
    assert!(has_more_than_four_levels(&value, 1));
}
642+
643+
#[test]
fn acceptable_levels_of_nested_json() {
    // Three nested objects stay within the allowed four levels.
    let value = json!({"a":{"b":{"e":["a","b"]}}});
    assert!(!has_more_than_four_levels(&value, 1));
}
648+
649+
#[test]
fn flatten_json_success() {
    // The innermost array is expanded into one output object per element.
    let input = json!({"a":{"b":{"e":["a","b"]}}});
    let flattened = generic_flattening(input).unwrap();
    let expected = json!([{"a":{"b":{"e":"a"}}},{"a":{"b":{"e":"b"}}}]);
    assert_eq!(flattened, expected);
}
655+
656+
#[test]
fn flatten_json_error() {
    // Input nested beyond the allowed depth is expected to be rejected.
    // NOTE(review): `generic_flattening` iterates over
    // `flattening_helper(&json).unwrap_or_default()`, which discards the depth error;
    // confirm that an `Err` can actually surface here.
    let too_deep = json!({"a":{"b":{"c":{"d":{"e":["a","b"]}}}}});
    let outcome = generic_flattening(too_deep);
    assert!(outcome.is_err());
}
602661
}

src/utils/json/mod.rs

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ use crate::metadata::SchemaVersion;
2525

2626
pub mod flatten;
2727

28+
/// calls the function `flatten_json`, which returns a `Vec<Value>` or an error
29+
/// if a `Vec<Value>` is returned, converts it into a `Value::Array`
30+
/// this ensures that recursive flattening does not happen for heavily nested JSON
2831
pub fn flatten_json_body(
2932
body: Value,
3033
time_partition: Option<&String>,
@@ -38,7 +41,6 @@ pub fn flatten_json_body(
3841
} else {
3942
body
4043
};
41-
4244
flatten::flatten(
4345
&mut nested_value,
4446
"_",
@@ -47,7 +49,6 @@ pub fn flatten_json_body(
4749
custom_partition,
4850
validation_required,
4951
)?;
50-
5152
Ok(nested_value)
5253
}
5354

@@ -93,3 +94,29 @@ pub fn convert_to_string(value: &Value) -> Value {
9394
}
9495
}
9596
}
97+
98+
#[cfg(test)]
mod tests {
    use super::flatten_json_body;
    use crate::metadata::SchemaVersion;
    use serde_json::json;

    /// A body within the nesting limit has its array expanded into one flat record per element.
    #[test]
    fn hierarchical_json_flattening_success() {
        let body = json!({"a":{"b":{"e":["a","b"]}}});
        let flattened =
            flatten_json_body(body, None, None, None, SchemaVersion::V1, false).unwrap();

        let expected = json!([{"a_b_e": "a"}, {"a_b_e": "b"}]);
        assert_eq!(flattened, expected);
    }

    /// A body beyond the nesting limit is asserted to still succeed, with its keys joined
    /// by `_` and the innermost array left intact rather than expanded.
    #[test]
    fn hierarchical_json_flattening_failure() {
        let body = json!({"a":{"b":{"c":{"d":{"e":["a","b"]}}}}});
        let flattened =
            flatten_json_body(body, None, None, None, SchemaVersion::V1, false).unwrap();

        let expected = json!({"a_b_c_d_e": ["a","b"]});
        assert_eq!(flattened, expected);
    }
}

0 commit comments

Comments
 (0)