From 3f5d748bce79af4cd3e7aecac5eb9ac9c96c9501 Mon Sep 17 00:00:00 2001 From: Nikhil Sinha Date: Mon, 30 Dec 2024 00:59:13 -0500 Subject: [PATCH 1/6] fix: hierarchical json flattening restriction get the level of hierarchy from the json perform generic flattening only if level of nesting is <=4 --- src/utils/json/flatten.rs | 135 +++++++++++++++++++++++++++----------- src/utils/json/mod.rs | 31 ++++++++- 2 files changed, 126 insertions(+), 40 deletions(-) diff --git a/src/utils/json/flatten.rs b/src/utils/json/flatten.rs index afd17ace6..90deddeaf 100644 --- a/src/utils/json/flatten.rs +++ b/src/utils/json/flatten.rs @@ -19,6 +19,7 @@ use std::collections::BTreeMap; use std::num::NonZeroU32; +use anyhow::anyhow; use chrono::{DateTime, Duration, Utc}; use serde_json::map::Map; use serde_json::value::Value; @@ -273,59 +274,90 @@ pub fn flatten_array_objects( /// Recursively flattens a JSON value. /// - If the value is an array, it flattens all elements of the array. /// - If the value is an object, it flattens all nested objects and arrays. +/// - If the JSON value is heavily nested (with more than 4 levels of hierarchy), returns error /// - Otherwise, it returns the value itself in a vector. /// /// Examples: /// 1. `{"a": 1}` ~> `[{"a": 1}]` /// 2. `[{"a": 1}, {"b": 2}]` ~> `[{"a": 1}, {"b": 2}]` /// 3. `[{"a": [{"b": 1}, {"c": 2}]}]` ~> `[{"a": {"b": 1)}}, {"a": {"c": 2)}}]` -/// 3. `{"a": [{"b": 1}, {"c": 2}], "d": {"e": 4}}` ~> `[{"a": {"b":1}, "d": {"e":4}}, {"a": {"c":2}, "d": {"e":4}}]` -fn flattening_helper(value: &Value) -> Vec { +/// 4. `{"a": [{"b": 1}, {"c": 2}], "d": {"e": 4}}` ~> `[{"a": {"b":1}, "d": {"e":4}}, {"a": {"c":2}, "d": {"e":4}}]` +/// 5. `{"a":{"b":{"c":{"d":{"e":["a","b"]}}}}}` ~> returns error - heavily nested, cannot flatten this JSON +fn flattening_helper(value: &Value) -> Result, anyhow::Error> { + if has_more_than_four_levels(value, 1) { + return Err(anyhow!("heavily nested, cannot flatten this JSON")); + } + match value { - Value::Array(arr) => arr.iter().flat_map(flattening_helper).collect(), - Value::Object(map) => map + Value::Array(arr) => Ok(arr .iter() - .fold(vec![Map::new()], |results, (key, val)| match val { - Value::Array(arr) => arr - .iter() - .flat_map(flattening_helper) - .flat_map(|flattened_item| { - results.iter().map(move |result| { - let mut new_obj = result.clone(); - new_obj.insert(key.clone(), flattened_item.clone()); - new_obj + .flat_map(|flatten_item| flattening_helper(flatten_item).unwrap_or_default()) + .collect()), + Value::Object(map) => { + let results = map + .iter() + .fold(vec![Map::new()], |results, (key, val)| match val { + Value::Array(arr) => arr + .iter() + .flat_map(|flatten_item| flattening_helper(flatten_item).unwrap_or_default()) + .flat_map(|flattened_item| { + results.iter().map(move |result| { + let mut new_obj = result.clone(); + new_obj.insert(key.clone(), flattened_item.clone()); + new_obj + }) + }) + .collect(), + Value::Object(_) => flattening_helper(val) + .unwrap_or_default() + .iter() + .flat_map(|nested_result| { + results.iter().map(move |result| { + let mut new_obj = result.clone(); + new_obj.insert(key.clone(), nested_result.clone()); + new_obj + }) }) - }) - .collect(), - Value::Object(_) => flattening_helper(val) - .iter() - .flat_map(|nested_result| { - results.iter().map(move |result| { - let mut new_obj = result.clone(); - new_obj.insert(key.clone(), nested_result.clone()); - new_obj + .collect(), + _ => results + .into_iter() + .map(|mut result| { + result.insert(key.clone(), val.clone()); + result }) - }) - .collect(), - _ => results - .into_iter() - .map(|mut result| { - result.insert(key.clone(), val.clone()); - result - }) - .collect(), - }) - .into_iter() - .map(Value::Object) - .collect(), - _ => vec![value.clone()], + .collect(), + }); + + Ok(results.into_iter().map(Value::Object).collect()) + } + _ => Ok(vec![value.clone()]), + } +} + +/// recursively checks the level of nesting for the serde Value +/// if Value has more than 4 levels of hierarchy, returns true +/// example - +/// 1. `{"a":{"b":{"c":{"d":{"e":["a","b"]}}}}}` ~> returns true +/// 2. `{"a": [{"b": 1}, {"c": 2}], "d": {"e": 4}}` ~> returns false +fn has_more_than_four_levels(value: &Value, current_level: usize) -> bool { + if current_level > 4 { + return true; + } + match value { + Value::Array(arr) => arr + .iter() + .any(|item| has_more_than_four_levels(item, current_level)), + Value::Object(map) => map + .values() + .any(|val| has_more_than_four_levels(val, current_level + 1)), + _ => false, } } // Converts a Vector of values into a `Value::Array`, as long as all of them are objects pub fn generic_flattening(json: Value) -> Result { let mut flattened = Vec::new(); - for item in flattening_helper(&json) { + for item in flattening_helper(&json).unwrap_or_default() { let mut map = Map::new(); let Some(item) = item.as_object() else { return Err(JsonFlattenError::ExpectedObjectInArray); @@ -341,7 +373,9 @@ pub fn generic_flattening(json: Value) -> Result { #[cfg(test)] mod tests { - use crate::utils::json::flatten::flatten_array_objects; + use crate::utils::json::flatten::{ + flatten_array_objects, generic_flattening, has_more_than_four_levels, + }; use super::{flatten, JsonFlattenError}; use serde_json::{json, Map, Value}; @@ -599,4 +633,29 @@ mod tests { JsonFlattenError::FieldContainsPeriod(_) ); } + + #[test] + fn unacceptable_levels_of_nested_json() { + let value = json!({"a":{"b":{"c":{"d":{"e":["a","b"]}}}}}); + assert_eq!(has_more_than_four_levels(&value, 1), true); + } + + #[test] + fn acceptable_levels_of_nested_json() { + let value = json!({"a":{"b":{"e":["a","b"]}}}); + assert_eq!(has_more_than_four_levels(&value, 1), false); + } + + #[test] + fn flatten_json_success() { + let value = json!({"a":{"b":{"e":["a","b"]}}}); + let expected = json!([{"a":{"b":{"e":"a"}}},{"a":{"b":{"e":"b"}}}]); + assert_eq!(generic_flattening(value).unwrap(), expected); + } + + #[test] + fn flatten_json_error() { + let value = json!({"a":{"b":{"c":{"d":{"e":["a","b"]}}}}}); + assert!(generic_flattening(value).is_err()); + } } diff --git a/src/utils/json/mod.rs b/src/utils/json/mod.rs index 1ef31c804..9a0550f01 100644 --- a/src/utils/json/mod.rs +++ b/src/utils/json/mod.rs @@ -25,6 +25,9 @@ use crate::metadata::SchemaVersion; pub mod flatten; +/// calls the function `flatten_json` which results Vec or Error +/// in case when Vec is returned, converts the Vec to Value of Array +/// this is to ensure recursive flattening does not happen for heavily nested jsons pub fn flatten_json_body( body: Value, time_partition: Option<&String>, @@ -38,7 +41,6 @@ pub fn flatten_json_body( } else { body }; - flatten::flatten( &mut nested_value, "_", @@ -47,7 +49,6 @@ pub fn flatten_json_body( custom_partition, validation_required, )?; - Ok(nested_value) } @@ -93,3 +94,29 @@ pub fn convert_to_string(value: &Value) -> Value { } } } + +#[cfg(test)] +mod tests { + use super::flatten_json_body; + use serde_json::json; + + #[test] + fn hierarchical_json_flattening_success() { + let value = json!({"a":{"b":{"e":["a","b"]}}}); + let expected = json!([{"a_b_e": "a"}, {"a_b_e": "b"}]); + assert_eq!( + flatten_json_body(value, None, None, None, crate::metadata::SchemaVersion::V1, false).unwrap(), + expected + ); + } + + #[test] + fn hierarchical_json_flattening_failure() { + let value = json!({"a":{"b":{"c":{"d":{"e":["a","b"]}}}}}); + let expected = json!({"a_b_c_d_e": ["a","b"]}); + assert_eq!( + flatten_json_body(value, None, None, None,crate::metadata::SchemaVersion::V1, false).unwrap(), + expected + ); + } +} From e645168b4e531e88b6832f66274523dccdd990ad Mon Sep 17 00:00:00 2001 From: Nikhil Sinha Date: Mon, 30 Dec 2024 01:50:29 -0500 Subject: [PATCH 2/6] deepsource: test methods - assert_eq updated to assert --- src/utils/json/flatten.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/utils/json/flatten.rs b/src/utils/json/flatten.rs index 90deddeaf..47e998cd1 100644 --- a/src/utils/json/flatten.rs +++ b/src/utils/json/flatten.rs @@ -637,13 +637,13 @@ mod tests { #[test] fn unacceptable_levels_of_nested_json() { let value = json!({"a":{"b":{"c":{"d":{"e":["a","b"]}}}}}); - assert_eq!(has_more_than_four_levels(&value, 1), true); + assert!(has_more_than_four_levels(&value, 1)); } #[test] fn acceptable_levels_of_nested_json() { let value = json!({"a":{"b":{"e":["a","b"]}}}); - assert_eq!(has_more_than_four_levels(&value, 1), false); + assert!(!has_more_than_four_levels(&value, 1)); } #[test] From adf87dd1780ac54be90cad4db433150507b03a99 Mon Sep 17 00:00:00 2001 From: Nikhil Sinha Date: Sat, 4 Jan 2025 02:32:36 -0500 Subject: [PATCH 3/6] fix: ingestion flow and generic flattening --- src/handlers/http/modal/utils/ingest_utils.rs | 202 ++++++++++++++---- src/utils/json/flatten.rs | 32 +-- src/utils/json/mod.rs | 24 ++- 3 files changed, 195 insertions(+), 63 deletions(-) diff --git a/src/handlers/http/modal/utils/ingest_utils.rs b/src/handlers/http/modal/utils/ingest_utils.rs index 69d22c2e5..4ea637c00 100644 --- a/src/handlers/http/modal/utils/ingest_utils.rs +++ b/src/handlers/http/modal/utils/ingest_utils.rs @@ -23,13 +23,12 @@ use anyhow::anyhow; use arrow_schema::Field; use bytes::Bytes; use chrono::{DateTime, NaiveDateTime, Utc}; -use itertools::Itertools; use serde_json::Value; use crate::{ event::{ + self, format::{self, EventFormat}, - Event, }, handlers::{ http::{ingest::PostError, kinesis}, @@ -73,61 +72,174 @@ pub async fn push_logs( let custom_partition = STREAM_INFO.get_custom_partition(stream_name)?; let schema_version = STREAM_INFO.get_schema_version(stream_name)?; let body_val: Value = serde_json::from_slice(body)?; - let data = convert_array_to_object( - body_val, - time_partition.as_ref(), - time_partition_limit, - custom_partition.as_ref(), - schema_version, - )?; - for value in data { - let origin_size = serde_json::to_vec(&value).unwrap().len() as u64; // string length need not be the same as byte length - let parsed_timestamp = match time_partition.as_ref() { - Some(time_partition) => get_parsed_timestamp(&value, time_partition)?, - _ => Utc::now().naive_utc(), - }; - let custom_partition_values = match custom_partition.as_ref() { - Some(custom_partition) => { - let custom_partitions = custom_partition.split(',').collect_vec(); - get_custom_partition_values(&value, &custom_partitions) + let size: usize = body.len(); + let mut parsed_timestamp = Utc::now().naive_utc(); + if time_partition.is_none() { + if custom_partition.is_none() { + let size = size as u64; + create_process_record_batch( + stream_name, + req, + body_val, + static_schema_flag.as_ref(), + None, + parsed_timestamp, + &HashMap::new(), + size, + schema_version, + ) + .await?; + } else { + let data = convert_array_to_object( + body_val, + None, + None, + custom_partition.as_ref(), + schema_version, + )?; + let custom_partition = custom_partition.unwrap(); + let custom_partition_list = custom_partition.split(',').collect::>(); + + for value in data { + let custom_partition_values = + get_custom_partition_values(&value, &custom_partition_list); + + let size = value.to_string().into_bytes().len() as u64; + create_process_record_batch( + stream_name, + req, + value, + static_schema_flag.as_ref(), + None, + parsed_timestamp, + &custom_partition_values, + size, + schema_version, + ) + .await?; } - None => HashMap::new(), - }; - let schema = STREAM_INFO - .read() - .unwrap() - .get(stream_name) - .ok_or(PostError::StreamNotFound(stream_name.to_owned()))? - .schema - .clone(); - let (rb, is_first_event) = into_event_batch( - req, - &value, - schema, - static_schema_flag.as_ref(), + } + } else if custom_partition.is_none() { + let data = convert_array_to_object( + body_val, + time_partition.as_ref(), + time_partition_limit, + None, + schema_version, + )?; + for value in data { + parsed_timestamp = get_parsed_timestamp(&value, time_partition.as_ref().unwrap())?; + let size = value.to_string().into_bytes().len() as u64; + create_process_record_batch( + stream_name, + req, + value, + static_schema_flag.as_ref(), + time_partition.as_ref(), + parsed_timestamp, + &HashMap::new(), + size, + schema_version, + ) + .await?; + } + } else { + let data = convert_array_to_object( + body_val, time_partition.as_ref(), + time_partition_limit, + custom_partition.as_ref(), schema_version, )?; + let custom_partition = custom_partition.unwrap(); + let custom_partition_list = custom_partition.split(',').collect::>(); - Event { - rb, - stream_name: stream_name.to_owned(), - origin_format: "json", - origin_size, - is_first_event, - parsed_timestamp, - time_partition: time_partition.clone(), - custom_partition_values, - stream_type: StreamType::UserDefined, + for value in data { + let custom_partition_values = + get_custom_partition_values(&value, &custom_partition_list); + + parsed_timestamp = get_parsed_timestamp(&value, time_partition.as_ref().unwrap())?; + let size = value.to_string().into_bytes().len() as u64; + create_process_record_batch( + stream_name, + req, + value, + static_schema_flag.as_ref(), + time_partition.as_ref(), + parsed_timestamp, + &custom_partition_values, + size, + schema_version, + ) + .await?; } - .process() - .await?; } Ok(()) } +#[allow(clippy::too_many_arguments)] +pub async fn create_process_record_batch( + stream_name: &str, + req: &HttpRequest, + value: Value, + static_schema_flag: Option<&String>, + time_partition: Option<&String>, + parsed_timestamp: NaiveDateTime, + custom_partition_values: &HashMap, + origin_size: u64, + schema_version: SchemaVersion, +) -> Result<(), PostError> { + let (rb, is_first_event) = get_stream_schema( + stream_name, + req, + &value, + static_schema_flag, + time_partition, + schema_version, + )?; + event::Event { + rb, + stream_name: stream_name.to_owned(), + origin_format: "json", + origin_size, + is_first_event, + parsed_timestamp, + time_partition: time_partition.cloned(), + custom_partition_values: custom_partition_values.clone(), + stream_type: StreamType::UserDefined, + } + .process() + .await?; + + Ok(()) +} + +pub fn get_stream_schema( + stream_name: &str, + req: &HttpRequest, + body: &Value, + static_schema_flag: Option<&String>, + time_partition: Option<&String>, + schema_version: SchemaVersion, +) -> Result<(arrow_array::RecordBatch, bool), PostError> { + let hash_map = STREAM_INFO.read().unwrap(); + let schema = hash_map + .get(stream_name) + .ok_or(PostError::StreamNotFound(stream_name.to_owned()))? + .schema + .clone(); + into_event_batch( + req, + body, + schema, + static_schema_flag, + time_partition, + schema_version, + ) +} + pub fn into_event_batch( req: &HttpRequest, body: &Value, diff --git a/src/utils/json/flatten.rs b/src/utils/json/flatten.rs index 47e998cd1..a837bbd01 100644 --- a/src/utils/json/flatten.rs +++ b/src/utils/json/flatten.rs @@ -19,7 +19,6 @@ use std::collections::BTreeMap; use std::num::NonZeroU32; -use anyhow::anyhow; use chrono::{DateTime, Duration, Utc}; use serde_json::map::Map; use serde_json::value::Value; @@ -50,6 +49,8 @@ pub enum JsonFlattenError { ExpectedObjectInArray, #[error("Found non-object element while flattening array of objects")] NonObjectInArray, + #[error("heavily nested, cannot flatten this JSON")] + HeavilyNestedJson, } // Recursively flattens JSON objects and arrays, e.g. with the separator `.`, starting from the TOP @@ -283,15 +284,15 @@ pub fn flatten_array_objects( /// 3. `[{"a": [{"b": 1}, {"c": 2}]}]` ~> `[{"a": {"b": 1)}}, {"a": {"c": 2)}}]` /// 4. `{"a": [{"b": 1}, {"c": 2}], "d": {"e": 4}}` ~> `[{"a": {"b":1}, "d": {"e":4}}, {"a": {"c":2}, "d": {"e":4}}]` /// 5. `{"a":{"b":{"c":{"d":{"e":["a","b"]}}}}}` ~> returns error - heavily nested, cannot flatten this JSON -fn flattening_helper(value: &Value) -> Result, anyhow::Error> { +pub fn generic_flattening(value: &Value) -> Result, JsonFlattenError> { if has_more_than_four_levels(value, 1) { - return Err(anyhow!("heavily nested, cannot flatten this JSON")); + return Err(JsonFlattenError::HeavilyNestedJson); } match value { Value::Array(arr) => Ok(arr .iter() - .flat_map(|flatten_item| flattening_helper(flatten_item).unwrap_or_default()) + .flat_map(|flatten_item| generic_flattening(flatten_item).unwrap_or_default()) .collect()), Value::Object(map) => { let results = map @@ -299,7 +300,9 @@ fn flattening_helper(value: &Value) -> Result, anyhow::Error> { .fold(vec![Map::new()], |results, (key, val)| match val { Value::Array(arr) => arr .iter() - .flat_map(|flatten_item| flattening_helper(flatten_item).unwrap_or_default()) + .flat_map(|flatten_item| { + generic_flattening(flatten_item).unwrap_or_default() + }) .flat_map(|flattened_item| { results.iter().map(move |result| { let mut new_obj = result.clone(); @@ -308,7 +311,7 @@ fn flattening_helper(value: &Value) -> Result, anyhow::Error> { }) }) .collect(), - Value::Object(_) => flattening_helper(val) + Value::Object(_) => generic_flattening(val) .unwrap_or_default() .iter() .flat_map(|nested_result| { @@ -355,9 +358,9 @@ fn has_more_than_four_levels(value: &Value, current_level: usize) -> bool { } // Converts a Vector of values into a `Value::Array`, as long as all of them are objects -pub fn generic_flattening(json: Value) -> Result { - let mut flattened = Vec::new(); - for item in flattening_helper(&json).unwrap_or_default() { +pub fn convert_to_array(flattened: Vec) -> Result { + let mut result = Vec::new(); + for item in flattened { let mut map = Map::new(); let Some(item) = item.as_object() else { return Err(JsonFlattenError::ExpectedObjectInArray); @@ -365,10 +368,9 @@ pub fn generic_flattening(json: Value) -> Result { for (key, value) in item { map.insert(key.clone(), value.clone()); } - flattened.push(Value::Object(map)); + result.push(Value::Object(map)); } - - Ok(Value::Array(flattened)) + Ok(Value::Array(result)) } #[cfg(test)] @@ -649,13 +651,13 @@ mod tests { #[test] fn flatten_json_success() { let value = json!({"a":{"b":{"e":["a","b"]}}}); - let expected = json!([{"a":{"b":{"e":"a"}}},{"a":{"b":{"e":"b"}}}]); - assert_eq!(generic_flattening(value).unwrap(), expected); + let expected = vec![json!({"a":{"b":{"e":"a"}}}), json!({"a":{"b":{"e":"b"}}})]; + assert_eq!(generic_flattening(&value).unwrap(), expected); } #[test] fn flatten_json_error() { let value = json!({"a":{"b":{"c":{"d":{"e":["a","b"]}}}}}); - assert!(generic_flattening(value).is_err()); + assert!(generic_flattening(&value).is_err()); } } diff --git a/src/utils/json/mod.rs b/src/utils/json/mod.rs index 9a0550f01..6bd977552 100644 --- a/src/utils/json/mod.rs +++ b/src/utils/json/mod.rs @@ -37,7 +37,9 @@ pub fn flatten_json_body( validation_required: bool, ) -> Result { let mut nested_value = if schema_version == SchemaVersion::V1 { - flatten::generic_flattening(body)? + flatten::generic_flattening(&body) + .map(flatten::convert_to_array) + .unwrap_or(Ok(body))? } else { body }; @@ -105,7 +107,15 @@ mod tests { let value = json!({"a":{"b":{"e":["a","b"]}}}); let expected = json!([{"a_b_e": "a"}, {"a_b_e": "b"}]); assert_eq!( - flatten_json_body(value, None, None, None, crate::metadata::SchemaVersion::V1, false).unwrap(), + flatten_json_body( + value, + None, + None, + None, + crate::metadata::SchemaVersion::V1, + false + ) + .unwrap(), expected ); } @@ -115,7 +125,15 @@ mod tests { let value = json!({"a":{"b":{"c":{"d":{"e":["a","b"]}}}}}); let expected = json!({"a_b_c_d_e": ["a","b"]}); assert_eq!( - flatten_json_body(value, None, None, None,crate::metadata::SchemaVersion::V1, false).unwrap(), + flatten_json_body( + value, + None, + None, + None, + crate::metadata::SchemaVersion::V1, + false + ) + .unwrap(), expected ); } From 4a47e0775de77af0854b9d2015b252bfc26cf6f5 Mon Sep 17 00:00:00 2001 From: Nikhil Sinha Date: Sat, 4 Jan 2025 02:41:32 -0500 Subject: [PATCH 4/6] deepsource fix --- src/utils/json/mod.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/utils/json/mod.rs b/src/utils/json/mod.rs index 6bd977552..be4fa729e 100644 --- a/src/utils/json/mod.rs +++ b/src/utils/json/mod.rs @@ -18,6 +18,7 @@ use std::num::NonZeroU32; +use flatten::{convert_to_array, generic_flattening}; use serde_json; use serde_json::Value; @@ -37,9 +38,11 @@ pub fn flatten_json_body( validation_required: bool, ) -> Result { let mut nested_value = if schema_version == SchemaVersion::V1 { - flatten::generic_flattening(&body) - .map(flatten::convert_to_array) - .unwrap_or(Ok(body))? + if let Ok(flattened_json) = generic_flattening(&body) { + convert_to_array(flattened_json)? + } else { + body + } } else { body }; From bd7e6521dd0819b9555869fd927119ee63951136 Mon Sep 17 00:00:00 2001 From: Devdutt Shenoi Date: Sun, 5 Jan 2025 14:51:01 +0530 Subject: [PATCH 5/6] perf: test nesting level only once --- src/utils/json/flatten.rs | 16 ++-------------- src/utils/json/mod.rs | 14 ++++++-------- 2 files changed, 8 insertions(+), 22 deletions(-) diff --git a/src/utils/json/flatten.rs b/src/utils/json/flatten.rs index a837bbd01..58809618c 100644 --- a/src/utils/json/flatten.rs +++ b/src/utils/json/flatten.rs @@ -49,8 +49,6 @@ pub enum JsonFlattenError { ExpectedObjectInArray, #[error("Found non-object element while flattening array of objects")] NonObjectInArray, - #[error("heavily nested, cannot flatten this JSON")] - HeavilyNestedJson, } // Recursively flattens JSON objects and arrays, e.g. with the separator `.`, starting from the TOP @@ -285,10 +283,6 @@ pub fn flatten_array_objects( /// 4. `{"a": [{"b": 1}, {"c": 2}], "d": {"e": 4}}` ~> `[{"a": {"b":1}, "d": {"e":4}}, {"a": {"c":2}, "d": {"e":4}}]` /// 5. `{"a":{"b":{"c":{"d":{"e":["a","b"]}}}}}` ~> returns error - heavily nested, cannot flatten this JSON pub fn generic_flattening(value: &Value) -> Result, JsonFlattenError> { - if has_more_than_four_levels(value, 1) { - return Err(JsonFlattenError::HeavilyNestedJson); - } - match value { Value::Array(arr) => Ok(arr .iter() @@ -342,7 +336,7 @@ pub fn generic_flattening(value: &Value) -> Result, JsonFlattenError> /// example - /// 1. `{"a":{"b":{"c":{"d":{"e":["a","b"]}}}}}` ~> returns true /// 2. `{"a": [{"b": 1}, {"c": 2}], "d": {"e": 4}}` ~> returns false -fn has_more_than_four_levels(value: &Value, current_level: usize) -> bool { +pub fn has_more_than_four_levels(value: &Value, current_level: usize) -> bool { if current_level > 4 { return true; } @@ -649,15 +643,9 @@ mod tests { } #[test] - fn flatten_json_success() { + fn flatten_json() { let value = json!({"a":{"b":{"e":["a","b"]}}}); let expected = vec![json!({"a":{"b":{"e":"a"}}}), json!({"a":{"b":{"e":"b"}}})]; assert_eq!(generic_flattening(&value).unwrap(), expected); } - - #[test] - fn flatten_json_error() { - let value = json!({"a":{"b":{"c":{"d":{"e":["a","b"]}}}}}); - assert!(generic_flattening(&value).is_err()); - } } diff --git a/src/utils/json/mod.rs b/src/utils/json/mod.rs index be4fa729e..6736d7ead 100644 --- a/src/utils/json/mod.rs +++ b/src/utils/json/mod.rs @@ -18,7 +18,7 @@ use std::num::NonZeroU32; -use flatten::{convert_to_array, generic_flattening}; +use flatten::{convert_to_array, generic_flattening, has_more_than_four_levels}; use serde_json; use serde_json::Value; @@ -37,14 +37,12 @@ pub fn flatten_json_body( schema_version: SchemaVersion, validation_required: bool, ) -> Result { - let mut nested_value = if schema_version == SchemaVersion::V1 { - if let Ok(flattened_json) = generic_flattening(&body) { - convert_to_array(flattened_json)? - } else { - body - } - } else { + // Flatten the json body only if new schema and has less than 4 levels of nesting + let mut nested_value = if schema_version == SchemaVersion::V0 || has_more_than_four_levels(&body, 1) { body + } else { + let flattened_json = generic_flattening(&body)?; + convert_to_array(flattened_json)? }; flatten::flatten( &mut nested_value, From 475b34300e1c5bbdee86e4043452afcbcf7ad233 Mon Sep 17 00:00:00 2001 From: Devdutt Shenoi Date: Sun, 5 Jan 2025 14:53:01 +0530 Subject: [PATCH 6/6] style: cargo fmt --- src/utils/json/mod.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/utils/json/mod.rs b/src/utils/json/mod.rs index 6736d7ead..0d3ac1e79 100644 --- a/src/utils/json/mod.rs +++ b/src/utils/json/mod.rs @@ -38,12 +38,13 @@ pub fn flatten_json_body( validation_required: bool, ) -> Result { // Flatten the json body only if new schema and has less than 4 levels of nesting - let mut nested_value = if schema_version == SchemaVersion::V0 || has_more_than_four_levels(&body, 1) { - body - } else { - let flattened_json = generic_flattening(&body)?; - convert_to_array(flattened_json)? - }; + let mut nested_value = + if schema_version == SchemaVersion::V0 || has_more_than_four_levels(&body, 1) { + body + } else { + let flattened_json = generic_flattening(&body)?; + convert_to_array(flattened_json)? + }; flatten::flatten( &mut nested_value, "_",