diff --git a/Cargo.lock b/Cargo.lock index 52de304dca..4ea3567934 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4923,6 +4923,12 @@ dependencies = [ "tracing", ] +[[package]] +name = "diff" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" + [[package]] name = "digest" version = "0.10.7" @@ -6170,6 +6176,14 @@ dependencies = [ "num", ] +[[package]] +name = "fractured_json" +version = "0.1.0" +dependencies = [ + "pretty_assertions", + "serde_json", +] + [[package]] name = "freetype-sys" version = "0.20.1" @@ -13890,6 +13904,16 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" +[[package]] +name = "pretty_assertions" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ae130e2f271fbc2ac3a40fb1d07180839cdbbe443c7a27e1e3c13c5cac0116d" +dependencies = [ + "diff", + "yansi", +] + [[package]] name = "prettyplease" version = "0.2.37" @@ -23396,6 +23420,12 @@ dependencies = [ "hashlink 0.10.0", ] +[[package]] +name = "yansi" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" + [[package]] name = "yansi-term" version = "0.1.2" diff --git a/Cargo.toml b/Cargo.toml index 3238f7e9a8..8480d7b690 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -55,6 +55,7 @@ hypr-download-interface = { path = "crates/download-interface", package = "downl hypr-eval = { path = "crates/eval", package = "eval" } hypr-extensions-runtime = { path = "crates/extensions-runtime", package = "extensions-runtime" } hypr-file = { path = "crates/file", package = "file" } +hypr-fractured-json = { path = "crates/fractured-json", package = "fractured_json" } hypr-gbnf = { path = "crates/gbnf", package = "gbnf" } hypr-gguf = { path = 
"crates/gguf", package = "gguf" } hypr-granola = { path = "crates/granola", package = "granola" } diff --git a/crates/fractured-json/.gitignore b/crates/fractured-json/.gitignore new file mode 100644 index 0000000000..ea8c4bf7f3 --- /dev/null +++ b/crates/fractured-json/.gitignore @@ -0,0 +1 @@ +/target diff --git a/crates/fractured-json/Cargo.lock b/crates/fractured-json/Cargo.lock new file mode 100644 index 0000000000..ff12193551 --- /dev/null +++ b/crates/fractured-json/Cargo.lock @@ -0,0 +1,128 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "diff" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" + +[[package]] +name = "fractured_json" +version = "0.1.0" +dependencies = [ + "pretty_assertions", + "serde_json", +] + +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "pretty_assertions" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ae130e2f271fbc2ac3a40fb1d07180839cdbbe443c7a27e1e3c13c5cac0116d" +dependencies = [ + "diff", + "yansi", +] + +[[package]] +name = "proc-macro2" +version = "1.0.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9695f8df41bb4f3d222c95a67532365f569318332d03d5f3f67f37b20e6ebdf0" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.148" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3084b546a1dd6289475996f182a22aba973866ea8e8b02c51d9f46b1336a22da" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "syn" +version = "2.0.112" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21f182278bf2d2bcb3c88b1b08a37df029d71ce3d3ae26168e3c653b213b99d4" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "yansi" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" + +[[package]] +name = "zmij" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "317f17ff091ac4515f17cc7a190d2769a8c9a96d227de5d64b500b01cda8f2cd" diff --git 
a/crates/fractured-json/Cargo.toml b/crates/fractured-json/Cargo.toml
new file mode 100644
index 0000000000..9b0394c361
--- /dev/null
+++ b/crates/fractured-json/Cargo.toml
@@ -0,0 +1,12 @@
+[package]
+name = "fractured_json"
+version = "0.1.0"
+edition = "2021"
+license = "MIT"
+description = "A JSON formatter that produces human-readable output with smart line breaks and table alignment"
+
+[dependencies]
+serde_json = "1.0"
+
+[dev-dependencies]
+pretty_assertions = "1.4"
diff --git a/crates/fractured-json/src/formatter.rs b/crates/fractured-json/src/formatter.rs
new file mode 100644
index 0000000000..564e3fa883
--- /dev/null
+++ b/crates/fractured-json/src/formatter.rs
@@ -0,0 +1,1107 @@
+use crate::item::{JsonItem, JsonItemType};
+use crate::options::{FracturedJsonOptions, NumberListAlignment, TableCommaPlacement};
+use crate::padded_tokens::{BracketPaddingType, PaddedFormattingTokens};
+use crate::template::{TableColumnType, TableTemplate};
+use serde_json::Value;
+
+/// A structure for formatting JSON data in a human-friendly way.
+pub struct Formatter {
+    /// Options consulted by every formatting decision made by this formatter.
+    pub options: FracturedJsonOptions,
+}
+
+impl Default for Formatter {
+    fn default() -> Self {
+        Self {
+            options: FracturedJsonOptions::default(),
+        }
+    }
+}
+
+impl Formatter {
+    /// Creates a new Formatter with default options.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Creates a new Formatter with the given options.
+    pub fn with_options(options: FracturedJsonOptions) -> Self {
+        Self { options }
+    }
+
+    /// Reads in JSON text and returns a nicely-formatted string of the same content.
+    ///
+    /// `starting_depth` is forwarded unchanged to [`Formatter::format_value`].
+    ///
+    /// # Errors
+    /// Returns `Err` carrying a `"Failed to parse JSON: …"` message when
+    /// `serde_json` rejects `json_text`.
+    pub fn reformat(&self, json_text: &str, starting_depth: usize) -> Result<String, String> {
+        let value: Value =
+            serde_json::from_str(json_text).map_err(|e| format!("Failed to parse JSON: {}", e))?;
+        Ok(self.format_value(&value, starting_depth))
+    }
+
+    /// Formats a serde_json::Value into a nicely-formatted string.
+    pub fn format_value(&self, value: &Value, starting_depth: usize) -> String {
+        let mut item = JsonItem::from_value(value, None);
+        let pads = PaddedFormattingTokens::new(&self.options);
+
+        // Lengths must be known for the whole tree before any layout decision is made.
+        self.compute_item_lengths(&mut item, &pads);
+
+        let mut buffer = String::new();
+        self.format_item(&mut buffer, &item, starting_depth, false, None, &pads);
+
+        buffer
+    }
+
+    /// Minifies JSON text by removing all unnecessary whitespace.
+    ///
+    /// # Errors
+    /// Returns `Err` carrying a `"Failed to parse JSON: …"` message when
+    /// `serde_json` rejects `json_text`.
+    pub fn minify(&self, json_text: &str) -> Result<String, String> {
+        let value: Value =
+            serde_json::from_str(json_text).map_err(|e| format!("Failed to parse JSON: {}", e))?;
+        Ok(self.minify_value(&value))
+    }
+
+    /// Minifies a serde_json::Value by delegating to its `to_string` output.
+    pub fn minify_value(&self, value: &Value) -> String {
+        value.to_string()
+    }
+
+    /// Computes lengths for all items recursively.
+    ///
+    /// Children are processed first because a container's `value_length` and
+    /// `requires_multiple_lines` are derived from its children's results.
+    fn compute_item_lengths(&self, item: &mut JsonItem, pads: &PaddedFormattingTokens) {
+        for child in &mut item.children {
+            self.compute_item_lengths(child, pads);
+        }
+
+        item.value_length = match item.item_type {
+            JsonItemType::Null => pads.literal_null_len,
+            JsonItemType::True => pads.literal_true_len,
+            JsonItemType::False => pads.literal_false_len,
+            _ => item.value.len(),
+        };
+
+        // NOTE(review): `len()` counts bytes, so non-ASCII names, values and comments
+        // are measured wider than they render - confirm this is intended.
+        item.name_length = item.name.len();
+        item.prefix_comment_length = item.prefix_comment.len();
+        item.middle_comment_length = item.middle_comment.len();
+        item.postfix_comment_length = item.postfix_comment.len();
+
+        item.requires_multiple_lines = matches!(
+            item.item_type,
+            JsonItemType::BlankLine | JsonItemType::BlockComment | JsonItemType::LineComment
+        ) || item
+            .children
+            .iter()
+            .any(|ch| ch.requires_multiple_lines || ch.is_post_comment_line_style)
+            || item.prefix_comment.contains('\n')
+            || item.middle_comment.contains('\n')
+            || item.postfix_comment.contains('\n')
+            || item.value.contains('\n');
+
+        // Containers replace the scalar length with brackets + children + separators.
+        if matches!(item.item_type, JsonItemType::Array | JsonItemType::Object) {
+            let pad_type = self.get_padding_type(item);
+            let children_len: usize = item.children.iter().map(|ch| ch.minimum_total_length).sum();
+            let commas_len = if item.children.len() > 1 {
+                pads.comma_len * (item.children.len() - 1)
+            } else {
+                0
+            };
+            item.value_length = pads.start_len(item.item_type, pad_type)
+                + pads.end_len(item.item_type, pad_type)
+                + children_len
+                + commas_len;
+        }
+
+        // Each optional segment contributes its separator only when it is present.
+        item.minimum_total_length = if item.prefix_comment_length > 0 {
+            item.prefix_comment_length + pads.comment_len
+        } else {
+            0
+        } + if item.name_length > 0 {
+            item.name_length + pads.colon_len
+        } else {
+            0
+        } + if item.middle_comment_length > 0 {
+            item.middle_comment_length + pads.comment_len
+        } else {
+            0
+        } + item.value_length
+            + if item.postfix_comment_length > 0 {
+                item.postfix_comment_length + pads.comment_len
+            } else {
+                0
+            };
+    }
+
+    /// Formats any item to the buffer.
+    ///
+    /// Dispatches on `item.item_type`: containers, blank lines and standalone
+    /// comments each have a dedicated writer; every other item is written either
+    /// split (when it requires multiple lines) or inline.
+    fn format_item(
+        &self,
+        buffer: &mut String,
+        item: &JsonItem,
+        depth: usize,
+        include_trailing_comma: bool,
+        parent_template: Option<&TableTemplate>,
+        pads: &PaddedFormattingTokens,
+    ) {
+        match item.item_type {
+            JsonItemType::Array | JsonItemType::Object => {
+                self.format_container(
+                    buffer,
+                    item,
+                    depth,
+                    include_trailing_comma,
+                    parent_template,
+                    pads,
+                );
+            }
+            JsonItemType::BlankLine => {
+                self.format_blank_line(buffer, pads);
+            }
+            JsonItemType::BlockComment | JsonItemType::LineComment => {
+                self.format_standalone_comment(buffer, item, depth, pads);
+            }
+            _ => {
+                if item.requires_multiple_lines {
+                    self.format_split_key_value(
+                        buffer,
+                        item,
+                        depth,
+                        include_trailing_comma,
+                        parent_template,
+                        pads,
+                    );
+                } else {
+                    self.format_inline_element(
+                        buffer,
+                        item,
+                        depth,
+                        include_trailing_comma,
+                        parent_template,
+                        pads,
+                    );
+                }
+            }
+        }
+    }
+
+    /// Formats an array or object container.
+    fn format_container(
+        &self,
+        buffer: &mut String,
+        item: &JsonItem,
+        depth: usize,
+        include_trailing_comma: bool,
+        parent_template: Option<&TableTemplate>,
+        pads: &PaddedFormattingTokens,
+    ) {
+        // Layouts are attempted from most to least compact; the first writer that
+        // reports success ends the search, and the expanded form is the fallback.
+        let signed_depth = depth as i32;
+        let may_compact = signed_depth > self.options.always_expand_depth;
+        // Tables are additionally allowed exactly at `always_expand_depth`.
+        let may_tabulate = signed_depth >= self.options.always_expand_depth;
+
+        if may_compact
+            && self.format_container_inline(
+                buffer,
+                item,
+                depth,
+                include_trailing_comma,
+                parent_template,
+                pads,
+            )
+        {
+            return;
+        }
+
+        // Measure the children once; the remaining layouts all consult this template.
+        let nesting = item.complexity as i32;
+        let measure_recursively = nesting <= self.options.max_compact_array_complexity
+            || nesting <= self.options.max_table_row_complexity + 1;
+        let mut template = TableTemplate::new(pads, self.options.number_list_alignment);
+        template.measure_table_root(item, pads, measure_recursively);
+
+        if may_compact
+            && self.format_container_compact_multiline(
+                buffer,
+                item,
+                depth,
+                include_trailing_comma,
+                &template,
+                parent_template,
+                pads,
+            )
+        {
+            return;
+        }
+
+        if may_tabulate {
+            // The table writer gets its own copy, so the template handed to the
+            // expanded fallback below is the one measured above.
+            let mut table_template = template.clone();
+            if self.format_container_table(
+                buffer,
+                item,
+                depth,
+                include_trailing_comma,
+                &mut table_template,
+                parent_template,
+                pads,
+            ) {
+                return;
+            }
+        }
+
+        self.format_container_expanded(
+            buffer,
+            item,
+            depth,
+            include_trailing_comma,
+            &template,
+            parent_template,
+            pads,
+        );
+    }
+
+    /// Tries to format a container inline (single line).
+    fn format_container_inline(
+        &self,
+        buffer: &mut String,
+        item: &JsonItem,
+        depth: usize,
+        include_trailing_comma: bool,
+        parent_template: Option<&TableTemplate>,
+        pads: &PaddedFormattingTokens,
+    ) -> bool {
+        // Returns true when the container was written on one line; every `false`
+        // exit happens before anything is pushed to `buffer`.
+        if item.requires_multiple_lines {
+            return false;
+        }
+
+        // A segment costs its own width plus its separator, but only when present.
+        let with_separator = |segment: usize, separator: usize| {
+            if segment > 0 {
+                segment + separator
+            } else {
+                0
+            }
+        };
+
+        // When a parent template aligns the columns, its widths replace the item's own.
+        let (prefix_width, name_width) = match parent_template {
+            Some(pt) => (pt.prefix_comment_length, pt.name_length),
+            None => (item.prefix_comment_length, item.name_length),
+        };
+
+        let trailing_comma_width = if include_trailing_comma {
+            pads.comma_len
+        } else {
+            0
+        };
+        let length_to_consider = with_separator(prefix_width, pads.comment_len)
+            + with_separator(name_width, pads.colon_len)
+            + with_separator(item.middle_comment_length, pads.comment_len)
+            + item.value_length
+            + with_separator(item.postfix_comment_length, pads.comment_len)
+            + trailing_comma_width;
+
+        let too_complex = item.complexity as i32 > self.options.max_inline_complexity;
+        if too_complex || length_to_consider > self.available_line_space(depth, pads) {
+            return false;
+        }
+
+        buffer.push_str(&self.options.prefix_string);
+        buffer.push_str(&pads.indent(depth));
+        // `true`: this element ends the line, so its comma carries no trailing pad.
+        self.inline_element_with_eol(
+            buffer,
+            item,
+            include_trailing_comma,
+            true,
+            parent_template,
+            pads,
+        );
+        buffer.push_str(&pads.eol);
+
+        true
+    }
+
+    /// Tries to format an array as compact multiline (multiple items per line).
+    ///
+    /// Returns `true` when the array was written. Every `false` exit happens before
+    /// anything is pushed to `buffer`, so the caller can try another layout.
+    fn format_container_compact_multiline(
+        &self,
+        buffer: &mut String,
+        item: &JsonItem,
+        depth: usize,
+        include_trailing_comma: bool,
+        template: &TableTemplate,
+        parent_template: Option<&TableTemplate>,
+        pads: &PaddedFormattingTokens,
+    ) -> bool {
+        if item.item_type != JsonItemType::Array {
+            return false;
+        }
+        if item.children.is_empty()
+            || item.children.len() < self.options.min_compact_array_row_items
+        {
+            return false;
+        }
+        if item.complexity as i32 > self.options.max_compact_array_complexity {
+            return false;
+        }
+        if item.requires_multiple_lines {
+            return false;
+        }
+
+        let use_table_formatting = !matches!(
+            template.column_type,
+            TableColumnType::Unknown | TableColumnType::Mixed
+        );
+
+        // If we can't fit lots of them on a line, compact multiline isn't a good choice
+        let likely_available_line_space = self.available_line_space(depth + 1, pads);
+        let avg_item_width = pads.comma_len
+            + if use_table_formatting {
+                template.total_length
+            } else {
+                // `children` is non-empty here (checked above), so the division is safe.
+                item.children
+                    .iter()
+                    .map(|ch| ch.minimum_total_length)
+                    .sum::<usize>()
+                    / item.children.len()
+            };
+        if avg_item_width * self.options.min_compact_array_row_items > likely_available_line_space {
+            return false;
+        }
+
+        // Add prefix_string, indent, prefix comment, starting bracket
+        let depth_after_colon =
+            self.standard_format_start(buffer, item, depth, parent_template, pads);
+        buffer.push_str(pads.start(item.item_type, BracketPaddingType::Empty));
+
+        let available_line_space = self.available_line_space(depth_after_colon + 1, pads);
+        // Starts negative so the first child always opens a fresh line.
+        let mut remaining_line_space: i32 = -1;
+
+        for (i, child) in item.children.iter().enumerate() {
+            let needs_comma = i < item.children.len() - 1;
+            let current_item_width = if use_table_formatting {
+                template.total_length
+            } else {
+                child.minimum_total_length
+            };
+            let space_needed_for_current =
+                current_item_width + if needs_comma { pads.comma_len } else { 0 };
+
+            // Check if we need to start a new line
+            if remaining_line_space < space_needed_for_current as i32 {
+                buffer.push_str(&pads.eol);
+                buffer.push_str(&self.options.prefix_string);
+                buffer.push_str(&pads.indent(depth_after_colon + 1));
+                remaining_line_space = available_line_space as i32;
+            }
+
+            // Check if the NEXT element will fit on this line (to determine if this is end of line)
+            let next_item_width = if i + 1 < item.children.len() {
+                if use_table_formatting {
+                    template.total_length
+                } else {
+                    item.children[i + 1].minimum_total_length
+                }
+            } else {
+                0
+            };
+            let space_after_current = remaining_line_space - space_needed_for_current as i32;
+            let is_end_of_line =
+                !needs_comma || space_after_current < (next_item_width + pads.comma_len) as i32;
+
+            if use_table_formatting {
+                self.inline_table_row_segment(
+                    buffer,
+                    template,
+                    child,
+                    needs_comma,
+                    is_end_of_line,
+                    pads,
+                );
+            } else {
+                self.inline_element_with_eol(
+                    buffer,
+                    child,
+                    needs_comma,
+                    is_end_of_line,
+                    None,
+                    pads,
+                );
+            }
+            remaining_line_space -= space_needed_for_current as i32;
+        }
+
+        // End the line and add closing bracket
+        buffer.push_str(&pads.eol);
+        buffer.push_str(&self.options.prefix_string);
+        buffer.push_str(&pads.indent(depth_after_colon));
+        buffer.push_str(pads.end(item.item_type, BracketPaddingType::Empty));
+
+        self.standard_format_end(buffer, item, include_trailing_comma, pads);
+        true
+    }
+
+    /// Tries to format a container as a table.
+    ///
+    /// Returns `true` when the table was written. Every `false` exit happens before
+    /// anything is pushed to `buffer`; `template` may already have been adjusted by
+    /// `try_to_fit` at that point.
+    fn format_container_table(
+        &self,
+        buffer: &mut String,
+        item: &JsonItem,
+        depth: usize,
+        include_trailing_comma: bool,
+        template: &mut TableTemplate,
+        parent_template: Option<&TableTemplate>,
+        pads: &PaddedFormattingTokens,
+    ) -> bool {
+        // If this element's children are too complex to be written inline, don't bother
+        if item.complexity as i32 > self.options.max_table_row_complexity + 1 {
+            return false;
+        }
+
+        // If any particular row would require multiple lines, we can't table format
+        if template.requires_multiple_lines {
+            return false;
+        }
+
+        // A multi-line middle comment pushes the opening bracket one level deeper
+        // (see `standard_format_start`), so rows sit two levels below `depth`.
+        let available_space_depth = if item.middle_comment_has_newline {
+            depth + 2
+        } else {
+            depth + 1
+        };
+        // Reserve room for the comma that may follow each row.
+        let available_space = self
+            .available_line_space(available_space_depth, pads)
+            .saturating_sub(pads.comma_len);
+
+        // If any child element is too long even without formatting, don't bother
+        let is_child_too_long = item.children.iter().any(|ch| {
+            !matches!(
+                ch.item_type,
+                JsonItemType::BlankLine | JsonItemType::LineComment | JsonItemType::BlockComment
+            ) && ch.minimum_total_length > available_space
+        });
+        if is_child_too_long {
+            return false;
+        }
+
+        // Try to fit the template
+        if !template.try_to_fit(pads, available_space)
+            || template.column_type == TableColumnType::Mixed
+        {
+            return false;
+        }
+
+        let depth_after_colon =
+            self.standard_format_start(buffer, item, depth, parent_template, pads);
+        buffer.push_str(pads.start(item.item_type, BracketPaddingType::Empty));
+        buffer.push_str(&pads.eol);
+
+        // Only rows before the last real element get a trailing comma.
+        let last_element_index = self.index_of_last_element(&item.children);
+        for (i, row_item) in item.children.iter().enumerate() {
+            // Blank lines and standalone comments are written as-is, outside the table grid.
+            if row_item.item_type == JsonItemType::BlankLine {
+                self.format_blank_line(buffer, pads);
+                continue;
+            }
+            if matches!(
+                row_item.item_type,
+                JsonItemType::LineComment | JsonItemType::BlockComment
+            ) {
+                self.format_standalone_comment(buffer, row_item, depth_after_colon + 1, pads);
+                continue;
+            }
+
+            buffer.push_str(&self.options.prefix_string);
+            buffer.push_str(&pads.indent(depth_after_colon + 1));
+            self.inline_table_row_segment(
+                buffer,
+                template,
+                row_item,
+                i < last_element_index,
+                true,
+                pads,
+            );
+            buffer.push_str(&pads.eol);
+        }
+
+        buffer.push_str(&self.options.prefix_string);
+        buffer.push_str(&pads.indent(depth_after_colon));
+        buffer.push_str(pads.end(item.item_type, BracketPaddingType::Empty));
+        self.standard_format_end(buffer, item, include_trailing_comma, pads);
+
+        true
+    }
+
+    /// Formats a container in expanded form (each child on its own line).
+    ///
+    /// This is the fallback layout: it always writes the container and returns nothing.
+    fn format_container_expanded(
+        &self,
+        buffer: &mut String,
+        item: &JsonItem,
+        depth: usize,
+        include_trailing_comma: bool,
+        template: &TableTemplate,
+        parent_template: Option<&TableTemplate>,
+        pads: &PaddedFormattingTokens,
+    ) {
+        let depth_after_colon =
+            self.standard_format_start(buffer, item, depth, parent_template, pads);
+        buffer.push_str(pads.start(item.item_type, BracketPaddingType::Empty));
+        buffer.push_str(&pads.eol);
+
+        // Decide whether to align this container's property values
+        // (objects only, bounded name padding, no multi-line middle comments, and
+        // enough room for the template's atomic item size).
+        let align_props = item.item_type == JsonItemType::Object
+            && template.name_length.saturating_sub(template.name_minimum)
+                <= self.options.max_prop_name_padding
+            && !template.any_middle_comment_has_newline
+            && self.available_line_space(depth + 1, pads) >= template.atomic_item_size();
+        let template_to_pass = if align_props { Some(template) } else { None };
+
+        // Only children before the last real element get a trailing comma.
+        let last_element_index = self.index_of_last_element(&item.children);
+        for (i, child) in item.children.iter().enumerate() {
+            self.format_item(
+                buffer,
+                child,
+                depth_after_colon + 1,
+                i < last_element_index,
+                template_to_pass,
+                pads,
+            );
+        }
+
+        buffer.push_str(&self.options.prefix_string);
+        buffer.push_str(&pads.indent(depth_after_colon));
+        buffer.push_str(pads.end(item.item_type, BracketPaddingType::Empty));
+        self.standard_format_end(buffer, item, include_trailing_comma, pads);
+    }
+
+    /// Formats a standalone comment.
+    fn format_standalone_comment(
+        &self,
+        buffer: &mut String,
+        item: &JsonItem,
+        depth: usize,
+        pads: &PaddedFormattingTokens,
+    ) {
+        // One output line: prefix string, indentation, the comment text, end of line.
+        let indentation = pads.indent(depth);
+        buffer.push_str(&self.options.prefix_string);
+        buffer.push_str(&indentation);
+        buffer.push_str(&item.value);
+        buffer.push_str(&pads.eol);
+    }
+
+    /// Writes an empty line: just the configured prefix string and the end-of-line token.
+    fn format_blank_line(&self, buffer: &mut String, pads: &PaddedFormattingTokens) {
+        let line_prefix = &self.options.prefix_string;
+        buffer.push_str(line_prefix);
+        buffer.push_str(&pads.eol);
+    }
+
+    /// Writes a single-line element on a line of its own, indented to `depth`.
+    fn format_inline_element(
+        &self,
+        buffer: &mut String,
+        item: &JsonItem,
+        depth: usize,
+        include_trailing_comma: bool,
+        parent_template: Option<&TableTemplate>,
+        pads: &PaddedFormattingTokens,
+    ) {
+        let indentation = pads.indent(depth);
+        buffer.push_str(&self.options.prefix_string);
+        buffer.push_str(&indentation);
+        // Same call `inline_element` makes: `false` selects the padded comma form.
+        self.inline_element_with_eol(
+            buffer,
+            item,
+            include_trailing_comma,
+            false,
+            parent_template,
+            pads,
+        );
+        buffer.push_str(&pads.eol);
+    }
+
+    /// Writes a non-container item that needs more than one line: the standard
+    /// start, then the raw value, then the standard end.
+    fn format_split_key_value(
+        &self,
+        buffer: &mut String,
+        item: &JsonItem,
+        depth: usize,
+        include_trailing_comma: bool,
+        parent_template: Option<&TableTemplate>,
+        pads: &PaddedFormattingTokens,
+    ) {
+        // The depth returned by the start helper is not needed for a scalar value.
+        let _ = self.standard_format_start(buffer, item, depth, parent_template, pads);
+        buffer.push_str(&item.value);
+        self.standard_format_end(buffer, item, include_trailing_comma, pads);
+    }
+
+    /// Standard formatting for the start of an item.
+    ///
+    /// Writes the line prefix, indentation, prefix comment, property name and middle
+    /// comment. Returns the depth at which the value that follows should be written:
+    /// `depth`, or `depth + 1` when a multi-line middle comment forced a line break.
+    fn standard_format_start(
+        &self,
+        buffer: &mut String,
+        item: &JsonItem,
+        depth: usize,
+        parent_template: Option<&TableTemplate>,
+        pads: &PaddedFormattingTokens,
+    ) -> usize {
+        buffer.push_str(&self.options.prefix_string);
+        buffer.push_str(&pads.indent(depth));
+
+        match parent_template {
+            // Padded to the widths recorded in the parent's template.
+            Some(pt) => {
+                self.add_to_buffer_fixed(
+                    buffer,
+                    &item.prefix_comment,
+                    item.prefix_comment_length,
+                    pt.prefix_comment_length,
+                    &pads.comment,
+                    false,
+                );
+                self.add_to_buffer_fixed(
+                    buffer,
+                    &item.name,
+                    item.name_length,
+                    pt.name_length,
+                    &pads.colon,
+                    self.options.colon_before_prop_name_padding,
+                );
+            }
+            // No template: write only the parts that are present, unpadded.
+            None => {
+                self.add_to_buffer(
+                    buffer,
+                    &item.prefix_comment,
+                    item.prefix_comment_length,
+                    &pads.comment,
+                );
+                self.add_to_buffer(buffer, &item.name, item.name_length, &pads.colon);
+            }
+        }
+
+        if item.middle_comment_length == 0 {
+            return depth;
+        }
+
+        if item.middle_comment_has_newline {
+            // The comment gets a line of its own one level deeper, and the value
+            // then starts on a further new line at that same deeper level.
+            let nested_depth = depth + 1;
+            buffer.push_str(&pads.eol);
+            buffer.push_str(&self.options.prefix_string);
+            buffer.push_str(&pads.indent(nested_depth));
+            buffer.push_str(&item.middle_comment);
+            buffer.push_str(&pads.eol);
+            buffer.push_str(&self.options.prefix_string);
+            buffer.push_str(&pads.indent(nested_depth));
+            nested_depth
+        } else {
+            // Single-line comment: stays on the current line, padded to the
+            // template's middle-comment width when there is a template.
+            let alignment_pad = parent_template.map_or(0, |pt| {
+                pt.middle_comment_length
+                    .saturating_sub(item.middle_comment_length)
+            });
+            buffer.push_str(&item.middle_comment);
+            self.add_spaces(buffer, alignment_pad);
+            buffer.push_str(&pads.comment);
+            depth
+        }
+    }
+
+    /// Standard formatting for the end of an item.
+    ///
+    /// Writes the optional postfix comment and trailing comma, then the end-of-line
+    /// token. The comma always uses the unpadded form because it ends the line.
+    fn standard_format_end(
+        &self,
+        buffer: &mut String,
+        item: &JsonItem,
+        include_trailing_comma: bool,
+        pads: &PaddedFormattingTokens,
+    ) {
+        // A line-style postfix comment runs to the end of the line, so the comma has
+        // to be written before it; otherwise the comma follows the comment.
+        let comma_before_comment = include_trailing_comma && item.is_post_comment_line_style;
+        let comma_after_comment = include_trailing_comma && !item.is_post_comment_line_style;
+
+        if comma_before_comment {
+            buffer.push_str(&pads.comma_no_pad);
+        }
+        if item.postfix_comment_length > 0 {
+            buffer.push_str(&pads.comment);
+            buffer.push_str(&item.postfix_comment);
+        }
+        if comma_after_comment {
+            buffer.push_str(&pads.comma_no_pad);
+        }
+        buffer.push_str(&pads.eol);
+    }
+
+    /// Writes an element inline (without indentation/newlines).
+    ///
+    /// Convenience wrapper around `inline_element_with_eol` that always passes
+    /// `is_end_of_line = false`, i.e. any trailing comma uses the padded form.
+    fn inline_element(
+        &self,
+        buffer: &mut String,
+        item: &JsonItem,
+        include_trailing_comma: bool,
+        parent_template: Option<&TableTemplate>,
+        pads: &PaddedFormattingTokens,
+    ) {
+        let is_end_of_line = false;
+        self.inline_element_with_eol(
+            buffer,
+            item,
+            include_trailing_comma,
+            is_end_of_line,
+            parent_template,
+            pads,
+        );
+    }
+
+    /// Writes an element inline with control over end-of-line comma handling.
+    ///
+    /// When `is_end_of_line` is true the trailing comma is written without its
+    /// padding, so the line does not end in whitespace.
+    fn inline_element_with_eol(
+        &self,
+        buffer: &mut String,
+        item: &JsonItem,
+        include_trailing_comma: bool,
+        is_end_of_line: bool,
+        parent_template: Option<&TableTemplate>,
+        pads: &PaddedFormattingTokens,
+    ) {
+        match parent_template {
+            // Padded to the widths recorded in the parent's template.
+            Some(pt) => {
+                self.add_to_buffer_fixed(
+                    buffer,
+                    &item.prefix_comment,
+                    item.prefix_comment_length,
+                    pt.prefix_comment_length,
+                    &pads.comment,
+                    false,
+                );
+                self.add_to_buffer_fixed(
+                    buffer,
+                    &item.name,
+                    item.name_length,
+                    pt.name_length,
+                    &pads.colon,
+                    self.options.colon_before_prop_name_padding,
+                );
+                self.add_to_buffer_fixed(
+                    buffer,
+                    &item.middle_comment,
+                    item.middle_comment_length,
+                    pt.middle_comment_length,
+                    &pads.comment,
+                    false,
+                );
+            }
+            // No template: write only the parts that are present, unpadded.
+            None => {
+                self.add_to_buffer(
+                    buffer,
+                    &item.prefix_comment,
+                    item.prefix_comment_length,
+                    &pads.comment,
+                );
+                self.add_to_buffer(buffer, &item.name, item.name_length, &pads.colon);
+                self.add_to_buffer(
+                    buffer,
+                    &item.middle_comment,
+                    item.middle_comment_length,
+                    &pads.comment,
+                );
+            }
+        }
+
+        self.inline_element_raw(buffer, item, pads);
+
+        let separator = if is_end_of_line {
+            &pads.comma_no_pad
+        } else {
+            &pads.comma
+        };
+        // A line-style postfix comment runs to the end of the line, so the comma has
+        // to be written before it; otherwise the comma follows the comment.
+        let comma_before_comment = include_trailing_comma && item.is_post_comment_line_style;
+        let comma_after_comment = include_trailing_comma && !item.is_post_comment_line_style;
+
+        if comma_before_comment {
+            buffer.push_str(separator);
+        }
+        if item.postfix_comment_length > 0 {
+            buffer.push_str(&pads.comment);
+            buffer.push_str(&item.postfix_comment);
+        }
+        if comma_after_comment {
+            buffer.push_str(separator);
+        }
+    }
+
+    /// Writes just the value of an element inline.
+    ///
+    /// Containers are written recursively on one line (brackets padded according to
+    /// `get_padding_type`); every other item type writes `item.value` verbatim.
+    fn inline_element_raw(
+        &self,
+        buffer: &mut String,
+        item: &JsonItem,
+        pads: &PaddedFormattingTokens,
+    ) {
+        match item.item_type {
+            JsonItemType::Array => {
+                let pad_type = self.get_padding_type(item);
+                buffer.push_str(pads.arr_start(pad_type));
+                for (i, child) in item.children.iter().enumerate() {
+                    // Every child except the last is followed by a comma.
+                    self.inline_element(buffer, child, i < item.children.len() - 1, None, pads);
+                }
+                buffer.push_str(pads.arr_end(pad_type));
+            }
+            JsonItemType::Object => {
+                let pad_type = self.get_padding_type(item);
+                buffer.push_str(pads.obj_start(pad_type));
+                for (i, child) in item.children.iter().enumerate() {
+                    // Every child except the last is followed by a comma.
+                    self.inline_element(buffer, child, i < item.children.len() - 1, None, pads);
+                }
+                buffer.push_str(pads.obj_end(pad_type));
+            }
+            _ => {
+                buffer.push_str(&item.value);
+            }
+        }
+    }
+
+    /// Writes a table row segment.
+    ///
+    /// Writes `item` padded to the column widths recorded in `template`.
+    /// `is_whole_row` is true when the segment ends the output line; in that case
+    /// commas are written unpadded, no dummy comma is emitted, and the postfix
+    /// comment is not padded.
+    fn inline_table_row_segment(
+        &self,
+        buffer: &mut String,
+        template: &TableTemplate,
+        item: &JsonItem,
+        include_trailing_comma: bool,
+        is_whole_row: bool,
+        pads: &PaddedFormattingTokens,
+    ) {
+        self.add_to_buffer_fixed(
+            buffer,
+            &item.prefix_comment,
+            item.prefix_comment_length,
+            template.prefix_comment_length,
+            &pads.comment,
+            false,
+        );
+        self.add_to_buffer_fixed(
+            buffer,
+            &item.name,
+            item.name_length,
+            template.name_length,
+            &pads.colon,
+            self.options.colon_before_prop_name_padding,
+        );
+        self.add_to_buffer_fixed(
+            buffer,
+            &item.middle_comment,
+            item.middle_comment_length,
+            template.middle_comment_length,
+            &pads.comment,
+            false,
+        );
+
+        // Determine comma placement
+        // (directly after the value, or after the value's alignment padding).
+        let comma_before_pad = self.options.table_comma_placement
+            == TableCommaPlacement::BeforePadding
+            || (self.options.table_comma_placement
+                == TableCommaPlacement::BeforePaddingExceptNumbers
+                && template.column_type != TableColumnType::Number);
+
+        // Use comma without trailing space at end of rows to avoid trailing whitespace
+        let comma_type = if include_trailing_comma {
+            if is_whole_row {
+                &pads.comma_no_pad
+            } else {
+                &pads.comma
+            }
+        } else if is_whole_row {
+            "" // No dummy comma at end of row
+        } else {
+            &pads.dummy_comma
+        };
+
+        // Three value shapes: a nested table (the template has sub-columns and the
+        // value is not null), an aligned number, or a plain value padded to width.
+        if !template.children.is_empty() && item.item_type != JsonItemType::Null {
+            if template.column_type == TableColumnType::Array {
+                self.inline_table_raw_array(buffer, template, item, pads);
+            } else {
+                self.inline_table_raw_object(buffer, template, item, pads);
+            }
+            if comma_before_pad {
+                buffer.push_str(comma_type);
+            }
+            if template.shorter_than_null_adjustment > 0 {
+                self.add_spaces(buffer, template.shorter_than_null_adjustment);
+            }
+        } else if template.column_type == TableColumnType::Number {
+            let (formatted, left_pad, right_pad) =
+                template.format_number(&item.value, item.item_type);
+            self.add_spaces(buffer, left_pad);
+            buffer.push_str(&formatted);
+            if comma_before_pad {
+                buffer.push_str(comma_type);
+            }
+            self.add_spaces(buffer, right_pad);
+        } else {
+            self.inline_element_raw(buffer, item, pads);
+            if comma_before_pad {
+                buffer.push_str(comma_type);
+            }
+            self.add_spaces(
+                buffer,
+                template
+                    .composite_value_length
+                    .saturating_sub(item.value_length),
+            );
+        }
+
+        // Comma-after-padding placement: emitted once, after whichever branch ran.
+        if !comma_before_pad {
+            buffer.push_str(comma_type);
+        }
+
+        if template.postfix_comment_length > 0 {
+            buffer.push_str(&pads.comment);
+            buffer.push_str(&item.postfix_comment);
+            // Only add padding for postfix comments if we're not at the end of a row
+            if !is_whole_row {
+                self.add_spaces(
+                    buffer,
+                    template
+                        .postfix_comment_length
+                        .saturating_sub(item.postfix_comment_length),
+                );
+            }
+        }
+    }
+
+    /// Writes an array value for table formatting.
+    ///
+    /// One cell is written per template column; columns past the end of `item`'s
+    /// children are filled with spaces so later rows stay aligned.
+    fn inline_table_raw_array(
+        &self,
+        buffer: &mut String,
+        template: &TableTemplate,
+        item: &JsonItem,
+        pads: &PaddedFormattingTokens,
+    ) {
+        let column_count = template.children.len();
+        let cell_count = item.children.len();
+
+        buffer.push_str(pads.arr_start(template.pad_type));
+        for (i, sub_template) in template.children.iter().enumerate() {
+            let template_has_more = i + 1 < column_count;
+            match item.children.get(i) {
+                Some(cell) => {
+                    let is_final_cell = i + 1 == cell_count;
+                    self.inline_table_row_segment(
+                        buffer,
+                        sub_template,
+                        cell,
+                        !is_final_cell,
+                        false,
+                        pads,
+                    );
+                    // Stand in for the comma a longer row would have here.
+                    if is_final_cell && template_has_more {
+                        buffer.push_str(&pads.dummy_comma);
+                    }
+                }
+                None => {
+                    // This row has no value for the column: blank it out.
+                    self.add_spaces(buffer, sub_template.total_length);
+                    if template_has_more {
+                        buffer.push_str(&pads.dummy_comma);
+                    }
+                }
+            }
+        }
+        buffer.push_str(pads.arr_end(template.pad_type));
+    }
+
+    /// Writes an object value for table formatting.
+    ///
+    /// Columns follow the template's order; each is filled from the child whose name
+    /// equals the column's `location_in_parent`, or with spaces when there is none.
+    fn inline_table_raw_object(
+        &self,
+        buffer: &mut String,
+        template: &TableTemplate,
+        item: &JsonItem,
+        pads: &PaddedFormattingTokens,
+    ) {
+        // Pair every template column with the matching child, if any.
+        let mut columns = Vec::with_capacity(template.children.len());
+        for sub in template.children.iter() {
+            let matching_child = item
+                .children
+                .iter()
+                .find(|ch| sub.location_in_parent.as_ref() == Some(&ch.name));
+            columns.push((sub, matching_child));
+        }
+
+        // The last populated column is the one that must not get a real comma.
+        let last_populated = columns
+            .iter()
+            .rposition(|(_, child)| child.is_some())
+            .unwrap_or(0);
+        let column_count = columns.len();
+
+        buffer.push_str(pads.obj_start(template.pad_type));
+        for (i, (sub_template, matched)) in columns.iter().enumerate() {
+            let template_has_more = i + 1 < column_count;
+            match matched {
+                Some(cell) => {
+                    let is_final_cell = i == last_populated;
+                    self.inline_table_row_segment(
+                        buffer,
+                        sub_template,
+                        cell,
+                        !is_final_cell,
+                        false,
+                        pads,
+                    );
+                    // Stand in for the comma a fuller row would have here.
+                    if is_final_cell && template_has_more {
+                        buffer.push_str(&pads.dummy_comma);
+                    }
+                }
+                None => {
+                    // This row has no value for the column: blank it out.
+                    self.add_spaces(buffer, sub_template.total_length);
+                    if template_has_more {
+                        buffer.push_str(&pads.dummy_comma);
+                    }
+                }
+            }
+        }
+        buffer.push_str(pads.obj_end(template.pad_type));
+    }
+
+    /// Chooses bracket padding: `Empty` for no children, `Complex` when the
+    /// item's complexity is at least 2, `Simple` otherwise.
+    fn get_padding_type(&self, item: &JsonItem) -> BracketPaddingType {
+        match (item.children.is_empty(), item.complexity) {
+            (true, _) => BracketPaddingType::Empty,
+            (false, complexity) if complexity >= 2 => BracketPaddingType::Complex,
+            (false, _) => BracketPaddingType::Simple,
+        }
+    }
+
+    /// Width left on a line at `depth` after the prefix string and indentation,
+    /// saturating at zero.
+    fn available_line_space(&self, depth: usize, pads: &PaddedFormattingTokens) -> usize {
+        let indent_width = self.options.indent_spaces * depth;
+        let after_prefix = self
+            .options
+            .max_total_line_length
+            .saturating_sub(pads.prefix_string_len);
+        after_prefix.saturating_sub(indent_width)
+    }
+
+    /// Index of the last child that is not a blank line or standalone comment,
+    /// or 0 when there is no such child.
+    fn index_of_last_element(&self, children: &[JsonItem]) -> usize {
+        children
+            .iter()
+            .rposition(|ch| {
+                !matches!(
+                    ch.item_type,
+                    JsonItemType::BlankLine
+                        | JsonItemType::LineComment
+                        | JsonItemType::BlockComment
+                )
+            })
+            .unwrap_or(0)
+    }
+
+    /// Appends `text` followed by `suffix`, but only when `length` is non-zero.
+    fn add_to_buffer(&self, buffer: &mut String, text: &str, length: usize, suffix: &str) {
+        if length == 0 {
+            return;
+        }
+        buffer.push_str(text);
+        buffer.push_str(suffix);
+    }
+
+    /// Appends `text` padded with spaces up to `target_length`, plus `suffix`.
+    ///
+    /// Nothing is written when `target_length` is zero. `suffix_before_padding`
+    /// selects whether the suffix goes before or after the padding.
+    fn add_to_buffer_fixed(
+        &self,
+        buffer: &mut String,
+        text: &str,
+        actual_length: usize,
+        target_length: usize,
+        suffix: &str,
+        suffix_before_padding: bool,
+    ) {
+        if target_length == 0 {
+            return;
+        }
+        let padding = target_length.saturating_sub(actual_length);
+        buffer.push_str(text);
+        if suffix_before_padding {
+            buffer.push_str(suffix);
+            self.add_spaces(buffer, padding);
+        } else {
+            self.add_spaces(buffer, padding);
+            buffer.push_str(suffix);
+        }
+    }
+
+    /// Appends `count` space characters to `buffer`.
+    fn add_spaces(&self, buffer: &mut String, count: usize) {
+        buffer.extend(std::iter::repeat(' ').take(count));
+    }
+}
diff --git a/crates/fractured-json/src/item.rs b/crates/fractured-json/src/item.rs
new file mode 100644
index 0000000000..9ba4dc4cf1
--- /dev/null
+++ b/crates/fractured-json/src/item.rs
@@ -0,0 +1,176 @@
+use serde_json::Value;
+
+/// The type of a JSON item.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum JsonItemType {
+    Null,
+    True,
+    False,
+    Number,
+    String,
+    Array,
+    Object,
+    BlankLine,
+    BlockComment,
+    LineComment,
+}
+
+/// A distinct thing that can appear wherever JSON values are expected in a JSON document.
+/// This could be an actual data value, such as a string, number, array, etc., or it could be
+/// a blank line or standalone comment.
+#[derive(Debug, Clone)]
+pub struct JsonItem {
+    /// The type of item - string, blank line, etc.
+    pub item_type: JsonItemType,
+
+    /// Nesting level of this item's contents if any. A simple item, or an empty array or object,
+    /// has a complexity of zero. Non-empty arrays/objects have a complexity 1 greater than that
+    /// of their child with the greatest complexity.
+    pub complexity: usize,
+
+    /// Property name, if this is an element that is contained in an object.
+    pub name: String,
+
+    /// The text value of this item, non-recursively. Empty for objects and arrays.
+    pub value: String,
+
+    /// Comment that belongs in front of this element on the same line, if any.
+    pub prefix_comment: String,
+
+    /// Comment that belongs in between the property name and value, if any.
+    pub middle_comment: String,
+
+    /// True if there's a line-style middle comment or a block style one with a newline in it.
+    pub middle_comment_has_newline: bool,
+
+    /// Comment that belongs after this element on the same line, if any.
+    pub postfix_comment: String,
+
+    /// True if the postfix comment is to-end-of-line rather than block style.
+    pub is_post_comment_line_style: bool,
+
+    /// String length of the name part.
+    pub name_length: usize,
+
+    /// String length of the value part. If it's an array or object, it's the sum of the children,
+    /// with padding and brackets.
+    pub value_length: usize,
+
+    /// Length of the comment at the front of the item, if any.
+    pub prefix_comment_length: usize,
+
+    /// Length of the comment in the middle of the item, if any.
+    pub middle_comment_length: usize,
+
+    /// Length of the comment at the end of the item, if any.
+    pub postfix_comment_length: usize,
+
+    /// The smallest possible size in which this item - including all comments and children
+    /// if appropriate - can be written.
+    pub minimum_total_length: usize,
+
+    /// True if this item can't be written on a single line.
+    pub requires_multiple_lines: bool,
+
+    /// List of this item's contents, if it's an array or object.
+    pub children: Vec<JsonItem>,
+}
+
+impl Default for JsonItem {
+    /// A `Null` item with empty strings, zero lengths and no children.
+    fn default() -> Self {
+        Self {
+            item_type: JsonItemType::Null,
+            complexity: 0,
+            name: String::new(),
+            value: String::new(),
+            prefix_comment: String::new(),
+            middle_comment: String::new(),
+            middle_comment_has_newline: false,
+            postfix_comment: String::new(),
+            is_post_comment_line_style: false,
+            name_length: 0,
+            value_length: 0,
+            prefix_comment_length: 0,
+            middle_comment_length: 0,
+            postfix_comment_length: 0,
+            minimum_total_length: 0,
+            requires_multiple_lines: false,
+            children: Vec::new(),
+        }
+    }
+}
+
+impl JsonItem {
+    /// Convert a serde_json::Value to a JsonItem tree.
+    ///
+    /// `prop_name` is the unescaped property name when the value is a member of an object;
+    /// it is stored in `name` escaped and wrapped in double quotes. Only `item_type`,
+    /// `complexity`, `name`, `value` and `children` are populated here; every length and
+    /// comment field keeps its `Default` value.
+    pub fn from_value(value: &Value, prop_name: Option<&str>) -> Self {
+        let (item_type, complexity, children, value_str) = match value {
+            Value::Null => (JsonItemType::Null, 0, Vec::new(), "null".to_string()),
+            Value::Bool(true) => (JsonItemType::True, 0, Vec::new(), "true".to_string()),
+            Value::Bool(false) => (JsonItemType::False, 0, Vec::new(), "false".to_string()),
+            Value::Number(n) => (JsonItemType::Number, 0, Vec::new(), n.to_string()),
+            Value::String(s) => {
+                let escaped = escape_json_string(s);
+                (JsonItemType::String, 0, Vec::new(), escaped)
+            }
+            Value::Array(arr) => {
+                let children: Vec<JsonItem> =
+                    arr.iter().map(|v| JsonItem::from_value(v, None)).collect();
+                let complexity = container_complexity(&children);
+                (JsonItemType::Array, complexity, children, String::new())
+            }
+            Value::Object(obj) => {
+                let children: Vec<JsonItem> = obj
+                    .iter()
+                    .map(|(k, v)| JsonItem::from_value(v, Some(k)))
+                    .collect();
+                let complexity = container_complexity(&children);
+                (JsonItemType::Object, complexity, children, String::new())
+            }
+        };
+
+        // Property names are quoted and escaped exactly like string values.
+        let name = prop_name.map(escape_json_string).unwrap_or_default();
+
+        JsonItem {
+            item_type,
+            complexity,
+            name,
+            value: value_str,
+            children,
+            ..Default::default()
+        }
+    }
+}
+
+/// Complexity of an array/object holding `children`: zero when empty, otherwise one more
+/// than the greatest complexity among the children.
+fn container_complexity(children: &[JsonItem]) -> usize {
+    children
+        .iter()
+        .map(|c| c.complexity)
+        .max()
+        .map_or(0, |deepest| deepest + 1)
+}
+
+/// Escape a string for JSON output (with quotes).
+fn escape_json_string(s: &str) -> String {
+    format!("\"{}\"", escape_string_content(s))
+}
+
+/// Escape string content (without quotes).
+fn escape_string_content(s: &str) -> String {
+    let mut escaped = String::with_capacity(s.len());
+    for ch in s.chars() {
+        // Characters that have a dedicated two-character escape.
+        let short_escape = match ch {
+            '"' => Some("\\\""),
+            '\\' => Some("\\\\"),
+            '\n' => Some("\\n"),
+            '\r' => Some("\\r"),
+            '\t' => Some("\\t"),
+            _ => None,
+        };
+
+        if let Some(sequence) = short_escape {
+            escaped.push_str(sequence);
+        } else if ch.is_control() {
+            // Any other control character is written as a four-digit \u escape.
+            escaped.push_str(&format!("\\u{:04x}", ch as u32));
+        } else {
+            escaped.push(ch);
+        }
+    }
+    escaped
+}
diff --git a/crates/fractured-json/src/lib.rs b/crates/fractured-json/src/lib.rs
new file mode 100644
index 0000000000..55d8ba9063
--- /dev/null
+++ b/crates/fractured-json/src/lib.rs
@@ -0,0 +1,11 @@
+// Internal modules; only the formatter and the option types are re-exported below.
+mod formatter;
+mod item;
+mod options;
+mod padded_tokens;
+mod template;
+
+pub use formatter::Formatter;
+pub use options::*;
+
+#[cfg(test)]
+mod tests;
diff --git a/crates/fractured-json/src/options.rs b/crates/fractured-json/src/options.rs
new file mode 100644
index 0000000000..57e49f6a77
--- /dev/null
+++ b/crates/fractured-json/src/options.rs
@@ -0,0 +1,177 @@
+/// Line-ending style used when writing formatted JSON.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
+pub enum EolStyle {
+    /// Platform line ending: CRLF on Windows, LF everywhere else
+    #[default]
+    Default,
+    /// Always CRLF (\r\n)
+    Crlf,
+    /// Always LF (\n)
+    Lf,
+}
+
+/// How comments in the input are handled.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
+pub enum CommentPolicy {
+    /// Comments are an error (the default, because standard JSON has no comments)
+    #[default]
+    TreatAsError,
+    /// Comments are kept in the output
+    Preserve,
+    /// Comments are dropped from the output
+    Remove,
+}
+
+/// Controls alignment of numbers in table columns or compact multiline arrays.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
+pub enum NumberListAlignment {
+    /// Numbers are left-aligned
+    Left,
+    /// Numbers are right-aligned
+    Right,
+    /// Numbers are lined up on their decimal points
+    #[default]
+    Decimal,
+    /// Numbers are rewritten so they all share the same decimal precision
+    Normalize,
+}
+
+/// Where commas go in rows that are formatted as a table.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
+pub enum TableCommaPlacement {
+    /// Comma immediately follows each element; padding spaces come after it
+    BeforePadding,
+    /// Comma follows the padding spaces, so commas form their own column
+    AfterPadding,
+    /// Like `BeforePadding`, except that number columns get the comma after the padding
+    #[default]
+    BeforePaddingExceptNumbers,
+}
+
+/// Settings controlling the output of FracturedJson-formatted JSON documents.
+#[derive(Debug, Clone)]
+pub struct FracturedJsonOptions {
+    /// Line-ending style of the formatted output.
+    pub json_eol_style: EolStyle,
+
+    /// Maximum length (in characters, including indentation) when more than one simple value is put on a line.
+    pub max_total_line_length: usize,
+
+    /// Maximum nesting level of arrays/objects that may be written on a single line.
+    /// 0 disables inlining. 1 allows inlining of arrays/objects that contain only simple items.
+    pub max_inline_complexity: i32,
+
+    /// Maximum nesting level for arrays formatted with multiple items per row across multiple lines.
+    /// Set to -1 to disable this format.
+    pub max_compact_array_complexity: i32,
+
+    /// Maximum nesting level of the rows of an array or object formatted as a table.
+    /// Set to -1 to disable this feature.
+    pub max_table_row_complexity: i32,
+
+    /// Maximum length difference between property names in an object to align them vertically.
+    pub max_prop_name_padding: usize,
+
+    /// If true, colons in aligned object properties are placed right after the property name.
+    pub colon_before_prop_name_padding: bool,
+
+    /// Determines whether commas in table-formatted rows are lined up in their own column.
+    pub table_comma_placement: TableCommaPlacement,
+
+    /// Minimum items per row to format an array with multiple items per line.
+    pub min_compact_array_row_items: usize,
+
+    /// Depth at which lists/objects are always fully expanded, regardless of other settings.
+    /// -1 = none; 0 = root node only; 1 = root node and its children.
+    pub always_expand_depth: i32,
+
+    /// If true, spaces are included inside brackets for nested arrays/objects.
+    pub nested_bracket_padding: bool,
+
+    /// If true, spaces are included inside brackets for simple arrays/objects.
+    pub simple_bracket_padding: bool,
+
+    /// If true, includes a space after property colons.
+    pub colon_padding: bool,
+
+    /// If true, includes a space after commas.
+    pub comma_padding: bool,
+
+    /// If true, spaces are included between JSON data and comments.
+    pub comment_padding: bool,
+
+    /// Controls alignment of numbers in table columns or compact multiline arrays.
+    pub number_list_alignment: NumberListAlignment,
+
+    /// Number of spaces to use per indent level.
+    pub indent_spaces: usize,
+
+    /// Uses a single tab per indent level, instead of spaces.
+    pub use_tab_to_indent: bool,
+
+    /// String attached to the beginning of every line, before regular indentation.
+    pub prefix_string: String,
+
+    /// Determines how the parser and formatter should treat comments.
+    pub comment_policy: CommentPolicy,
+
+    /// If true, blank lines in the original input should be preserved in the output.
+    pub preserve_blank_lines: bool,
+
+    /// If true, allows a comma after the last element in arrays or objects.
+    pub allow_trailing_commas: bool,
+}
+
+impl Default for FracturedJsonOptions {
+    fn default() -> Self {
+        Self {
+            // Line endings and overall width
+            json_eol_style: EolStyle::Default,
+            max_total_line_length: 120,
+            // Complexity limits for the inline, compact-array and table layouts
+            max_inline_complexity: 2,
+            max_compact_array_complexity: 2,
+            max_table_row_complexity: 2,
+            // Alignment of property names and table commas
+            max_prop_name_padding: 16,
+            colon_before_prop_name_padding: false,
+            table_comma_placement: TableCommaPlacement::BeforePaddingExceptNumbers,
+            min_compact_array_row_items: 3,
+            always_expand_depth: -1,
+            // Whitespace around brackets, colons, commas and comments
+            nested_bracket_padding: true,
+            simple_bracket_padding: false,
+            colon_padding: true,
+            comma_padding: true,
+            comment_padding: true,
+            number_list_alignment: NumberListAlignment::Decimal,
+            // Indentation
+            indent_spaces: 4,
+            use_tab_to_indent: false,
+            prefix_string: String::new(),
+            // Non-standard JSON features
+            comment_policy: CommentPolicy::TreatAsError,
+            preserve_blank_lines: false,
+            allow_trailing_commas: false,
+        }
+    }
+}
+
+impl FracturedJsonOptions {
+    /// Returns the recommended settings, which are the same as `Default::default()`.
+    pub fn recommended() -> Self {
+        FracturedJsonOptions::default()
+    }
+
+    /// Returns the line-ending string selected by `json_eol_style`.
+    pub fn eol_str(&self) -> &'static str {
+        let use_crlf = match self.json_eol_style {
+            EolStyle::Crlf => true,
+            EolStyle::Lf => false,
+            // Decided at compile time from the target platform.
+            EolStyle::Default => cfg!(windows),
+        };
+
+        if use_crlf {
+            "\r\n"
+        } else {
+            "\n"
+        }
+    }
+}
diff --git a/crates/fractured-json/src/padded_tokens.rs b/crates/fractured-json/src/padded_tokens.rs
new file mode 100644
index 0000000000..6b52f083bd
--- /dev/null
+++ b/crates/fractured-json/src/padded_tokens.rs
@@ -0,0 +1,184 @@
+use crate::item::JsonItemType;
+use crate::options::FracturedJsonOptions;
+
+/// Which bracket padding applies to an array or object.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum BracketPaddingType {
+    Empty,
+    Simple,
+    Complex,
+}
+
+/// Pre-computed formatting tokens with padding based on options.
+#[derive(Debug, Clone)]
+pub struct PaddedFormattingTokens {
+    pub eol: String,
+    pub indent_unit: String,
+    pub prefix_string_len: usize,
+
+    pub colon: String,
+    pub colon_len: usize,
+    pub comma: String,
+    pub comma_len: usize,
+    pub comma_no_pad: String,
+    pub dummy_comma: String,
+    pub comment: String,
+    pub comment_len: usize,
+
+    pub arr_start_empty: String,
+    pub arr_start_simple: String,
+    pub arr_start_complex: String,
+    pub arr_end_empty: String,
+    pub arr_end_simple: String,
+    pub arr_end_complex: String,
+
+    pub obj_start_empty: String,
+    pub obj_start_simple: String,
+    pub obj_start_complex: String,
+    pub obj_end_empty: String,
+    pub obj_end_simple: String,
+    pub obj_end_complex: String,
+
+    pub literal_null_len: usize,
+    pub literal_true_len: usize,
+    pub literal_false_len: usize,
+}
+
+impl PaddedFormattingTokens {
+    /// Builds every token string once from `options` so the formatter can reuse them.
+    pub fn new(options: &FracturedJsonOptions) -> Self {
+        let eol = options.eol_str().to_string();
+
+        let indent_unit = if options.use_tab_to_indent {
+            "\t".to_string()
+        } else {
+            " ".repeat(options.indent_spaces)
+        };
+
+        let colon = if options.colon_padding { ": " } else { ":" }.to_string();
+        let comma = if options.comma_padding { ", " } else { "," }.to_string();
+        let comma_no_pad = ",".to_string();
+        // Blank stand-in with the same width as a real comma, used to keep columns aligned.
+        let dummy_comma = " ".repeat(comma.len());
+        let comment = if options.comment_padding { " " } else { "" }.to_string();
+
+        let (arr_start_simple, arr_end_simple) = if options.simple_bracket_padding {
+            ("[ ".to_string(), " ]".to_string())
+        } else {
+            ("[".to_string(), "]".to_string())
+        };
+
+        let (arr_start_complex, arr_end_complex) = if options.nested_bracket_padding {
+            ("[ ".to_string(), " ]".to_string())
+        } else {
+            ("[".to_string(), "]".to_string())
+        };
+
+        let (obj_start_simple, obj_end_simple) = if options.simple_bracket_padding {
+            ("{ ".to_string(), " }".to_string())
+        } else {
+            ("{".to_string(), "}".to_string())
+        };
+
+        let (obj_start_complex, obj_end_complex) = if options.nested_bracket_padding {
+            ("{ ".to_string(), " }".to_string())
+        } else {
+            ("{".to_string(), "}".to_string())
+        };
+
+        Self {
+            eol,
+            indent_unit,
+            // NOTE(review): this is a byte length; confirm it matches how other lengths are
+            // measured if non-ASCII prefixes are expected.
+            prefix_string_len: options.prefix_string.len(),
+            colon_len: colon.len(),
+            colon,
+            comma_len: comma.len(),
+            comma,
+            comma_no_pad,
+            dummy_comma,
+            comment_len: comment.len(),
+            comment,
+            arr_start_empty: "[".to_string(),
+            arr_start_simple,
+            arr_start_complex,
+            arr_end_empty: "]".to_string(),
+            arr_end_simple,
+            arr_end_complex,
+            obj_start_empty: "{".to_string(),
+            obj_start_simple,
+            obj_start_complex,
+            obj_end_empty: "}".to_string(),
+            obj_end_simple,
+            obj_end_complex,
+            // Derived from the literals themselves rather than hard-coded.
+            literal_null_len: "null".len(),
+            literal_true_len: "true".len(),
+            literal_false_len: "false".len(),
+        }
+    }
+
+    /// Returns the indentation string for `depth` levels.
+    pub fn indent(&self, depth: usize) -> String {
+        self.indent_unit.repeat(depth)
+    }
+
+    /// Opening array bracket for the given padding type.
+    pub fn arr_start(&self, pad_type: BracketPaddingType) -> &str {
+        match pad_type {
+            BracketPaddingType::Empty => &self.arr_start_empty,
+            BracketPaddingType::Simple => &self.arr_start_simple,
+            BracketPaddingType::Complex => &self.arr_start_complex,
+        }
+    }
+
+    /// Closing array bracket for the given padding type.
+    pub fn arr_end(&self, pad_type: BracketPaddingType) -> &str {
+        match pad_type {
+            BracketPaddingType::Empty => &self.arr_end_empty,
+            BracketPaddingType::Simple => &self.arr_end_simple,
+            BracketPaddingType::Complex => &self.arr_end_complex,
+        }
+    }
+
+    /// Opening object brace for the given padding type.
+    pub fn obj_start(&self, pad_type: BracketPaddingType) -> &str {
+        match pad_type {
+            BracketPaddingType::Empty => &self.obj_start_empty,
+            BracketPaddingType::Simple => &self.obj_start_simple,
+            BracketPaddingType::Complex => &self.obj_start_complex,
+        }
+    }
+
+    /// Closing object brace for the given padding type.
+    pub fn obj_end(&self, pad_type: BracketPaddingType) -> &str {
+        match pad_type {
+            BracketPaddingType::Empty => &self.obj_end_empty,
+            BracketPaddingType::Simple => &self.obj_end_simple,
+            BracketPaddingType::Complex => &self.obj_end_complex,
+        }
+    }
+
+    /// Opening token for an array or object; the empty string for any other item type.
+    pub fn start(&self, item_type: JsonItemType, pad_type: BracketPaddingType) -> &str {
+        match item_type {
+            JsonItemType::Array => self.arr_start(pad_type),
+            JsonItemType::Object => self.obj_start(pad_type),
+            _ => "",
+        }
+    }
+
+    /// Closing token for an array or object; the empty string for any other item type.
+    pub fn end(&self, item_type: JsonItemType, pad_type: BracketPaddingType) -> &str {
+        match item_type {
+            JsonItemType::Array => self.arr_end(pad_type),
+            JsonItemType::Object => self.obj_end(pad_type),
+            _ => "",
+        }
+    }
+
+    /// Length of the token returned by `start`.
+    pub fn start_len(&self, item_type: JsonItemType, pad_type: BracketPaddingType) -> usize {
+        self.start(item_type, pad_type).len()
+    }
+
+    /// Length of the token returned by `end`.
+    pub fn end_len(&self, item_type: JsonItemType, pad_type: BracketPaddingType) -> usize {
+        self.end(item_type, pad_type).len()
+    }
+
+    /// Length of the token returned by `arr_start`.
+    pub fn arr_start_len(&self, pad_type: BracketPaddingType) -> usize {
+        self.arr_start(pad_type).len()
+    }
+
+    /// Length of the token returned by `arr_end`.
+    pub fn arr_end_len(&self, pad_type: BracketPaddingType) -> usize {
+        self.arr_end(pad_type).len()
+    }
+}
diff --git a/crates/fractured-json/src/template.rs b/crates/fractured-json/src/template.rs
new file mode 100644
index 0000000000..ecaa006cfd
--- /dev/null
+++ b/crates/fractured-json/src/template.rs
@@ -0,0 +1,462 @@
+use crate::item::{JsonItem, JsonItemType};
+use crate::options::NumberListAlignment;
+use crate::padded_tokens::{BracketPaddingType, PaddedFormattingTokens};
+
+/// Type of the column, for table formatting purposes.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum TableColumnType {
+    Unknown,
+    Number,
+    Array,
+    Object,
+    Simple,
+    Mixed,
+}
+
+/// Collects spacing information about the columns of a potential table.
+#[derive(Debug, Clone)]
+pub struct TableTemplate {
+    /// The property name in the table that this segment matches up with.
+    pub location_in_parent: Option<String>,
+
+    /// Type of the column, for table formatting purposes.
+    pub column_type: TableColumnType,
+
+    /// Number of rows measured.
+    pub row_count: usize,
+
+    /// Length of the longest property name.
+    pub name_length: usize,
+
+    /// Length of the shortest property name.
+    pub name_minimum: usize,
+
+    /// Largest length for the value parts of the column.
+    pub max_value_length: usize,
+
+    /// Length of the largest value that can't be split apart.
+    pub max_atomic_value_length: usize,
+
+    pub prefix_comment_length: usize,
+    pub middle_comment_length: usize,
+    pub any_middle_comment_has_newline: bool,
+    pub postfix_comment_length: usize,
+    pub is_any_post_comment_line_style: bool,
+    pub pad_type: BracketPaddingType,
+    pub requires_multiple_lines: bool,
+
+    /// Length of the value for this template when things are complicated.
+    pub composite_value_length: usize,
+
+    /// Length of the entire template, including space for the value, property name, and all comments.
+    pub total_length: usize,
+
+    /// If the row contains non-empty array or objects whose value is shorter than the literal null.
+    pub shorter_than_null_adjustment: usize,
+
+    /// True if at least one row in the column this represents has a null value.
+    pub contains_null: bool,
+
+    /// Sub-templates for array/object children.
+    pub children: Vec<TableTemplate>,
+
+    // Number alignment fields
+    number_list_alignment: NumberListAlignment,
+    max_dig_before_dec: usize,
+    max_dig_after_dec: usize,
+
+    // Token lengths copied from the pads at construction time
+    pads_colon_len: usize,
+    pads_comma_len: usize,
+    pads_comment_len: usize,
+    pads_literal_null_len: usize,
+}
+
+impl TableTemplate {
+    /// Creates an empty template with nothing measured yet.
+    pub fn new(pads: &PaddedFormattingTokens, number_list_alignment: NumberListAlignment) -> Self {
+        Self {
+            location_in_parent: None,
+            column_type: TableColumnType::Unknown,
+            row_count: 0,
+            name_length: 0,
+            // Starts at the maximum so the first measured row always lowers it.
+            name_minimum: usize::MAX,
+            max_value_length: 0,
+            max_atomic_value_length: 0,
+            prefix_comment_length: 0,
+            middle_comment_length: 0,
+            any_middle_comment_has_newline: false,
+            postfix_comment_length: 0,
+            is_any_post_comment_line_style: false,
+            pad_type: BracketPaddingType::Simple,
+            requires_multiple_lines: false,
+            composite_value_length: 0,
+            total_length: 0,
+            shorter_than_null_adjustment: 0,
+            contains_null: false,
+            children: Vec::new(),
+            number_list_alignment,
+            max_dig_before_dec: 0,
+            max_dig_after_dec: 0,
+            pads_colon_len: pads.colon_len,
+            pads_comma_len: pads.comma_len,
+            pads_comment_len: pads.comment_len,
+            pads_literal_null_len: pads.literal_null_len,
+        }
+    }
+
+    /// Analyzes an object/array for formatting as a table.
+    ///
+    /// Every child of `table_root` is measured as one row, then the lengths are computed
+    /// with no complexity limit.
+    pub fn measure_table_root(
+        &mut self,
+        table_root: &JsonItem,
+        pads: &PaddedFormattingTokens,
+        recursive: bool,
+    ) {
+        for child in &table_root.children {
+            self.measure_row_segment(child, pads, recursive);
+        }
+        self.prune_and_recompute(pads, i32::MAX);
+    }
+
+    /// Check if the template's width fits in the given size.
+    ///
+    /// While the template is too wide, one level of sub-templates is pruned and the lengths
+    /// are recomputed. Returns false when it is still too wide with no levels left to prune.
+    pub fn try_to_fit(&mut self, pads: &PaddedFormattingTokens, maximum_length: usize) -> bool {
+        let mut complexity = self.get_template_complexity();
+        loop {
+            if self.total_length <= maximum_length {
+                return true;
+            }
+            if complexity <= 0 {
+                return false;
+            }
+            complexity -= 1;
+            self.prune_and_recompute(pads, complexity);
+        }
+    }
+
+    /// Length of the largest item that can't be split across multiple lines.
+    pub fn atomic_item_size(&self) -> usize {
+        // A comment only costs its padding when the comment itself is present.
+        let middle_pad = if self.middle_comment_length > 0 {
+            self.pads_comment_len
+        } else {
+            0
+        };
+        let postfix_pad = if self.postfix_comment_length > 0 {
+            self.pads_comment_len
+        } else {
+            0
+        };
+
+        self.name_length
+            + self.pads_colon_len
+            + self.middle_comment_length
+            + middle_pad
+            + self.max_atomic_value_length
+            + self.postfix_comment_length
+            + postfix_pad
+            + self.pads_comma_len
+    }
+
+    /// Folds one row (or one cell of a row, when called recursively) into this template's
+    /// measurements.
+    fn measure_row_segment(
+        &mut self,
+        row_segment: &JsonItem,
+        pads: &PaddedFormattingTokens,
+        recursive: bool,
+    ) {
+        // Standalone comments and blank lines don't figure into template measurements
+        if matches!(
+            row_segment.item_type,
+            JsonItemType::BlankLine | JsonItemType::BlockComment | JsonItemType::LineComment
+        ) {
+            return;
+        }
+
+        let row_table_type = match row_segment.item_type {
+            JsonItemType::Null => TableColumnType::Unknown,
+            JsonItemType::Number => TableColumnType::Number,
+            JsonItemType::Array => TableColumnType::Array,
+            JsonItemType::Object => TableColumnType::Object,
+            _ => TableColumnType::Simple,
+        };
+
+        // A null row leaves the column type alone; two different non-null types make it Mixed.
+        if self.column_type == TableColumnType::Unknown {
+            self.column_type = row_table_type;
+        } else if row_table_type != TableColumnType::Unknown && self.column_type != row_table_type {
+            self.column_type = TableColumnType::Mixed;
+        }
+
+        if row_segment.item_type == JsonItemType::Null {
+            self.max_dig_before_dec = self.max_dig_before_dec.max(pads.literal_null_len);
+            self.contains_null = true;
+        }
+
+        if row_segment.requires_multiple_lines {
+            self.requires_multiple_lines = true;
+            self.column_type = TableColumnType::Mixed;
+        }
+
+        // Update the numbers
+        self.row_count += 1;
+        self.name_length = self.name_length.max(row_segment.name_length);
+        self.name_minimum = self.name_minimum.min(row_segment.name_length);
+        self.max_value_length = self.max_value_length.max(row_segment.value_length);
+        self.middle_comment_length = self
+            .middle_comment_length
+            .max(row_segment.middle_comment_length);
+        self.prefix_comment_length = self
+            .prefix_comment_length
+            .max(row_segment.prefix_comment_length);
+        self.postfix_comment_length = self
+            .postfix_comment_length
+            .max(row_segment.postfix_comment_length);
+        self.is_any_post_comment_line_style |= row_segment.is_post_comment_line_style;
+        self.any_middle_comment_has_newline |= row_segment.middle_comment_has_newline;
+
+        if !matches!(
+            row_segment.item_type,
+            JsonItemType::Array | JsonItemType::Object
+        ) {
+            self.max_atomic_value_length =
+                self.max_atomic_value_length.max(row_segment.value_length);
+        }
+
+        if row_segment.complexity >= 2 {
+            self.pad_type = BracketPaddingType::Complex;
+        }
+
+        if self.requires_multiple_lines || row_segment.item_type == JsonItemType::Null {
+            return;
+        }
+
+        if self.column_type == TableColumnType::Array && recursive {
+            // Array cells are matched to sub-templates by position.
+            for (i, child) in row_segment.children.iter().enumerate() {
+                if self.children.len() <= i {
+                    self.children
+                        .push(TableTemplate::new(pads, self.number_list_alignment));
+                }
+                self.children[i].measure_row_segment(child, pads, true);
+            }
+        } else if self.column_type == TableColumnType::Object && recursive {
+            // Check for duplicate keys
+            let distinct_keys: std::collections::HashSet<_> =
+                row_segment.children.iter().map(|c| &c.name).collect();
+            if distinct_keys.len() != row_segment.children.len() {
+                self.column_type = TableColumnType::Simple;
+                return;
+            }
+
+            // Object cells are matched to sub-templates by property name.
+            for child in &row_segment.children {
+                let sub_template = self
+                    .children
+                    .iter_mut()
+                    .find(|t| t.location_in_parent.as_ref() == Some(&child.name));
+
+                if let Some(template) = sub_template {
+                    template.measure_row_segment(child, pads, true);
+                } else {
+                    let mut new_template = TableTemplate::new(pads, self.number_list_alignment);
+                    new_template.location_in_parent = Some(child.name.clone());
+                    new_template.measure_row_segment(child, pads, true);
+                    self.children.push(new_template);
+                }
+            }
+        }
+
+        // Number alignment handling
+        if self.column_type == TableColumnType::Number
+            && !matches!(
+                self.number_list_alignment,
+                NumberListAlignment::Left | NumberListAlignment::Right
+            )
+        {
+            // In Normalize mode any value that cannot be rewritten safely downgrades the
+            // whole column to left alignment.
+            let normalized_str = if self.number_list_alignment == NumberListAlignment::Normalize {
+                if let Ok(parsed) = row_segment.value.parse::<f64>() {
+                    if parsed.is_nan() || parsed.is_infinite() {
+                        self.number_list_alignment = NumberListAlignment::Left;
+                        return;
+                    }
+                    let formatted = format_number_general(parsed);
+                    if formatted.len() > 16 || formatted.contains('E') || formatted.contains('e') {
+                        self.number_list_alignment = NumberListAlignment::Left;
+                        return;
+                    }
+                    // Check for underflow (non-zero becoming zero)
+                    if parsed == 0.0 && !is_truly_zero(&row_segment.value) {
+                        self.number_list_alignment = NumberListAlignment::Left;
+                        return;
+                    }
+                    formatted
+                } else {
+                    self.number_list_alignment = NumberListAlignment::Left;
+                    return;
+                }
+            } else {
+                row_segment.value.clone()
+            };
+
+            let index_of_dot = normalized_str.find(|c| c == '.' || c == 'e' || c == 'E');
+            if let Some(idx) = index_of_dot {
+                self.max_dig_before_dec = self.max_dig_before_dec.max(idx);
+                self.max_dig_after_dec = self.max_dig_after_dec.max(normalized_str.len() - idx - 1);
+            } else {
+                self.max_dig_before_dec = self.max_dig_before_dec.max(normalized_str.len());
+            }
+        }
+    }
+
+    /// Drops sub-templates that are not allowed at `max_allowed_complexity` and recomputes
+    /// `composite_value_length` and `total_length` from what remains.
+    fn prune_and_recompute(&mut self, pads: &PaddedFormattingTokens, max_allowed_complexity: i32) {
+        if max_allowed_complexity <= 0
+            || !matches!(
+                self.column_type,
+                TableColumnType::Array | TableColumnType::Object
+            )
+            || self.row_count < 2
+        {
+            self.children.clear();
+        }
+
+        for child in &mut self.children {
+            child.prune_and_recompute(pads, max_allowed_complexity - 1);
+        }
+
+        if self.column_type == TableColumnType::Number {
+            self.composite_value_length = self.get_number_field_width();
+        } else if !self.children.is_empty() {
+            let children_len: usize = self.children.iter().map(|c| c.total_length).sum();
+            let commas_len = if self.children.len() > 1 {
+                pads.comma_len * (self.children.len() - 1)
+            } else {
+                0
+            };
+            self.composite_value_length = children_len
+                + commas_len
+                + pads.arr_start_len(self.pad_type)
+                + pads.arr_end_len(self.pad_type);
+
+            // A column that also holds nulls must be at least as wide as the literal null.
+            if self.contains_null && self.composite_value_length < pads.literal_null_len {
+                self.shorter_than_null_adjustment =
+                    pads.literal_null_len - self.composite_value_length;
+                self.composite_value_length = pads.literal_null_len;
+            }
+        } else {
+            self.composite_value_length = self.max_value_length;
+        }
+
+        // Each optional part contributes its own length plus its separator, or nothing.
+        let prefix_len = if self.prefix_comment_length > 0 {
+            self.prefix_comment_length + pads.comment_len
+        } else {
+            0
+        };
+        let name_len = if self.name_length > 0 {
+            self.name_length + pads.colon_len
+        } else {
+            0
+        };
+        let middle_len = if self.middle_comment_length > 0 {
+            self.middle_comment_length + pads.comment_len
+        } else {
+            0
+        };
+        let postfix_len = if self.postfix_comment_length > 0 {
+            self.postfix_comment_length + pads.comment_len
+        } else {
+            0
+        };
+
+        self.total_length =
+            prefix_len + name_len + middle_len + self.composite_value_length + postfix_len;
+    }
+
+    /// Depth of the sub-template tree: 0 without children, otherwise one more than the
+    /// deepest child.
+    fn get_template_complexity(&self) -> i32 {
+        if self.children.is_empty() {
+            0
+        } else {
+            1 + self
+                .children
+                .iter()
+                .map(|c| c.get_template_complexity())
+                .max()
+                .unwrap_or(0)
+        }
+    }
+
+    /// Width of a number column under the current alignment mode.
+    fn get_number_field_width(&self) -> usize {
+        match self.number_list_alignment {
+            NumberListAlignment::Left | NumberListAlignment::Right => self.max_value_length,
+            NumberListAlignment::Decimal | NumberListAlignment::Normalize => {
+                if self.max_dig_after_dec > 0 {
+                    // Digits before, the decimal point itself, digits after.
+                    self.max_dig_before_dec + 1 + self.max_dig_after_dec
+                } else {
+                    self.max_dig_before_dec
+                }
+            }
+        }
+    }
+
+    /// Format a number according to the alignment settings.
+    ///
+    /// Returns the text to write together with the number of spaces to put before and
+    /// after it.
+    pub fn format_number(&self, value: &str, item_type: JsonItemType) -> (String, usize, usize) {
+        if item_type == JsonItemType::Null {
+            // Use the stored literal length instead of a hard-coded 4.
+            let left_pad = self
+                .max_dig_before_dec
+                .saturating_sub(self.pads_literal_null_len);
+            let right_pad = self
+                .composite_value_length
+                .saturating_sub(self.max_dig_before_dec);
+            return (value.to_string(), left_pad, right_pad);
+        }
+
+        match self.number_list_alignment {
+            NumberListAlignment::Left => {
+                let right_pad = self.max_value_length.saturating_sub(value.len());
+                (value.to_string(), 0, right_pad)
+            }
+            NumberListAlignment::Right => {
+                let left_pad = self.max_value_length.saturating_sub(value.len());
+                (value.to_string(), left_pad, 0)
+            }
+            NumberListAlignment::Normalize => {
+                if let Ok(parsed) = value.parse::<f64>() {
+                    let formatted = format!("{:.prec$}", parsed, prec = self.max_dig_after_dec);
+                    let left_pad = self.composite_value_length.saturating_sub(formatted.len());
+                    (formatted, left_pad, 0)
+                } else {
+                    (value.to_string(), 0, 0)
+                }
+            }
+            NumberListAlignment::Decimal => {
+                let index_of_dot = value.find(|c| c == '.' || c == 'e' || c == 'E');
+                let (left_pad, right_pad) = if let Some(idx) = index_of_dot {
+                    let left = self.max_dig_before_dec.saturating_sub(idx);
+                    let right = self
+                        .composite_value_length
+                        .saturating_sub(left + value.len());
+                    (left, right)
+                } else {
+                    let left = self.max_dig_before_dec.saturating_sub(value.len());
+                    let right = self
+                        .composite_value_length
+                        .saturating_sub(self.max_dig_before_dec);
+                    (left, right)
+                };
+                (value.to_string(), left_pad, right_pad)
+            }
+        }
+    }
+}
+
+/// Returns true when the number text consists only of zeros and decimal points (after any
+/// leading minus signs), optionally followed by an exponent.
+fn is_truly_zero(s: &str) -> bool {
+    let s = s.trim_start_matches('-');
+    for c in s.chars() {
+        match c {
+            '0' | '.' => continue,
+            'e' | 'E' => return true, // 0e... is still zero
+            _ => return false,
+        }
+    }
+    true
+}
+
+/// Formats `value` in a general style similar to C#'s G format: zero is written as "0",
+/// magnitudes of at least 1e15 or below 1e-4 use exponent notation, and everything else
+/// uses plain decimal notation.
+fn format_number_general(value: f64) -> String {
+    let abs_val = value.abs();
+    if abs_val == 0.0 {
+        return "0".to_string();
+    }
+    if abs_val >= 1e15 || abs_val < 1e-4 {
+        format!("{:E}", value)
+    } else {
+        // `Display` for f64 already writes no trailing zeros, so nothing needs stripping.
+        value.to_string()
+    }
+}
diff --git a/crates/fractured-json/src/tests.rs b/crates/fractured-json/src/tests.rs
new file mode 100644
index 0000000000..94502e65da
--- /dev/null
+++ b/crates/fractured-json/src/tests.rs
@@ -0,0 +1,679 @@
+use crate::{EolStyle, Formatter, FracturedJsonOptions, NumberListAlignment};
+
+/// Number of lines in `s`.
+fn count_lines(s: &str) -> usize {
+    s.lines().count()
+}
+
+/// True when `substring` starts at the same byte offset in every line that contains it
+/// (also true when no line contains it).
+fn test_instances_line_up(lines: &[&str], substring: &str) -> bool {
+    let positions: Vec<Option<usize>> = lines.iter().map(|line| line.find(substring)).collect();
+
+    let found_positions: Vec<usize> = positions.iter().filter_map(|p| *p).collect();
+    if found_positions.is_empty() {
+        return true;
+    }
+
+    let first = found_positions[0];
+    found_positions.iter().all(|&p| p == first)
+}
+
+#[test]
+fn test_simple_array_inline() {
+    let formatter =
Formatter::new(); + let input = r#"[1, 2, 3]"#; + let output = formatter.reformat(input, 0).unwrap(); + assert_eq!(output.trim(), "[1, 2, 3]"); +} + +#[test] +fn test_simple_object_inline() { + let formatter = Formatter::new(); + let input = r#"{"a": 1, "b": 2}"#; + let output = formatter.reformat(input, 0).unwrap(); + assert_eq!(output.trim(), r#"{"a": 1, "b": 2}"#); +} + +#[test] +fn test_nested_array_inline() { + let formatter = Formatter::new(); + let input = r#"[[1, 2], [3, 4]]"#; + let output = formatter.reformat(input, 0).unwrap(); + assert_eq!(output.trim(), "[ [1, 2], [3, 4] ]"); +} + +#[test] +fn test_empty_array() { + let formatter = Formatter::new(); + let input = r#"[]"#; + let output = formatter.reformat(input, 0).unwrap(); + assert_eq!(output.trim(), "[]"); +} + +#[test] +fn test_empty_object() { + let formatter = Formatter::new(); + let input = r#"{}"#; + let output = formatter.reformat(input, 0).unwrap(); + assert_eq!(output.trim(), "{}"); +} + +#[test] +fn test_null_value() { + let formatter = Formatter::new(); + let input = r#"null"#; + let output = formatter.reformat(input, 0).unwrap(); + assert_eq!(output.trim(), "null"); +} + +#[test] +fn test_boolean_values() { + let formatter = Formatter::new(); + + let output = formatter.reformat("true", 0).unwrap(); + assert_eq!(output.trim(), "true"); + + let output = formatter.reformat("false", 0).unwrap(); + assert_eq!(output.trim(), "false"); +} + +#[test] +fn test_string_value() { + let formatter = Formatter::new(); + let input = r#""hello world""#; + let output = formatter.reformat(input, 0).unwrap(); + assert_eq!(output.trim(), r#""hello world""#); +} + +#[test] +fn test_number_value() { + let formatter = Formatter::new(); + let input = r#"42"#; + let output = formatter.reformat(input, 0).unwrap(); + assert_eq!(output.trim(), "42"); +} + +#[test] +fn test_correct_line_count_for_inline_complexity_0() { + let mut options = FracturedJsonOptions::default(); + options.max_inline_complexity = 0; + let 
formatter = Formatter::with_options(options); + + let input = r#"[[1, 2], [3, 4], [5, 6]]"#; + let output = formatter.reformat(input, 0).unwrap(); + + // With complexity 0, nothing should be inlined + assert!(count_lines(&output) > 1); +} + +#[test] +fn test_correct_line_count_for_inline_complexity_1() { + let mut options = FracturedJsonOptions::default(); + options.max_inline_complexity = 1; + let formatter = Formatter::with_options(options); + + let input = r#"[[1, 2], [3, 4], [5, 6]]"#; + let output = formatter.reformat(input, 0).unwrap(); + + // With complexity 1, inner arrays should be inlined but outer should expand + let lines: Vec<&str> = output.lines().collect(); + assert!(lines.len() >= 3); +} + +#[test] +fn test_correct_line_count_for_inline_complexity_2() { + let mut options = FracturedJsonOptions::default(); + options.max_inline_complexity = 2; + let formatter = Formatter::with_options(options); + + let input = r#"[[1, 2], [3, 4], [5, 6]]"#; + let output = formatter.reformat(input, 0).unwrap(); + + // With complexity 2, everything should fit on one line + assert_eq!(count_lines(&output), 1); +} + +#[test] +fn test_max_line_length_respected() { + let mut options = FracturedJsonOptions::default(); + options.max_total_line_length = 40; + let max_len = options.max_total_line_length; + let formatter = Formatter::with_options(options); + + let input = r#"{"name": "John", "age": 30, "city": "New York"}"#; + let output = formatter.reformat(input, 0).unwrap(); + + // Check that no line exceeds max length (except possibly single elements) + for line in output.lines() { + if line.contains(':') && !line.contains('{') && !line.contains('}') { + // This is a property line, should respect max length + assert!( + line.len() <= max_len + 10, + "Line too long: {} (len={})", + line, + line.len() + ); + } + } +} + +#[test] +fn test_nested_elements_line_up() { + let formatter = Formatter::new(); + let input = r#"[ + {"type": "turret", "hp": 400}, + {"type": "assassin", 
"hp": 80}, + {"type": "berserker", "hp": 150} + ]"#; + let output = formatter.reformat(input, 0).unwrap(); + + let lines: Vec<&str> = output.lines().collect(); + + // Check that "type" properties line up + assert!(test_instances_line_up(&lines, "\"type\"")); + + // Check that "hp" properties line up + assert!(test_instances_line_up(&lines, "\"hp\"")); +} + +#[test] +fn test_array_table_formatting() { + let formatter = Formatter::new(); + let input = r#"[ + [0.0, 3.5, 10.5], + [0.0, 0.0, 1.2], + [0.4, 1.9, 4.4] + ]"#; + let output = formatter.reformat(input, 0).unwrap(); + + // Should format as a table with aligned columns + let lines: Vec<&str> = output.lines().collect(); + // With default settings, this small array fits on one line (complexity 2, max_inline_complexity 2) + // So we just verify it's valid JSON and contains all the values + let parsed: Result = serde_json::from_str(&output); + assert!(parsed.is_ok(), "Output should be valid JSON: {}", output); + assert!(output.contains("0.0")); + assert!(output.contains("3.5")); + assert!(output.contains("10.5")); +} + +#[test] +fn test_number_alignment_decimal() { + let mut options = FracturedJsonOptions::default(); + options.number_list_alignment = NumberListAlignment::Decimal; + let formatter = Formatter::with_options(options); + + let input = r#"[1.5, 10.25, 100.125]"#; + let output = formatter.reformat(input, 0).unwrap(); + + // Numbers should be aligned by decimal point in table/compact format + assert!(output.contains("1.5") || output.contains("1.5")); +} + +#[test] +fn test_number_alignment_left() { + let mut options = FracturedJsonOptions::default(); + options.number_list_alignment = NumberListAlignment::Left; + let formatter = Formatter::with_options(options); + + let input = r#"[1, 10, 100]"#; + let output = formatter.reformat(input, 0).unwrap(); + + assert!(output.contains("1")); + assert!(output.contains("10")); + assert!(output.contains("100")); +} + +#[test] +fn test_number_alignment_right() { + let 
mut options = FracturedJsonOptions::default(); + options.number_list_alignment = NumberListAlignment::Right; + let formatter = Formatter::with_options(options); + + let input = r#"[1, 10, 100]"#; + let output = formatter.reformat(input, 0).unwrap(); + + assert!(output.contains("1")); + assert!(output.contains("10")); + assert!(output.contains("100")); +} + +#[test] +fn test_no_trailing_whitespace() { + let formatter = Formatter::new(); + let input = r#"{"a": [1, 2, 3], "b": {"x": 1, "y": 2}}"#; + let output = formatter.reformat(input, 0).unwrap(); + + for line in output.lines() { + assert!( + !line.ends_with(' '), + "Line has trailing whitespace: '{}'", + line + ); + assert!(!line.ends_with('\t'), "Line has trailing tab: '{}'", line); + } +} + +#[test] +fn test_repeated_formatting_is_stable() { + let formatter = Formatter::new(); + let input = r#"{"a": [1, 2, 3], "b": {"x": 1, "y": 2}}"#; + + let first = formatter.reformat(input, 0).unwrap(); + let minified = formatter.minify(&first).unwrap(); + let second = formatter.reformat(&minified, 0).unwrap(); + let minified2 = formatter.minify(&second).unwrap(); + let third = formatter.reformat(&minified2, 0).unwrap(); + + assert_eq!( + second, third, + "Formatting should be stable after repeated format/minify cycles" + ); +} + +#[test] +fn test_is_well_formed() { + let formatter = Formatter::new(); + let input = r#"{"a": [1, 2, 3], "b": {"x": 1, "y": 2}}"#; + let output = formatter.reformat(input, 0).unwrap(); + + // The output should be valid JSON + let parsed: Result = serde_json::from_str(&output); + assert!(parsed.is_ok(), "Output should be valid JSON: {}", output); +} + +#[test] +fn test_all_strings_exist() { + let formatter = Formatter::new(); + let input = r#"{"name": "John", "city": "New York", "tags": ["developer", "rust"]}"#; + let output = formatter.reformat(input, 0).unwrap(); + + // All string values should appear in the output + assert!(output.contains("John")); + assert!(output.contains("New York")); + 
assert!(output.contains("developer")); + assert!(output.contains("rust")); + assert!(output.contains("name")); + assert!(output.contains("city")); + assert!(output.contains("tags")); +} + +#[test] +fn test_simple_bracket_padding() { + let mut options = FracturedJsonOptions::default(); + options.simple_bracket_padding = true; + let formatter = Formatter::with_options(options); + + let input = r#"[1, 2, 3]"#; + let output = formatter.reformat(input, 0).unwrap(); + + assert!( + output.contains("[ ") && output.contains(" ]"), + "Simple bracket padding should add spaces: {}", + output + ); +} + +#[test] +fn test_no_simple_bracket_padding() { + let mut options = FracturedJsonOptions::default(); + options.simple_bracket_padding = false; + let formatter = Formatter::with_options(options); + + let input = r#"[1, 2, 3]"#; + let output = formatter.reformat(input, 0).unwrap(); + + assert!( + output.starts_with('[') && !output.starts_with("[ "), + "No simple bracket padding: {}", + output + ); +} + +#[test] +fn test_nested_bracket_padding() { + let mut options = FracturedJsonOptions::default(); + options.nested_bracket_padding = true; + let formatter = Formatter::with_options(options); + + let input = r#"[[1, 2], [3, 4]]"#; + let output = formatter.reformat(input, 0).unwrap(); + + // Nested arrays should have padding + assert!( + output.contains("[ "), + "Nested bracket padding should add spaces: {}", + output + ); +} + +#[test] +fn test_indent_spaces() { + let mut options = FracturedJsonOptions::default(); + options.indent_spaces = 2; + options.max_inline_complexity = 0; + let formatter = Formatter::with_options(options); + + let input = r#"{"a": 1}"#; + let output = formatter.reformat(input, 0).unwrap(); + + // Should use 2-space indentation + let lines: Vec<&str> = output.lines().collect(); + if lines.len() > 1 { + let indent_line = lines.iter().find(|l| l.starts_with(' ')).unwrap_or(&""); + let leading_spaces = indent_line.len() - indent_line.trim_start().len(); + 
assert_eq!(leading_spaces, 2, "Should use 2-space indent: {}", output); + } +} + +#[test] +fn test_colon_padding() { + let mut options = FracturedJsonOptions::default(); + options.colon_padding = true; + let formatter = Formatter::with_options(options); + + let input = r#"{"a": 1}"#; + let output = formatter.reformat(input, 0).unwrap(); + + assert!( + output.contains(": "), + "Colon padding should add space after colon: {}", + output + ); +} + +#[test] +fn test_no_colon_padding() { + let mut options = FracturedJsonOptions::default(); + options.colon_padding = false; + let formatter = Formatter::with_options(options); + + let input = r#"{"a": 1}"#; + let output = formatter.reformat(input, 0).unwrap(); + + assert!( + output.contains("\":") && !output.contains("\": "), + "No colon padding: {}", + output + ); +} + +#[test] +fn test_comma_padding() { + let mut options = FracturedJsonOptions::default(); + options.comma_padding = true; + let formatter = Formatter::with_options(options); + + let input = r#"[1, 2, 3]"#; + let output = formatter.reformat(input, 0).unwrap(); + + assert!( + output.contains(", "), + "Comma padding should add space after comma: {}", + output + ); +} + +#[test] +fn test_no_comma_padding() { + let mut options = FracturedJsonOptions::default(); + options.comma_padding = false; + let formatter = Formatter::with_options(options); + + let input = r#"[1, 2, 3]"#; + let output = formatter.reformat(input, 0).unwrap(); + + // Should have commas without spaces + assert!( + output.contains(",") && !output.contains(", "), + "No comma padding: {}", + output + ); +} + +#[test] +fn test_eol_style_lf() { + let mut options = FracturedJsonOptions::default(); + options.json_eol_style = EolStyle::Lf; + options.max_inline_complexity = 0; + let formatter = Formatter::with_options(options); + + let input = r#"{"a": 1}"#; + let output = formatter.reformat(input, 0).unwrap(); + + assert!(output.contains('\n'), "Should contain LF"); + assert!(!output.contains("\r\n"), 
"Should not contain CRLF"); +} + +#[test] +fn test_eol_style_crlf() { + let mut options = FracturedJsonOptions::default(); + options.json_eol_style = EolStyle::Crlf; + options.max_inline_complexity = 0; + let formatter = Formatter::with_options(options); + + let input = r#"{"a": 1}"#; + let output = formatter.reformat(input, 0).unwrap(); + + assert!(output.contains("\r\n"), "Should contain CRLF: {:?}", output); +} + +#[test] +fn test_always_expand_depth() { + let mut options = FracturedJsonOptions::default(); + options.always_expand_depth = 0; + let formatter = Formatter::with_options(options); + + let input = r#"[1, 2, 3]"#; + let output = formatter.reformat(input, 0).unwrap(); + + // Root should be expanded + assert!( + count_lines(&output) > 1, + "Root should be expanded: {}", + output + ); +} + +#[test] +fn test_minify() { + let formatter = Formatter::new(); + let input = r#"{ + "name": "John", + "age": 30 + }"#; + let output = formatter.minify(input).unwrap(); + + // Should be compact with no extra whitespace + assert!(!output.contains('\n'), "Minified should have no newlines"); + assert!( + !output.contains(" "), + "Minified should have no double spaces" + ); +} + +#[test] +fn test_complex_nested_structure() { + let formatter = Formatter::new(); + let input = r#"{ + "users": [ + {"name": "Alice", "scores": [95, 87, 92]}, + {"name": "Bob", "scores": [78, 85, 90]} + ], + "metadata": { + "version": 1, + "generated": true + } + }"#; + let output = formatter.reformat(input, 0).unwrap(); + + // Should be valid JSON + let parsed: Result = serde_json::from_str(&output); + assert!( + parsed.is_ok(), + "Complex structure should produce valid JSON" + ); + + // All data should be preserved + assert!(output.contains("Alice")); + assert!(output.contains("Bob")); + assert!(output.contains("95")); + assert!(output.contains("version")); +} + +#[test] +fn test_deeply_nested() { + let formatter = Formatter::new(); + let input = r#"[[[[1]]]]"#; + let output = 
formatter.reformat(input, 0).unwrap(); + + // Should be valid JSON + let parsed: Result = serde_json::from_str(&output); + assert!(parsed.is_ok()); + + // Should contain the value + assert!(output.contains('1')); +} + +#[test] +fn test_special_characters_in_strings() { + let formatter = Formatter::new(); + let input = r#"{"message": "Hello\nWorld\t!"}"#; + let output = formatter.reformat(input, 0).unwrap(); + + // Should be valid JSON + let parsed: Result = serde_json::from_str(&output); + assert!(parsed.is_ok()); +} + +#[test] +fn test_unicode_strings() { + let formatter = Formatter::new(); + let input = r#"{"greeting": "Hello, \u4e16\u754c!"}"#; + let output = formatter.reformat(input, 0).unwrap(); + + // Should be valid JSON + let parsed: Result = serde_json::from_str(&output); + assert!(parsed.is_ok()); +} + +#[test] +fn test_large_numbers() { + let formatter = Formatter::new(); + let input = r#"[1e308, -1e308, 1.7976931348623157e308]"#; + let output = formatter.reformat(input, 0).unwrap(); + + // Should be valid JSON + let parsed: Result = serde_json::from_str(&output); + assert!(parsed.is_ok()); +} + +#[test] +fn test_small_numbers() { + let formatter = Formatter::new(); + let input = r#"[1e-308, 5e-324]"#; + let output = formatter.reformat(input, 0).unwrap(); + + // Should be valid JSON + let parsed: Result = serde_json::from_str(&output); + assert!(parsed.is_ok()); +} + +#[test] +fn test_mixed_array() { + let formatter = Formatter::new(); + let input = r#"[1, "two", true, null, {"key": "value"}, [1, 2, 3]]"#; + let output = formatter.reformat(input, 0).unwrap(); + + // Should be valid JSON + let parsed: Result = serde_json::from_str(&output); + assert!(parsed.is_ok()); + + // All values should be present + assert!(output.contains("1")); + assert!(output.contains("two")); + assert!(output.contains("true")); + assert!(output.contains("null")); + assert!(output.contains("key")); +} + +#[test] +fn test_file_1_json() { + let formatter = Formatter::new(); + let 
input = include_str!("../tests/test_files/1.json"); + let output = formatter.reformat(input, 0).unwrap(); + + // Should be valid JSON + let parsed: Result = serde_json::from_str(&output); + assert!(parsed.is_ok(), "File 1.json output should be valid JSON"); + + // No trailing whitespace + for (i, line) in output.lines().enumerate() { + assert!( + !line.ends_with(' '), + "Line {} has trailing whitespace: {:?}", + i + 1, + line + ); + } +} + +#[test] +fn test_file_2_json() { + let formatter = Formatter::new(); + let input = include_str!("../tests/test_files/2.json"); + let output = formatter.reformat(input, 0).unwrap(); + + // Should be valid JSON + let parsed: Result = serde_json::from_str(&output); + assert!(parsed.is_ok(), "File 2.json output should be valid JSON"); +} + +#[test] +fn test_file_3_json() { + let formatter = Formatter::new(); + let input = include_str!("../tests/test_files/3.json"); + let output = formatter.reformat(input, 0).unwrap(); + + // Should be valid JSON (just "null") + let parsed: Result = serde_json::from_str(&output); + assert!(parsed.is_ok(), "File 3.json output should be valid JSON"); + assert_eq!(output.trim(), "null"); +} + +#[test] +fn test_prefix_string() { + let mut options = FracturedJsonOptions::default(); + options.prefix_string = "// ".to_string(); + options.max_inline_complexity = 0; + let formatter = Formatter::with_options(options); + + let input = r#"{"a": 1}"#; + let output = formatter.reformat(input, 0).unwrap(); + + // Every line should start with the prefix + for line in output.lines() { + assert!( + line.starts_with("// "), + "Line should start with prefix: {}", + line + ); + } +} + +#[test] +fn test_starting_depth() { + let mut options = FracturedJsonOptions::default(); + options.max_inline_complexity = 0; + let formatter = Formatter::with_options(options); + + let input = r#"{"a": 1}"#; + let output = formatter.reformat(input, 2).unwrap(); + + // Lines should be indented by starting depth + let first_line = 
output.lines().next().unwrap(); + let leading_spaces = first_line.len() - first_line.trim_start().len(); + assert!( + leading_spaces >= 8, + "Should have starting depth indentation: {}", + output + ); +} diff --git a/crates/fractured-json/tests/test_files/0.json b/crates/fractured-json/tests/test_files/0.json new file mode 100644 index 0000000000..5d4515c5c8 --- /dev/null +++ b/crates/fractured-json/tests/test_files/0.json @@ -0,0 +1,204 @@ +{ + "BannerText": [ + "Sometimes you'll have to protect multiple enemy goals." + ], + "Terrain": { + "TileTypes": [ + { + "BlocksMovement": false, + "BlocksVision": false, + "Appearance": " ", + "Name": "Open" + }, + { + "BlocksMovement": true, + "BlocksVision": true, + "Appearance": "*", + "Name": "Stone" + }, + { + "BlocksMovement": true, + "BlocksVision": false, + "Appearance": "~", + "Name": "Water" + }, + { + "BlocksMovement": false, + "BlocksVision": true, + "Appearance": "@", + "Name": "Fog" + } + ], + "Width": 45, + "Height": 20, + "Tiles": [ + " *** *** ******** ", + " ******* **** ", + "*********** **** ", + "*********** *******", + "********* *********** ", + " ****** ******** ", + " *** ****", + " ", + " ", + " ", + " * ****", + " * ***** ** ", + "**** ****** **** ", + "***** **** ** ", + "****** *** ** ", + "****** * ", + " ** ****** ****", + " * * ", + " **** ", + " ** " + ], + "SpawnPointsMap": { + "1": [ + [ + 1, + 8 + ] + ] + }, + "GoalPointsMap": { + "1": [ + [ + 43, + 8 + ], + [ + 43, + 18 + ] + ] + } + }, + "AttackPlans": [ + { + "TeamId": 1, + "Spawns": [ + { + "Time": 0.0, + "UnitType": "Grunt", + "SpawnPointIndex": 0 + }, + { + "Time": 0.0, + "UnitType": "Grunt", + "SpawnPointIndex": 0 + }, + { + "Time": 0.0, + "UnitType": "Grunt", + "SpawnPointIndex": 0 + }, + { + "Time": 0.0, + "UnitType": "Grunt", + "SpawnPointIndex": 0 + }, + { + "Time": 0.0, + "UnitType": "Grunt", + "SpawnPointIndex": 0 + } + ] + }, + { + "TeamId": 2, + "Spawns": [] + } + ], + "DefensePlans": [ + { + "TeamId": 2, + "Placements": [ + 
{ + "UnitType": "Archer", + "Position": [ + 41, + 7 + ] + }, + { + "UnitType": "Archer", + "Position": [ + 41, + 8 + ] + }, + { + "UnitType": "Archer", + "Position": [ + 41, + 9 + ] + }, + { + "UnitType": "Pikeman", + "Position": [ + 40, + 9 + ] + }, + { + "UnitType": "Pikeman", + "Position": [ + 40, + 8 + ] + }, + { + "UnitType": "Pikeman", + "Position": [ + 40, + 7 + ] + }, + { + "UnitType": "Barricade", + "Position": [ + 39, + 7 + ] + }, + { + "UnitType": "Barricade", + "Position": [ + 39, + 8 + ] + }, + { + "UnitType": "Barricade", + "Position": [ + 39, + 9 + ] + }, + { + "UnitType": "Archer", + "Position": [ + 41, + 18 + ] + } + ] + } + ], + "Challenges": [ + { + "Name": "*", + "PlayerTeamId": 2, + "AttackersMustNotReachGoal": false, + "MaximumUnitTypeCount": {} + }, + { + "Name": "**", + "PlayerTeamId": 2, + "AttackersMustNotReachGoal": true, + "MaximumUnitTypeCount": {} + } + ] +} diff --git a/crates/fractured-json/tests/test_files/1.json b/crates/fractured-json/tests/test_files/1.json new file mode 100644 index 0000000000..dd1f3e4224 --- /dev/null +++ b/crates/fractured-json/tests/test_files/1.json @@ -0,0 +1,195 @@ +{ + "SimpleItem": 77, + "ShortArray": ["blue", "blue", "orange", "gray"], + "ShortArray2": [6.02e23, 5000000000, 4], + "LongArray": [ + 2, + 3, + 5, + 7, + 11, + 13, + 17, + 19, + 23, + 29, + 31, + 37, + 41, + 43, + 47, + 53, + 59, + 61, + 67, + 71, + 73, + 79, + 83, + 89, + 97, + 101, + 103, + 107, + 109, + 113, + 127, + 131, + 137, + 139, + 149, + 151, + 157, + 163, + 167, + 173, + 179, + 181, + 191, + 193, + 197, + 199, + 211, + 223, + 227, + 229, + 233, + 239, + 241, + 251, + 257, + 263, + 269, + 271, + 277, + 281, + 283, + 293, + 307, + 311, + 313, + 317, + 331, + 337, + 347, + 349, + 353, + 359, + 367, + 373, + 379, + 383, + 389, + 397, + 401, + 409, + 419, + 421, + 431, + 433, + 439, + 443, + 449, + 457, + 461, + 463, + 467, + 479, + 487, + 491, + 499, + 503, + 509, + 521, + 523, + 541, + 547, + 557, + 563, + 569, + 571, + 577, + 587, + 
593, + 599, + 601, + 607, + 613, + 617, + 619, + 631, + 641, + 643, + 647, + 653, + 659, + 661, + 673, + 677, + 683, + 691, + 701, + 709, + 719, + 727, + 733, + 739, + 743, + 751, + 757, + 761, + 769, + 773, + 787, + 797, + 809, + 811, + 821, + 823, + 827, + 829, + 839, + 853, + 857, + 859, + 863, + 877, + 881, + 883, + 887, + 907, + 911, + 919, + 929, + 937, + 941, + 947, + 953, + 967, + 971, + 977, + 983, + 991, + 997 + ], + "LongArray2": [ + [19, 2], + [3, 8], + [14, 0], + [9, 9], + [9, 9], + [0, 3], + [10, 1], + [9, 1], + [9, 2], + [6, 13], + [18, 5], + [4, 11], + [12, 2] + ], + "ComplexObject": { + "Subthing1": { "X": 55, "Y": 19, "Z": -4 }, + "Subthing2": { "Q": null, "W": [-2, -1, 0, 1] }, + "Distraction": [[], null, null] + } +} diff --git a/crates/fractured-json/tests/test_files/2.json b/crates/fractured-json/tests/test_files/2.json new file mode 100644 index 0000000000..470a626146 --- /dev/null +++ b/crates/fractured-json/tests/test_files/2.json @@ -0,0 +1,98 @@ +{ + "ObjectColumnsObjectRows": { + "Vera": { + "street": "12 Madeup St.", + "city": "Boston", + "state": "MA", + "zip": "02127" + }, + "Chuck": { + "street": "994 Fake Place", + "unit": "102", + "city": "Las Vegas", + "state": "NV", + "zip": "89102" + }, + "Dave": { + "street": "1967 Void Rd.", + "unit": "B", + "city": "Athens", + "state": "GA" + } + }, + "ObjectColumnsArrayRows": { + "Katherine": ["blue", "lightblue", "black"], + "Logan": ["yellow", "blue", "black", "red"], + "Erik": ["red", "purple"], + "Jean": ["lightgreen", "yellow", "black"] + }, + "ArrayColumnsObjectRows": [ + { + "type": "turret", + "hp": 400, + "loc": { + "x": 47, + "y": -4 + }, + "flags": ["stationary"] + }, + { + "type": "assassin", + "hp": 80, + "loc": { + "x": 102, + "y": 6 + }, + "flags": ["stealth"] + }, + { + "type": "berserker", + "hp": 150, + "loc": { + "x": 0, + "y": 0 + } + }, + { + "type": "pittrap", + "loc": { + "x": 10, + "y": -14 + }, + "flags": ["invulnerable", "stationary"] + } + ], + 
"ArrayColumnsArrayRows": [ + [0.0, 3.5, 10.5, 6.5, 2.5, 0.6], + [0.0, 0.0, 1.2, 2.1, 6.7, 4.4], + [0.4, 1.9, 4.4, 5.4, 2.35, 2.0], + [7.4, 1.2, 0.01, 0.0, 2.91, 0.2] + ], + "DissimilarObjectRows": { + "lamp": { + "x": 4, + "y": 1002, + "r": 75, + "g": 0, + "b": 130, + "state": 1 + }, + "address": { + "city": "San Diego", + "state": "CA" + }, + "font": { + "r": 0, + "g": 12, + "b": 40, + "size": 18, + "style": "italic" + } + }, + "DissimilarArrayRows": { + "primes": [2, 3, 5, 7, 11], + "powersOf2": [1, 2, 4, 8, 16, 32, 64, 128, 256], + "factorsOf12": [2, 2, 3], + "someZeros": [0, 0, 0, 0] + } +} diff --git a/crates/fractured-json/tests/test_files/3.json b/crates/fractured-json/tests/test_files/3.json new file mode 100644 index 0000000000..19765bd501 --- /dev/null +++ b/crates/fractured-json/tests/test_files/3.json @@ -0,0 +1 @@ +null diff --git a/crates/fractured-json/tests/test_files/4.json b/crates/fractured-json/tests/test_files/4.json new file mode 100644 index 0000000000..33d53bd389 --- /dev/null +++ b/crates/fractured-json/tests/test_files/4.json @@ -0,0 +1 @@ +"A single string is a valid JSON element" diff --git a/crates/fractured-json/tests/test_files/5.json b/crates/fractured-json/tests/test_files/5.json new file mode 100644 index 0000000000..98a1c473af --- /dev/null +++ b/crates/fractured-json/tests/test_files/5.json @@ -0,0 +1,17 @@ +[ + { + "Name": "李小龍", + "Job": "Actor", + "Born": 1940 + }, + { + "Name": "Mark Twain", + "Job": "Writer", + "Born": 1835 + }, + { + "Name": "孫子", + "Job": "General", + "Born": -544 + } +] diff --git a/crates/fractured-json/tests/test_files/6.json b/crates/fractured-json/tests/test_files/6.json new file mode 100644 index 0000000000..a9466fadf7 --- /dev/null +++ b/crates/fractured-json/tests/test_files/6.json @@ -0,0 +1,6 @@ +{ + "Rect": { "position": { "x": -44, "y": 3.4 }, "color": [0, 255, 255] }, + "Point": { "position": { "y": 22, "z": 3 } }, + "Oval": { "position": { "x": 140, "y": 0.04 }, "color": "#7f3e96" }, + 
"Plane": { "position": null, "color": [0, 64, 64] } +}