From d5393a874a7c0fc60e68751319cc172581ff64df Mon Sep 17 00:00:00 2001 From: Adam Chalmers Date: Mon, 21 Jul 2025 21:33:19 -0500 Subject: [PATCH 1/2] WIP Faster array recast --- rust/kcl-lib/src/unparser.rs | 94 ++++++++++++++++++------------------ 1 file changed, 48 insertions(+), 46 deletions(-) diff --git a/rust/kcl-lib/src/unparser.rs b/rust/kcl-lib/src/unparser.rs index c1e87ebc2a2..fbed7e5d028 100644 --- a/rust/kcl-lib/src/unparser.rs +++ b/rust/kcl-lib/src/unparser.rs @@ -552,73 +552,61 @@ impl TagDeclarator { impl ArrayExpression { fn recast(&self, buf: &mut String, options: &FormatOptions, indentation_level: usize, ctxt: ExprContext) { - // Reconstruct the order of items in the array. - // An item can be an element (i.e. an expression for a KCL value), - // or a non-code item (e.g. a comment) let num_items = self.elements.len() + self.non_code_meta.non_code_nodes_len(); let mut elems = self.elements.iter(); + let mut items_buf = String::with_capacity(512); + let mut item_spans = Vec::with_capacity(num_items); let mut found_line_comment = false; - let mut format_items: Vec<_> = Vec::with_capacity(num_items); + for i in 0..num_items { + let start = items_buf.len(); if let Some(noncode) = self.non_code_meta.non_code_nodes.get(&i) { - format_items.extend(noncode.iter().map(|nc| { + for nc in noncode { found_line_comment |= nc.value.should_cause_array_newline(); - nc.recast(options, 0) - })); + items_buf.push_str(&nc.recast(options, 0)); + } } else { let el = elems.next().unwrap(); - let mut s = String::with_capacity(256); - el.recast(&mut s, options, 0, ExprContext::Other); - s.push_str(", "); - format_items.push(s); - } - } - - // Format these items into a one-line array. - if let Some(item) = format_items.last_mut() { - if let Some(norm) = item.strip_suffix(", ") { - *item = norm.to_owned(); + el.recast(&mut items_buf, options, 0, ExprContext::Other); + items_buf.push_str(", "); } + let end = items_buf.len(); + item_spans.push((start, end)); } - let mut flat_recast = String::with_capacity(256); - flat_recast.push('['); - for fi in &format_items { - flat_recast.push_str(fi) - } - flat_recast.push(']'); - // We might keep the one-line representation, if it's short enough. let max_array_length = 40; - let multi_line = flat_recast.len() > max_array_length || found_line_comment; - if !multi_line { - buf.push_str(&flat_recast); + let flat_length = items_buf.trim_end_matches(", ").len(); + let use_flat = flat_length <= max_array_length && !found_line_comment; + + if use_flat { + buf.push('['); + buf.push_str(&items_buf[..flat_length]); + buf.push(']'); return; } - // Otherwise, we format a multi-line representation. + // Multi-line representation buf.push_str("[\n"); - let inner_indentation = if ctxt == ExprContext::Pipe { - options.get_indentation_offset_pipe(indentation_level + 1) - } else { - options.get_indentation(indentation_level + 1) + let indent_inner = match ctxt { + ExprContext::Pipe => options.get_indentation_offset_pipe(indentation_level + 1), + _ => options.get_indentation(indentation_level + 1), }; - for format_item in format_items { - buf.push_str(&inner_indentation); - buf.push_str(if let Some(x) = format_item.strip_suffix(" ") { - x + let n = item_spans.len(); + for (i, (start, end)) in item_spans.iter().enumerate() { + buf.push_str(&indent_inner); + let s = items_buf[*start..*end].trim_end(); + if i < n - 1 { + buf.push_str(s); } else { - &format_item - }); - if !format_item.ends_with('\n') { - buf.push('\n') + buf.push_str(s.trim_matches(',')) } + buf.push('\n'); } - let end_indent = if ctxt == ExprContext::Pipe { - options.get_indentation_offset_pipe(indentation_level) - } else { - options.get_indentation(indentation_level) + let indent_end = match ctxt { + ExprContext::Pipe => options.get_indentation_offset_pipe(indentation_level), + _ => options.get_indentation(indentation_level), }; - buf.push_str(&end_indent); + buf.push_str(&indent_end); buf.push(']'); } } @@ -2998,4 +2986,18 @@ fn function001() { let expected = code; assert_eq!(recasted, expected); } + + #[test] + fn check_arr() { + let code = "[ + -0.8111463382182231, + -0.41814807547140576, +] +"; + + let ast = crate::parsing::top_level_parse(code).unwrap(); + let recasted = ast.recast_top(&FormatOptions::new(), 0); + let expected = code; + assert_eq!(recasted, expected); + } } From b9aa82db2f404a2b12a6e3aa3233ddcd48bb5e21 Mon Sep 17 00:00:00 2001 From: Adam Chalmers Date: Mon, 21 Jul 2025 21:41:26 -0500 Subject: [PATCH 2/2] KCL: Faster recasting of arrays Before this PR, array expression recasting was a bit slow. The reason was that there's two ways to recast an array: - Single line array string (all items on one line) - Multi-line array string (n lines, one per item) The steps were basically: 1. Recast each item, collecting into a vec with n strings (one string per item) 2. Concat the items into a single-line array string 3. If the line is too long, discard it and do a multi-line array string. Giving each item its own string meant we were doing N allocations (one per item) but it meant that each item was only recast once. This PR has a better approach. We build a single-line array first, and if it's too long, then ignore it and do a multi-line array. HOWEVER, the difference is that we track each item's offset within the single-line array string. If we then need to convert it into a multi-line array string, we can do that easily by looking up each item's offset. Improves benchmarks by 7% and 20% on my macbook pro. --- rust/kcl-lib/src/unparser.rs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/rust/kcl-lib/src/unparser.rs b/rust/kcl-lib/src/unparser.rs index fbed7e5d028..e04ceb8fc06 100644 --- a/rust/kcl-lib/src/unparser.rs +++ b/rust/kcl-lib/src/unparser.rs @@ -554,7 +554,7 @@ impl ArrayExpression { fn recast(&self, buf: &mut String, options: &FormatOptions, indentation_level: usize, ctxt: ExprContext) { let num_items = self.elements.len() + self.non_code_meta.non_code_nodes_len(); let mut elems = self.elements.iter(); - let mut items_buf = String::with_capacity(512); + let mut items_buf = String::with_capacity(256); let mut item_spans = Vec::with_capacity(num_items); let mut found_line_comment = false; @@ -568,19 +568,21 @@ impl ArrayExpression { } else { let el = elems.next().unwrap(); el.recast(&mut items_buf, options, 0, ExprContext::Other); - items_buf.push_str(", "); + if i < num_items - 1 { + items_buf.push_str(", "); + } } let end = items_buf.len(); item_spans.push((start, end)); } + let flat_length = items_buf.len() + 2; // +2 is for the [ and ] which will surround the items. let max_array_length = 40; - let flat_length = items_buf.trim_end_matches(", ").len(); - let use_flat = flat_length <= max_array_length && !found_line_comment; + let multi_line = flat_length > max_array_length || found_line_comment; - if use_flat { + if !multi_line { buf.push('['); - buf.push_str(&items_buf[..flat_length]); + buf.push_str(&items_buf[..flat_length - 2]); // subtract the [ and ] again buf.push(']'); return; } @@ -2991,7 +2993,7 @@ fn function001() { fn check_arr() { let code = "[ -0.8111463382182231, - -0.41814807547140576, + -0.41814807547140576 ] ";