From 2434573a37991b20d1a065e1b596bce3854161e8 Mon Sep 17 00:00:00 2001 From: LJ Date: Sat, 15 Mar 2025 11:22:41 -0700 Subject: [PATCH] Also trim leading newlines for chunks in `SplitRecursively`. --- src/ops/functions/split_recursively.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/ops/functions/split_recursively.rs b/src/ops/functions/split_recursively.rs index 5425f3f68..1646a6293 100644 --- a/src/ops/functions/split_recursively.rs +++ b/src/ops/functions/split_recursively.rs @@ -272,10 +272,18 @@ impl<'t, 's: 't> RecursiveChunker<'s> { fn add_output(&self, range: RangeValue, output: &mut Vec<(RangeValue, &'s str)>) { let text = range.extract_str(self.full_text); - let trimmed_text = text.trim_end(); + + // Trim leading newlines. + let trimmed_text = text.trim_start_matches(['\n', '\r']); + let adjusted_start = range.start + (text.len() - trimmed_text.len()); + + // Trim trailing whitespace. + let trimmed_text = trimmed_text.trim_end(); + + // Only record non-empty chunks. if !trimmed_text.is_empty() { output.push(( - RangeValue::new(range.start, range.start + trimmed_text.len()), + RangeValue::new(adjusted_start, adjusted_start + trimmed_text.len()), trimmed_text, )); }