Skip to content

Commit cf803c7

Browse files
committed
Bug fix for SplitRecursively: ditch last chunk if empty.
1 parent a231aed commit cf803c7

File tree

1 file changed

+24
-20
lines changed

1 file changed

+24
-20
lines changed

src/ops/functions/split_recursively.rs

Lines changed: 24 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,7 @@ static SEPARATORS_BY_LANG: LazyLock<HashMap<&'static str, Vec<Regex>>> = LazyLoc
9191
.collect()
9292
});
9393

94-
trait NestedChunk: Sized {
94+
trait NestedChunk {
9595
fn range(&self) -> &RangeValue;
9696

9797
fn sub_chunks(&self) -> Option<impl Iterator<Item = Self>>;
@@ -128,25 +128,29 @@ impl<'a, 's: 'a> Iterator for SubChunksIter<'a, 's> {
128128
type Item = Chunk<'s>;
129129

130130
fn next(&mut self) -> Option<Self::Item> {
131-
if let Some(start_pos) = self.next_start_pos {
132-
let end_pos = match self.matches_iter.next() {
133-
Some(grp) => {
134-
self.next_start_pos = Some(self.parent.range.start + grp.end());
135-
self.parent.range.start + grp.start()
136-
}
137-
None => {
138-
self.next_start_pos = None;
139-
self.parent.range.end
140-
}
141-
};
142-
Some(Chunk {
143-
target: self.parent.target,
144-
range: RangeValue::new(start_pos, end_pos),
145-
next_sep_id: self.parent.next_sep_id + 1,
146-
})
131+
let start_pos = if let Some(start_pos) = self.next_start_pos {
132+
start_pos
147133
} else {
148-
None
149-
}
134+
return None;
135+
};
136+
let end_pos = match self.matches_iter.next() {
137+
Some(grp) => {
138+
self.next_start_pos = Some(self.parent.range.start + grp.end());
139+
self.parent.range.start + grp.start()
140+
}
141+
None => {
142+
self.next_start_pos = None;
143+
if start_pos >= self.parent.range.end {
144+
return None;
145+
}
146+
self.parent.range.end
147+
}
148+
};
149+
Some(Chunk {
150+
target: self.parent.target,
151+
range: RangeValue::new(start_pos, end_pos),
152+
next_sep_id: self.parent.next_sep_id + 1,
153+
})
150154
}
151155
}
152156

@@ -177,7 +181,7 @@ struct RecursiveChunker<'s> {
177181
impl<'s> RecursiveChunker<'s> {
178182
fn split_substring<Chk>(&self, chunk: Chk, output: &mut Vec<(RangeValue, &'s str)>)
179183
where
180-
Chk: NestedChunk,
184+
Chk: NestedChunk + Sized,
181185
{
182186
let sub_chunks_iter = if let Some(sub_chunks_iter) = chunk.sub_chunks() {
183187
sub_chunks_iter

0 commit comments

Comments
 (0)