55
//! Utility types and traits
77
8+ use icu_properties:: { CodePointMapData , props:: LineBreak } ;
9+ use icu_segmenter:: { LineSegmenter , iterators:: LineBreakIterator , scaffold:: Utf8 } ;
810use std:: ops:: Range ;
9- use std:: str:: { CharIndices , Chars } ;
10- use swash:: text:: cluster:: Boundary ;
1111
1212/// Describes the state-of-preparation of a [`TextDisplay`][crate::TextDisplay]
1313#[ derive( Copy , Clone , Debug , Default , PartialEq , Eq , Ord , PartialOrd , Hash ) ]
@@ -68,27 +68,44 @@ impl<T: Clone> Iterator for OwningVecIter<T> {
6868impl < T : Clone > ExactSizeIterator for OwningVecIter < T > { }
6969impl < T : Clone > std:: iter:: FusedIterator for OwningVecIter < T > { }
7070
71+ /// Returns `true` when `text` ends with a hard break, assuming that it ends
72+ /// with a valid line break.
73+ ///
74+ /// This filter is copied from icu_segmenter docs.
75+ pub ( crate ) fn ends_with_hard_break ( text : & str ) -> bool {
76+ text. chars ( ) . next_back ( ) . is_some_and ( |c| {
77+ matches ! (
78+ CodePointMapData :: <LineBreak >:: new( ) . get( c) ,
79+ LineBreak :: MandatoryBreak
80+ | LineBreak :: CarriageReturn
81+ | LineBreak :: LineFeed
82+ | LineBreak :: NextLine
83+ )
84+ } )
85+ }
86+
7187/// Iterator over lines / paragraphs within the text
7288///
7389/// This iterator splits the input text into a sequence of "lines" at mandatory
7490/// breaks (see [TR14#BK](https://www.unicode.org/reports/tr14/#BK)).
7591/// The resulting slices cover the whole input text in order without overlap.
7692pub struct LineIterator < ' a > {
77- analyzer : swash :: text :: Analyze < Chars < ' a > > ,
78- char_indices : CharIndices < ' a > ,
93+ break_iter : LineBreakIterator < ' static , ' a , Utf8 > ,
94+ text : & ' a str ,
7995 start : usize ,
80- len : usize ,
8196}
8297
8398impl < ' a > LineIterator < ' a > {
8499 /// Construct
85100 #[ inline]
86101 pub fn new ( text : & ' a str ) -> Self {
102+ let segmenter = LineSegmenter :: new_auto ( Default :: default ( ) ) ;
103+ let mut break_iter = segmenter. segment_str ( text) ;
104+ assert_eq ! ( break_iter. next( ) , Some ( 0 ) ) ; // the iterator always reports a break at 0
87105 LineIterator {
88- analyzer : swash :: text :: analyze ( text . chars ( ) ) ,
89- char_indices : text. char_indices ( ) ,
106+ break_iter ,
107+ text,
90108 start : 0 ,
91- len : text. len ( ) ,
92109 }
93110 }
94111}
@@ -97,24 +114,13 @@ impl<'a> Iterator for LineIterator<'a> {
97114 type Item = Range < usize > ;
98115
99116 fn next ( & mut self ) -> Option < Self :: Item > {
100- if self . start >= self . len {
101- return None ;
102- }
103-
104- for ( index, _) in self . char_indices . by_ref ( ) {
105- let ( _, boundary) = self . analyzer . next ( ) . unwrap ( ) ;
106-
107- if index > 0 && boundary == Boundary :: Mandatory {
117+ while let Some ( index) = self . break_iter . next ( ) {
118+ if ends_with_hard_break ( & self . text [ ..index] ) || index == self . text . len ( ) {
108119 let range = self . start ..index;
109120 self . start = index;
110121 return Some ( range) ;
111122 }
112123 }
113-
114- debug_assert ! ( self . analyzer. next( ) . is_none( ) ) ;
115-
116- let range = self . start ..self . len ;
117- self . start = self . len ;
118- Some ( range)
124+ None
119125 }
120126}
0 commit comments