@@ -22,13 +22,14 @@ struct Cli {
2222 show_diff : bool ,
2323}
2424
25- static REGEX_IGNORE : LazyLock < Regex > =
26- LazyLock :: new ( || Regex :: new ( r"^\s*(\d\.|\-|\*)\s+" ) . unwrap ( ) ) ;
2725static REGEX_IGNORE_END : LazyLock < Regex > = LazyLock :: new ( || Regex :: new ( r"(\.|\?|;|!)$" ) . unwrap ( ) ) ;
2826static REGEX_IGNORE_LINK_TARGETS : LazyLock < Regex > =
2927 LazyLock :: new ( || Regex :: new ( r"^\[.+\]: " ) . unwrap ( ) ) ;
3028static REGEX_SPLIT : LazyLock < Regex > =
31- LazyLock :: new ( || Regex :: new ( r"([^\.]\.|[^r]\?|;|!)\s+" ) . unwrap ( ) ) ;
29+ LazyLock :: new ( || Regex :: new ( r"([^\.\d\-\*]\.|[^r]\?|;|!)\s" ) . unwrap ( ) ) ;
30+ // marker of a list entry, including surrounding whitespace: single-digit numbered (1.) or bulleted (- or *)
31+ static REGEX_LIST_ENTRY : LazyLock < Regex > =
32+ LazyLock :: new ( || Regex :: new ( r"^\s*(\d\.|\-|\*)\s+" ) . unwrap ( ) ) ;
3233
3334fn main ( ) -> Result < ( ) > {
3435 let cli = Cli :: parse ( ) ;
@@ -99,7 +100,6 @@ fn ignore(line: &str, in_code_block: bool) -> bool {
99100 || line. trim_start ( ) . starts_with ( '>' )
100101 || line. starts_with ( '#' )
101102 || line. trim ( ) . is_empty ( )
102- || REGEX_IGNORE . is_match ( line)
103103 || REGEX_IGNORE_LINK_TARGETS . is_match ( line)
104104}
105105
@@ -120,11 +120,19 @@ fn comply(content: &str) -> String {
120120 continue ;
121121 }
122122 if REGEX_SPLIT . is_match ( & line) {
123- let indent = line. find ( |ch : char | !ch. is_whitespace ( ) ) . unwrap ( ) ;
124- let new_lines: Vec < _ > = line
125- . split_inclusive ( & * REGEX_SPLIT )
126- . map ( |portion| format ! ( "{:indent$}{}" , "" , portion. trim( ) ) )
123+ let indent = if let Some ( regex_match) = REGEX_LIST_ENTRY . find ( & line) {
124+ regex_match. len ( )
125+ } else {
126+ line. find ( |ch : char | !ch. is_whitespace ( ) ) . unwrap ( )
127+ } ;
128+ let mut newly_split_lines = line. split_inclusive ( & * REGEX_SPLIT ) ;
129+ let first = newly_split_lines. next ( ) . unwrap ( ) . trim_end ( ) . to_owned ( ) ;
130+ let mut remaining: Vec < _ > = newly_split_lines
131+ . map ( |portion| format ! ( "{:indent$}{}" , "" , portion. trim_end( ) ) )
127132 . collect ( ) ;
133+ let mut new_lines = Vec :: new ( ) ;
134+ new_lines. push ( first) ;
135+ new_lines. append ( & mut remaining) ;
128136 new_content. splice ( new_n..=new_n, new_lines. clone ( ) ) ;
129137 new_n += new_lines. len ( ) - 1 ;
130138 }
@@ -184,40 +192,45 @@ fn lengthen_lines(content: &str, limit: usize) -> String {
184192fn test_sembr ( ) {
185193 let original = "\
186194 # some. heading
187- must! be; split? and. normalizes space
188- 1. ignore numbered
195+ must! be; split?
196+ 1. ignore a dot after number. but no further
189197ignore | tables
190198ignore e.g. and
191199ignore i.e. and
192200ignore E.g. too
193- - ignore. list
194- * ignore. list
201+ - list. entry
202+ * list. entry
195203```
196204some code. block
197205```
198206sentence with *italics* should not be ignored. truly.
199207git log main.. compiler
208+ foo. bar. baz
200209" ;
201210 let expected = "\
202211 # some. heading
203212must!
204213be;
205214split?
206- and.
207- normalizes space
208- 1. ignore numbered
215+ 1. ignore a dot after number.
216+ but no further
209217ignore | tables
210218ignore e.g. and
211219ignore i.e. and
212220ignore E.g. too
213- - ignore. list
214- * ignore. list
221+ - list.
222+ entry
223+ * list.
224+ entry
215225```
216226some code. block
217227```
218228sentence with *italics* should not be ignored.
219229truly.
220230git log main.. compiler
231+ foo.
232+ bar.
233+ baz
221234" ;
222235 assert_eq ! ( expected, comply( original) ) ;
223236}
0 commit comments