Skip to content

Commit 6e20d64

Browse files
committed
do not ignore lines that are list entries
1 parent 2430eb8 commit 6e20d64

File tree

1 file changed

+30
-17
lines changed

1 file changed

+30
-17
lines changed

ci/sembr/src/main.rs

Lines changed: 30 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,14 @@ struct Cli {
2222
show_diff: bool,
2323
}
2424

25-
static REGEX_IGNORE: LazyLock<Regex> =
26-
LazyLock::new(|| Regex::new(r"^\s*(\d\.|\-|\*)\s+").unwrap());
2725
static REGEX_IGNORE_END: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(\.|\?|;|!)$").unwrap());
2826
static REGEX_IGNORE_LINK_TARGETS: LazyLock<Regex> =
2927
LazyLock::new(|| Regex::new(r"^\[.+\]: ").unwrap());
3028
static REGEX_SPLIT: LazyLock<Regex> =
31-
LazyLock::new(|| Regex::new(r"([^\.]\.|[^r]\?|;|!)\s+").unwrap());
29+
LazyLock::new(|| Regex::new(r"([^\.\d\-\*]\.|[^r]\?|;|!)\s").unwrap());
30+
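// Matches the leading marker of a list entry (after optional indentation):
// numbered (a single digit followed by a dot, e.g. `1.`) or bulleted (`-` or `*`),
// including the whitespace that follows the marker.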
31+
static REGEX_LIST_ENTRY: LazyLock<Regex> =
32+
LazyLock::new(|| Regex::new(r"^\s*(\d\.|\-|\*)\s+").unwrap());
3233

3334
fn main() -> Result<()> {
3435
let cli = Cli::parse();
@@ -99,7 +100,6 @@ fn ignore(line: &str, in_code_block: bool) -> bool {
99100
|| line.trim_start().starts_with('>')
100101
|| line.starts_with('#')
101102
|| line.trim().is_empty()
102-
|| REGEX_IGNORE.is_match(line)
103103
|| REGEX_IGNORE_LINK_TARGETS.is_match(line)
104104
}
105105

@@ -120,11 +120,19 @@ fn comply(content: &str) -> String {
120120
continue;
121121
}
122122
if REGEX_SPLIT.is_match(&line) {
123-
let indent = line.find(|ch: char| !ch.is_whitespace()).unwrap();
124-
let new_lines: Vec<_> = line
125-
.split_inclusive(&*REGEX_SPLIT)
126-
.map(|portion| format!("{:indent$}{}", "", portion.trim()))
123+
let indent = if let Some(regex_match) = REGEX_LIST_ENTRY.find(&line) {
124+
regex_match.len()
125+
} else {
126+
line.find(|ch: char| !ch.is_whitespace()).unwrap()
127+
};
128+
let mut newly_split_lines = line.split_inclusive(&*REGEX_SPLIT);
129+
let first = newly_split_lines.next().unwrap().trim_end().to_owned();
130+
let mut remaining: Vec<_> = newly_split_lines
131+
.map(|portion| format!("{:indent$}{}", "", portion.trim_end()))
127132
.collect();
133+
let mut new_lines = Vec::new();
134+
new_lines.push(first);
135+
new_lines.append(&mut remaining);
128136
new_content.splice(new_n..=new_n, new_lines.clone());
129137
new_n += new_lines.len() - 1;
130138
}
@@ -184,40 +192,45 @@ fn lengthen_lines(content: &str, limit: usize) -> String {
184192
fn test_sembr() {
185193
let original = "\
186194
# some. heading
187-
must! be; split? and. normalizes space
188-
1. ignore numbered
195+
must! be; split?
196+
1. ignore a dot after number. but no further
189197
ignore | tables
190198
ignore e.g. and
191199
ignore i.e. and
192200
ignore E.g. too
193-
- ignore. list
194-
* ignore. list
201+
- list. entry
202+
* list. entry
195203
```
196204
some code. block
197205
```
198206
sentence with *italics* should not be ignored. truly.
199207
git log main.. compiler
208+
foo. bar. baz
200209
";
201210
let expected = "\
202211
# some. heading
203212
must!
204213
be;
205214
split?
206-
and.
207-
normalizes space
208-
1. ignore numbered
215+
1. ignore a dot after number.
216+
but no further
209217
ignore | tables
210218
ignore e.g. and
211219
ignore i.e. and
212220
ignore E.g. too
213-
- ignore. list
214-
* ignore. list
221+
- list.
222+
entry
223+
* list.
224+
entry
215225
```
216226
some code. block
217227
```
218228
sentence with *italics* should not be ignored.
219229
truly.
220230
git log main.. compiler
231+
foo.
232+
bar.
233+
baz
221234
";
222235
assert_eq!(expected, comply(original));
223236
}

0 commit comments

Comments
 (0)