Skip to content

Commit d5b4054

Browse files
committed
fix: inconsistent capitalization - improve word sequence break
1 parent cc91b5e commit d5b4054

File tree

1 file changed

+10
-3
lines changed

1 file changed

+10
-3
lines changed

src/commands/inconsistent-capitalization.js

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -23,9 +23,9 @@ export function registerListInconsistentCapitalizationCommand (context, outputCh
2323
const ignoreAnchorPropTextRgx = /anchor="(?<term>.+?)"/gi
2424
const results = findInconsistentCapitalization(
2525
activeDoc.getText()
26-
.replaceAll(ignoreXmlTagsRgx, (_, p1) => `<${repeat('_', p1.length)}>`)
2726
.replaceAll(ignoreNameTagTextRgx, (_, p1) => `<name>${repeat('_', p1.length)}</name>`)
2827
.replaceAll(ignoreAnchorPropTextRgx, (_, p1) => `anchor="${repeat('_', p1.length)}"`)
28+
.replaceAll(ignoreXmlTagsRgx, (_, p1) => `<${repeat('_', p1.length)}>`)
2929
)
3030

3131
outputChannel.clear()
@@ -107,10 +107,10 @@ function findInconsistentCapitalization(text) {
107107
let nextIsStart = true
108108

109109
rawLines.forEach((lineText, lineIndex) => {
110-
const lineTokens = lineText.match(/[a-zA-Z0-9']+|[.!?>]+/g) || []
110+
const lineTokens = lineText.match(/[a-zA-Z0-9'-]+|[.:;=!?>]+/g) || []
111111

112112
for (const t of lineTokens) {
113-
if (/^[.!?>]+$/.test(t)) {
113+
if (/^[.:;=!?>]+$/.test(t)) {
114114
nextIsStart = true
115115
} else {
116116
tokens.push({
@@ -140,6 +140,13 @@ function findInconsistentCapitalization(text) {
140140
const tokenObj = tokens[i + j]
141141
const word = tokenObj.word
142142

143+
// Do not allow phrases to span sentence boundaries: if this token is
144+
// marked as the start of a sentence and it's not the first word in the
145+
// phrase, stop expanding the current phrase.
146+
if (j > 0 && tokenObj.isStart) {
147+
break
148+
}
149+
143150
// Flag single letters only if they are NOT in our ignored list
144151
if (word.length === 1 && !IGNORED_TERMS.has(word.toLowerCase())) {
145152
hasSingleLetterTerm = true

0 commit comments

Comments
 (0)