Skip to content

Commit d39d920

Browse files
committed
Clear where non-American English appears
Previously, warnings about non-American English words in commit messages did not indicate the specific word or its exact location. This commit improves the hook by outputting the precise line number and word that triggered the warning. A new function, 'get_all_match_positions', was introduced to locate the first occurrence of each target word in a multi-line string. We now preserve blank lines in the commit message by using the variable 'FULL_COMMIT_MSG_WITH_SPACE', so that the line search functionality operates correctly. This change makes it easier for users to quickly identify and correct non-American English words in their commit messages. Change-Id: I8a2b7eb3984b06b0be6506ca4f410ca857fe50a7
1 parent bb50402 commit d39d920

File tree

1 file changed

+48
-7
lines changed

1 file changed

+48
-7
lines changed

scripts/commit-msg.hook

Lines changed: 48 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,41 @@ read_commit_message() {
124124
done < $COMMIT_MSG_FILE
125125
}
126126

127+
# Report the line on which each target word occurs in a multiline string.
#
# Targets are matched as literal substrings and are located in order: the
# search for each target resumes just past the end of the previous match, so
# a word listed several times maps to its successive occurrences and a target
# can never match inside the word that was reported before it. Targets that
# cannot be found are silently skipped.
#
# Arguments:
#   $1 - text to search (may span several lines; blank lines count as lines)
#   $2 - newline-separated list of target words
# Output format: "target: line" (one row per located target, line is 1-based)
get_all_match_positions() {
  local text="$1"
  local targets="$2"
  local start_line=1
  local start_col=1
  local target result line next_col

  while IFS= read -r target; do
    # An empty target list still produces one empty line from the here-string;
    # searching for an empty needle is meaningless, so skip it.
    [ -z "$target" ] && continue

    # Find the first occurrence at or after (start_line, start_col). awk
    # prints the matching line number and the column right after the match,
    # so the column arithmetic uses the same character units as index().
    result=$(
      awk -v t="$target" -v sl="$start_line" -v sc="$start_col" '{
        if (NR < sl) next
        if (NR == sl) {
          pos = index(substr($0, sc), t)
          if (pos) pos = pos + sc - 1
        } else {
          pos = index($0, t)
        }
        if (pos) {
          print NR, pos + length(t)
          exit
        }
      }' <<< "$text"
    )

    # skip if the target is not found
    [ -z "$result" ] && continue

    # output and update states
    read -r line next_col <<< "$result"
    echo "$target: $line"
    start_line="$line"
    start_col="$next_col"
  done <<< "$targets"
}
161+
127162
#
128163
# Validate the contents of the commit msg against the good commit guidelines.
129164
#
@@ -348,8 +383,10 @@ done
348383
# 12. Avoid abusive language in commit message content
349384
# ------------------------------------------------------------------------------
350385

351-
FULL_COMMIT_MSG=$(sed '/^#/d;/^[[:space:]]*$/d;/^[[:space:]]*Change-Id:/d' "$COMMIT_MSG_FILE" | \
352-
sed -E "s@${URL_REGEX#^}@@g")
386+
FULL_COMMIT_MSG_WITH_SPACE=$(sed '/^#/d;/^[[:space:]]*Change-Id:/d' "$COMMIT_MSG_FILE" | \
387+
sed -E "s@${URL_REGEX#^}@@g")
388+
FULL_COMMIT_MSG=$(echo "$FULL_COMMIT_MSG_WITH_SPACE" | sed '/^[[:space:]]*$/d')
389+
353390
# Extended list of abusive words (case-insensitive).
354391
# Adjust the list as needed.
355392
ABUSIVE_WORDS_REGEX='\b(fuck|fucking|dick|shit|bitch|asshole|cunt|motherfucker|damn|crap|dumbass|piss)\b'
@@ -367,16 +404,20 @@ done
367404
add_warning 1 "Commit message appears to be written in Chinese: $MISSPELLED_WORDS"
368405
fi
369406

370-
# Remove quoted text and commit hashes from $FULL_COMMIT_MSG for spell checking.
371-
# Handles commit references like "commit 7d05741" (short) or full 40-char hashes.
372-
MSG_FOR_SPELLCHECK=$(echo "$FULL_COMMIT_MSG" | sed -E \
407+
MSG_FOR_SPELLCHECK_LINE_FINDING=$(echo "$FULL_COMMIT_MSG_WITH_SPACE" | sed -E \
373408
-e "s/(['\"][^'\"]*['\"])//g" \
374409
-e "s/\bcommit[[:space:]]+[0-9a-fA-F]{7,40}\b/commit/g")
375-
410+
MSG_FOR_SPELLCHECK=$(echo "$MSG_FOR_SPELLCHECK_LINE_FINDING" | sed '/^[[:space:]]*$/d')
411+
412+
376413
# Use aspell to list misspelled words according to American English, ignoring quoted text.
377414
MISSPELLED_WORDS=$(echo "$MSG_FOR_SPELLCHECK" | $ASPELL --lang=en --list --home-dir=scripts --personal=aspell-pws)
378415
if [ -n "$MISSPELLED_WORDS" ]; then
379-
add_warning 1 "Avoid using non-American English words"
416+
results=$(get_all_match_positions "$MSG_FOR_SPELLCHECK_LINE_FINDING" "$MISSPELLED_WORDS")
417+
418+
while read -r result; do
419+
add_warning "${result#*:}" "Avoid using non-American English words: ${result%%:*}"
420+
done <<< "$results"
380421
fi
381422
}
382423

0 commit comments

Comments
 (0)