From 208ea8ea4c7b680380a65d5a3795eedbcd10be9e Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Sat, 22 Feb 2025 00:30:59 +0800 Subject: [PATCH 1/2] Alert if the commit message is written in Chinese The hook checks for the presence of characters in the typical Chinese Unicode range (e.g., CJK Unified Ideographs). Change-Id: I186bfd3ffd79868ddb1f51b4052fa4397fe19be5 --- scripts/aspell-pws | 1 + scripts/commit-msg.hook | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/scripts/aspell-pws b/scripts/aspell-pws index 00d2a452b..6deba882c 100644 --- a/scripts/aspell-pws +++ b/scripts/aspell-pws @@ -300,3 +300,4 @@ sed changeid en msg +cjk diff --git a/scripts/commit-msg.hook b/scripts/commit-msg.hook index d542d0bea..c9b6979f3 100755 --- a/scripts/commit-msg.hook +++ b/scripts/commit-msg.hook @@ -277,7 +277,7 @@ validate_commit_message() { add_warning 1 "Avoid mentioning C source filenames in the commit subject" fi - # 11a. Disallow parentheses in the commit subject. + # 7b. Disallow parentheses in the commit subject. if [[ ${COMMIT_SUBJECT_TO_PROCESS} =~ [\(\)] ]]; then add_warning 1 "Avoid using parentheses '()' in commit subjects" fi @@ -334,6 +334,13 @@ validate_commit_message() { # 13. Always use American English. # ------------------------------------------------------------------------------ + # Alert if the commit message appears to be written in Chinese. + # The range 一-龥 (U+4E00-U+9FA5) covers the common CJK Unified Ideographs; grep yields each line containing one. + MISSPELLED_WORDS=$(echo "$FULL_COMMIT_MSG" | grep "[一-龥]") + if [ -n "$MISSPELLED_WORDS" ]; then + add_warning 1 "Commit message appears to be written in Chinese: $MISSPELLED_WORDS" + fi + # Use aspell to list misspelled words according to American English. 
MISSPELLED_WORDS=$(echo "$FULL_COMMIT_MSG" | $ASPELL --lang=en --list --home-dir=scripts --personal=aspell-pws) if [ -n "$MISSPELLED_WORDS" ]; then From a1da15f9c8027fe4039acc79489ab0178b4b72b5 Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Sat, 22 Feb 2025 00:46:38 +0800 Subject: [PATCH 2/2] Remove quoted text for spell checking This removes any text enclosed in single or double quotes. Change-Id: Id65506bb0f4470ecf4ece58cb88f4c9f22d9afd7 --- scripts/aspell-pws | 31 +++++++++++++++++++++++++++++++ scripts/commit-msg.hook | 7 +++++-- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/scripts/aspell-pws b/scripts/aspell-pws index 6deba882c..6acafb22b 100644 --- a/scripts/aspell-pws +++ b/scripts/aspell-pws @@ -301,3 +301,34 @@ changeid en msg cjk +massif +csapp +cmu +ele +lima +github +ih +it +AddressSanitizer +asan +dbg +dev +dpkg +apt +siglongjmp +sigsetjmp +SIGABRT +SIGALRM +SIGBUS +SIGFPE +SIGHUP +SIGILL +SIGINT +SIGKILL +SIGPIPE +SIGQUIT +SIGSEGV +SIGTERM +SIGTRAP +SIGUSR +SIGWINCH diff --git a/scripts/commit-msg.hook b/scripts/commit-msg.hook index c9b6979f3..747d66fb4 100755 --- a/scripts/commit-msg.hook +++ b/scripts/commit-msg.hook @@ -341,8 +341,11 @@ validate_commit_message() { add_warning 1 "Commit message appears to be written in Chinese: $MISSPELLED_WORDS" fi - # Use aspell to list misspelled words according to American English. - MISSPELLED_WORDS=$(echo "$FULL_COMMIT_MSG" | $ASPELL --lang=en --list --home-dir=scripts --personal=aspell-pws) + # Remove quoted text from FULL_COMMIT_MSG for spell checking. + MSG_FOR_SPELLCHECK=$(echo "$FULL_COMMIT_MSG" | sed -E "s/(['\"][^'\"]*['\"])//g") + + # Use aspell to list misspelled words according to American English, ignoring quoted text. + MISSPELLED_WORDS=$(echo "$MSG_FOR_SPELLCHECK" | $ASPELL --lang=en --list --home-dir=scripts --personal=aspell-pws) if [ -n "$MISSPELLED_WORDS" ]; then add_warning 1 "Avoid using non-American English words" fi