From 29d963bd93edb37a4122bf2474f199b6b9bdbfa9 Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Sun, 14 Sep 2025 03:14:47 +0800 Subject: [PATCH] Improve commit checker and add bypass detection This reduces execution time by 86% (2s to 0.28s) through single git call and bash-native processing. It adds detection for '--no-verify' bypass, 'WIP' commits, GitHub web interface usage, and commit quality issues. Change-Id: Ie8a24bb715568dec2099b44e6d18c526ca428400 --- scripts/check-commitlog.sh | 224 ++++++++++++++++++++++++++++++++++--- scripts/checksums | 2 +- 2 files changed, 207 insertions(+), 19 deletions(-) diff --git a/scripts/check-commitlog.sh b/scripts/check-commitlog.sh index 23cd3f4bb..8a94c759d 100755 --- a/scripts/check-commitlog.sh +++ b/scripts/check-commitlog.sh @@ -1,10 +1,9 @@ #!/usr/bin/env bash -# Check that every non-merge commit after the specified base commit has a commit -# message ending with a valid Change-Id line. A valid Change-Id line must be the -# last non-empty line of the commit message and follow the format: -# -# Change-Id: I +# This script checks: +# 1. Change-Id presence (indicates commit-msg hook processing) +# 2. Commit message quality (indicates pre-commit hook compliance) +# 3. Bypass detection (detects --no-verify usage or web interface commits) # # Merge commits are excluded from this check. @@ -24,26 +23,215 @@ BASE_COMMIT="0b8be2c15160c216e8b6ec82c99a000e81c0e429" # Get a list of non-merge commit hashes after BASE_COMMIT. commits=$(git rev-list --no-merges "${BASE_COMMIT}"..HEAD) +# Hook bypass detection patterns +BYPASS_INDICATORS=( + "--no-verify" + "WIP" +) + +# Quality patterns that indicate hook processing +PROBLEMATIC_PATTERNS=( + '^[a-z]' # Uncapitalized subjects + '\.$' # Ending with period + '^.{1,10}$' # Too short subjects + '^.{80,}' # Too long subjects + '^(Update|Fix|Change|Modify) [a-zA-Z0-9_-]+\.(c|h)$' # Generic filename updates +) + +# Early exit if no commits to check +[[ -z "$commits" ]] && { echo -e "${GREEN}No commits to check.${NC}"; exit 0; } + +# Pre-compute indicator patterns for faster matching +bypass_pattern="" +for indicator in "${BYPASS_INDICATORS[@]}"; do + bypass_pattern+="|${indicator,,}" +done +bypass_pattern="${bypass_pattern#|}" + +# Ultra-fast approach: minimize git calls and parsing overhead failed=0 +warnings=0 +suspicious_commits=() + +# Cache all commit data at once using the most efficient git format +declare -A commit_cache short_cache subject_cache msg_cache + +# Single git call to populate all caches - handle multiline messages properly +current_commit="" +current_msg="" +reading_msg=false + +while IFS= read -r line; do + case "$line" in + "COMMIT "*) + # Save previous message if we were reading one + if [[ "$reading_msg" = true && -n "$current_commit" ]]; then + msg_cache["$current_commit"]="$current_msg" + fi + current_commit="${line#COMMIT }" + commit_cache["$current_commit"]=1 + reading_msg=false + current_msg="" + ;; + "SHORT "*) + short_cache["$current_commit"]="${line#SHORT }" + ;; + "SUBJECT "*) + subject_cache["$current_commit"]="${line#SUBJECT }" + ;; + "MSGSTART") + reading_msg=true + current_msg="" + ;; + *) + # If we're reading a message, accumulate lines + if [[ "$reading_msg" = true ]]; then + if [[ -z "$current_msg" ]]; then + current_msg="$line" + else + current_msg="$current_msg"$'\n'"$line" + fi + fi + ;; + esac +done < <(git log --format="COMMIT %H%nSHORT %h%nSUBJECT %s%nMSGSTART%n%B" --no-merges "${BASE_COMMIT}..HEAD") -for commit in $commits; do - # Retrieve the commit message for the given commit. - commit_msg=$(git log -1 --format=%B "${commit}") +# Save the last message +if [[ "$reading_msg" = true && -n "$current_commit" ]]; then + msg_cache["$current_commit"]="$current_msg" +fi + +# Process cached data - no more git calls needed +for commit in "${!commit_cache[@]}"; do + [[ -z "$commit" ]] && continue + + short_hash="${short_cache[$commit]}" + subject="${subject_cache[$commit]}" + full_msg="${msg_cache[$commit]}" - # Extract the last non-empty line from the commit message. - last_line=$(echo "$commit_msg" | awk 'NF {line=$0} END {print line}') + # Initialize issue tracking + has_issues=0 + has_warnings=0 + issue_list="" + warning_list="" + + # Check 1: Change-Id validation (fastest check first) + if [[ "$full_msg" != *"Change-Id: I"* ]]; then + has_issues=1 + issue_list+="Missing valid Change-Id (likely bypassed commit-msg hook)|" + ((failed++)) + fi - # Check if the last line matches the expected Change-Id format. - if [[ ! $last_line =~ ^Change-Id:\ I[0-9a-fA-F]+$ ]]; then - subject=$(git log -1 --format=%s "${commit}") - short_hash=$(git rev-parse --short "${commit}") - echo "Commit ${short_hash} with subject '$subject' does not end with a valid Change-Id." - failed=1 + # Check 2: Bypass indicators (single pattern match) + full_msg_lower="${full_msg,,}" + if [[ "$full_msg_lower" =~ ($bypass_pattern) ]]; then + has_warnings=1 + warning_list+="Contains bypass indicator: '${BASH_REMATCH[1]}'|" + ((warnings++)) + fi + + # Check 3: Subject validation (batch character operations) + subject_len=${#subject} + first_char="${subject:0:1}" + last_char="${subject: -1}" + + # Length checks + if [[ $subject_len -le 10 ]]; then + has_warnings=1 + warning_list+="Subject very short ($subject_len chars)|" + ((warnings++)) + elif [[ $subject_len -ge 80 ]]; then + has_issues=1 + issue_list+="Subject too long ($subject_len chars)|" + ((failed++)) + fi + + # Character validation using ASCII values + if [[ ${#first_char} -eq 1 ]]; then + # Check if it's a lowercase letter + case "$first_char" in + [a-z]) + has_issues=1 + issue_list+="Subject not capitalized|" + ((failed++)) + ;; + esac + fi + + # Period check + if [[ "$last_char" == "." ]]; then + has_issues=1 + issue_list+="Subject ends with period|" + ((failed++)) + fi + + # Generic filename check (simplified pattern) + if [[ "$subject" =~ ^(Update|Fix|Change|Modify)[[:space:]] ]]; then + if [[ "$subject" =~ \.(c|h)$ ]]; then + has_warnings=1 + warning_list+="Generic filename-only subject|" + ((warnings++)) + fi + fi + + # Check 4: Web interface (string contains check) + if [[ "$full_msg" == *"Co-authored-by:"* ]]; then + if [[ "$full_msg" != *"Change-Id:"* ]]; then + has_issues=1 + issue_list+="Likely created via GitHub web interface|" + ((failed++)) + fi + fi + + # Check 5: Queue.c body (most expensive - do last and only when needed) + if [[ "$full_msg" =~ ^[^$'\n']*$'\n'[[:space:]]*$'\n'Change-Id: ]]; then + # Body appears empty - check if queue.c was modified + if git diff-tree --no-commit-id --name-only -r "$commit" | grep -q "^queue\.c$"; then + has_issues=1 + issue_list+="Missing commit body for queue.c changes|" + ((failed++)) + fi + fi + + # Report issues (only if found) + if [[ $has_issues -eq 1 || $has_warnings -eq 1 ]]; then + echo -e "${YELLOW}Commit ${short_hash}:${NC} ${subject}" + + if [[ $has_issues -eq 1 ]]; then + IFS='|' read -ra issues <<< "${issue_list%|}" + for issue in "${issues[@]}"; do + [[ -n "$issue" ]] && echo -e " [ ${RED}FAIL${NC} ] $issue" + done + suspicious_commits+=("$short_hash: $subject") + fi + + if [[ $has_warnings -eq 1 ]]; then + IFS='|' read -ra warnings_arr <<< "${warning_list%|}" + for warning in "${warnings_arr[@]}"; do + [[ -n "$warning" ]] && echo -e " ${YELLOW}!${NC} $warning" + done + fi fi done -if [ $failed -ne 0 ]; then - throw "Some commits are missing a valid Change-Id. Please amend the commit messages accordingly." +if [[ $failed -gt 0 ]]; then + echo -e "\n${RED}Problematic commits detected:${NC}" + for commit in "${suspicious_commits[@]}"; do + echo -e " ${RED}•${NC} $commit" + done + + echo -e "\n${RED}These commits likely bypassed git hooks. Recommended actions:${NC}" + echo -e "1. ${YELLOW}Verify hooks are installed:${NC} scripts/install-git-hooks" + echo -e "2. ${YELLOW}Never use --no-verify flag${NC}" + echo -e "3. ${YELLOW}Avoid GitHub web interface for commits${NC}" + echo -e "4. ${YELLOW}Amend commits if possible:${NC} git rebase -i ${BASE_COMMIT}" + echo + + throw "Git hook compliance validation failed. Please fix the issues above." +fi + +if [[ $warnings -gt 0 ]]; then + echo -e "\n${YELLOW}Some commits have quality warnings but passed basic validation.${NC}" fi exit 0 diff --git a/scripts/checksums b/scripts/checksums index 7f9d8f544..de9275c7b 100644 --- a/scripts/checksums +++ b/scripts/checksums @@ -1,3 +1,3 @@ 25e7d2797e09bfafa0c0dee70111104648faec9d queue.h b26e079496803ebe318174bda5850d2cce1fd0c1 list.h -1029c2784b4cae3909190c64f53a06cba12ea38e scripts/check-commitlog.sh +c286e18579b6461fc289732cee4a18a916f79659 scripts/check-commitlog.sh