From 4038a616c8d8937e4ee990011e0f89d2848508a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Sierant?= Date: Sat, 23 Aug 2025 23:54:30 +0200 Subject: [PATCH 1/4] Fix races in pre-commit checks and improve error handling --- .githooks/pre-commit | 130 +++++++++++++++++++++++++++++------------ scripts/funcs/printing | 1 + 2 files changed, 95 insertions(+), 36 deletions(-) diff --git a/.githooks/pre-commit b/.githooks/pre-commit index d68ef1ac7..604561edc 100755 --- a/.githooks/pre-commit +++ b/.githooks/pre-commit @@ -127,49 +127,112 @@ function regenerate_public_rbac_multi_cluster() { } function update_licenses() { - echo 'regenerating licenses' - time scripts/evergreen/update_licenses.sh 2>&1 | prepend "update_licenses" - git add LICENSE-THIRD-PARTY + if [[ "${MDB_UPDATE_LICENSES:-""}" == "true" ]]; then + echo 'regenerating licenses' + time scripts/evergreen/update_licenses.sh 2>&1 | prepend "update_licenses" + git add LICENSE-THIRD-PARTY + fi } function check_erroneous_kubebuilder_annotations() { # Makes sure there are not erroneous kubebuilder annotations that can # end up in CRDs as descriptions. if grep "// kubebuilder" ./* -r --exclude-dir=vendor --include=\*.go; then - echo "Found an erroneous kubebuilder annotation" + echo -e "${RED}Found an erroneous kubebuilder annotation${NO_COLOR}" exit 1 fi } function check_incorrect_makefile_variable_brackets() { if find . -name "Makefile" | grep -v vendor | xargs grep "\${"; then - echo 'ERROR: Makefiles should NEVER contain curly brackets variables' + echo -e "${RED}ERROR: Makefiles should NEVER contain curly brackets variables${NO_COLOR}" exit 1 fi } +function validate_snippets() { + echo scripts/code_snippets/validate_snippets.py +} + +# Helper function to capture background job PID with job name +capture_bg_job() { + local job_name="$1" + local job_pid=$! + bg_job_pids+=("${job_pid}") + bg_job_pids_with_names+=("${job_pid}:${job_name}") + echo "Started ${job_name} with PID: ${job_pid}" +} + +get_job_name() { + local search_pid="$1" + local match + match=$(printf '%s\n' "${bg_job_pids_with_names[@]}" | grep "^${search_pid}:") + echo "${match#*:}" # Remove everything up to and including the colon +} + function pre_commit() { - if [[ "${MDB_UPDATE_LICENSES:-""}" == "true" ]]; then - ( (time update_licenses) 2>&1 | prepend "update_licenses" ) & - fi - ( (time scripts/evergreen/lint_code.sh) 2>&1 | prepend "lint_code.sh" ) & - ( (time start_shellcheck) 2>&1 | prepend "shellcheck" ) & + # Array to store background job PIDs + # IMPORTANT: ensure each task is executed in it's own subshell following the pattern: + # time new_check 2>&1 | prepend "new_check" & + # capture_bg_job "new_check" + + # those bg_job_ vars are global and capture_bg_job is appending to them on each call + bg_job_pids=() + bg_job_pids_with_names=() + + { + # Update release.json first in case there is a newer version + time update_release_json + # We need to generate the values files first + time update_values_yaml_files + # The values files are used for generating the standalone yaml + time generate_standalone_yaml + } 2>&1 | prepend "update_jobs" & + capture_bg_job "update_jobs" + + time update_licenses 2>&1 | prepend "update_licenses" & + capture_bg_job "update_licenses" + + time scripts/evergreen/lint_code.sh 2>&1 | prepend "lint_code.sh" & + capture_bg_job "lint_code.sh" - # Update release.json first in case there is a newer version - (time update_release_json) 2>&1 | prepend "update_release_json" - # We need to generate the values files first - (time update_values_yaml_files) 2>&1 | prepend "update_values_yaml_files" - # The values files are used for generating the standalone yaml - (time generate_standalone_yaml) 2>&1 | prepend "generate_standalone_yaml" + time start_shellcheck 2>&1 | prepend "shellcheck" & + capture_bg_job "shellcheck" - ( (time regenerate_public_rbac_multi_cluster) 2>&1 | prepend "regenerate_public_rbac_multi_cluster" ) & + time validate_snippets 2>&1 | prepend "validate_snippets" & + capture_bg_job "validate_snippets" + + time regenerate_public_rbac_multi_cluster 2>&1 | prepend "regenerate_public_rbac_multi_cluster" & + capture_bg_job "regenerate_public_rbac_multi_cluster" # Run black and isort on python files that have changed - ( (time python_formatting) 2>&1 | prepend "python_formatting") & + time python_formatting 2>&1 | prepend "python_formatting" & + capture_bg_job "python_formatting" + + time check_erroneous_kubebuilder_annotations 2>&1 | prepend "check_erroneous_kubebuilder_annotations" & + capture_bg_job "check_erroneous_kubebuilder_annotations" + + # add any other checks above - ( (time check_erroneous_kubebuilder_annotations) 2>&1 | prepend "check_erroneous_kubebuilder_annotations" ) & + # Wait for all background jobs and check their exit codes + failures=() + for pid in "${bg_job_pids[@]}"; do + wait "${pid}" || { + job_name=$(get_job_name "${pid}") + failures+=(" ${RED}${job_name} (PID ${pid})${NO_COLOR}") + } + done - wait + if [[ ${#failures[@]} -gt 0 ]]; then + echo -e "${RED}Some checks have failed:${NO_COLOR}" + for failure in "${failures[@]}"; do + echo -e "$failure" + done + echo -e "${RED}To see the details look for the job's logs by it's prefixed name (e.g. \"shellcheck:\").${NO_COLOR}" + exit 1 + fi + + echo -e "${GREEN}pre-commit: All checks passed!${NO_COLOR}" } # Function to run shellcheck on a single file @@ -177,27 +240,22 @@ run_shellcheck() { local file="$1" echo "Running shellcheck on $file" if ! shellcheck --color=always -x "$file" -e SC2154 -e SC1091 -e SC1090 -e SC2148 -o require-variable-braces -P "scripts"; then - echo "shellcheck failed on $file" + echo -e "${RED}shellcheck failed on $file${NO_COLOR}" exit 1 fi } +# Export function so it's available in subshells +export -f run_shellcheck start_shellcheck() { - files_1=$(find scripts -type f -name "*.sh") - files_2=$(find scripts/dev/contexts -type f | grep -v private-context) - files_3=$(find scripts/funcs -type f) - files_4=$(find public/architectures -type f -name "*.sh") - files=$(echo -e "$files_1\n$files_2\n$files_3\n$files_4") - # Process each file in parallel - for file in $files; do - run_shellcheck "$file" & - done - - # Wait for all background jobs - for job in $(jobs -p); do - wait "$job" || exit 1 - done - + # shellcheck disable=SC2016 + { + find scripts -type f -name "*.sh" + find scripts/dev/contexts -type f | grep -v private-context + find scripts/funcs -type f + find public/architectures -type f -name "*.sh" + find docs/ -type f -name "*.sh" + } | xargs -I {} -P 20 bash -c 'run_shellcheck "$1"' _ {} } cmd=${1:-"pre-commit"} diff --git a/scripts/funcs/printing b/scripts/funcs/printing index 0ee4b5ec0..71f4649c7 100644 --- a/scripts/funcs/printing +++ b/scripts/funcs/printing @@ -20,4 +20,5 @@ prepend() { } export RED='\033[0;31m' +export GREEN='\033[0;32m' export NO_COLOR='\033[0m' From 9926451c58ce67789c72dead5f92ed90ede831c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Sierant?= Date: Sat, 23 Aug 2025 23:57:13 +0200 Subject: [PATCH 2/4] Reverted validate_snippets check --- .githooks/pre-commit | 7 ------- 1 file changed, 7 deletions(-) diff --git a/.githooks/pre-commit b/.githooks/pre-commit index 604561edc..613363ec3 100755 --- a/.githooks/pre-commit +++ b/.githooks/pre-commit @@ -150,10 +150,6 @@ function check_incorrect_makefile_variable_brackets() { fi } -function validate_snippets() { - echo scripts/code_snippets/validate_snippets.py -} - # Helper function to capture background job PID with job name capture_bg_job() { local job_name="$1" @@ -199,9 +195,6 @@ function pre_commit() { time start_shellcheck 2>&1 | prepend "shellcheck" & capture_bg_job "shellcheck" - time validate_snippets 2>&1 | prepend "validate_snippets" & - capture_bg_job "validate_snippets" - time regenerate_public_rbac_multi_cluster 2>&1 | prepend "regenerate_public_rbac_multi_cluster" & capture_bg_job "regenerate_public_rbac_multi_cluster" From 4b596b62f98f40b0fe3a4852b06f49799365522a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Sierant?= Date: Sun, 24 Aug 2025 23:05:42 +0200 Subject: [PATCH 3/4] Improved code readability --- .githooks/pre-commit | 138 ++++++++++++++++++++++--------------------- 1 file changed, 71 insertions(+), 67 deletions(-) diff --git a/.githooks/pre-commit b/.githooks/pre-commit index 613363ec3..fe8c09355 100755 --- a/.githooks/pre-commit +++ b/.githooks/pre-commit @@ -18,14 +18,14 @@ fi mkdir -p "$(go env GOPATH)/bin" -function update_mco_tests() { +update_mco_tests() { echo "Regenerating MCO evergreen tests configuration" python scripts/evergreen/e2e/mco/create_mco_tests.py >.evergreen-mco.yml git add .evergreen-mco.yml } # Generates a yaml file to install the operator from the helm sources. -function generate_standalone_yaml() { +generate_standalone_yaml() { HELM_OPTS=$@ charttmpdir=$(mktemp -d 2>/dev/null || mktemp -d -t 'charttmpdir') @@ -73,7 +73,7 @@ function generate_standalone_yaml() { } -function python_formatting() { +python_formatting() { # installing Black if ! command -v "black" >/dev/null; then pip install -r requirements.txt @@ -85,7 +85,7 @@ function python_formatting() { black . } -function generate_manifests() { +generate_manifests() { make manifests git add config/crd/bases @@ -93,7 +93,7 @@ function generate_manifests() { git add public/crds.yaml } -function update_values_yaml_files() { +update_values_yaml_files() { # ensure that all helm values files are up to date. # shellcheck disable=SC2154 python scripts/evergreen/release/update_helm_values_files.py @@ -107,7 +107,7 @@ function update_values_yaml_files() { git add go.sum } -function update_release_json() { +update_release_json() { # ensure that release.json is up 2 date # shellcheck disable=SC2154 python scripts/evergreen/release/update_release.py @@ -116,7 +116,7 @@ function update_release_json() { git add release.json } -function regenerate_public_rbac_multi_cluster() { +regenerate_public_rbac_multi_cluster() { if echo "$git_last_changed" | grep -q -e 'cmd/kubectl-mongodb' -e 'pkg/kubectl-mongodb'; then echo 'regenerating multicluster RBAC public example' pushd pkg/kubectl-mongodb/common/ @@ -126,7 +126,7 @@ function regenerate_public_rbac_multi_cluster() { fi } -function update_licenses() { +update_licenses() { if [[ "${MDB_UPDATE_LICENSES:-""}" == "true" ]]; then echo 'regenerating licenses' time scripts/evergreen/update_licenses.sh 2>&1 | prepend "update_licenses" @@ -134,7 +134,7 @@ function update_licenses() { fi } -function check_erroneous_kubebuilder_annotations() { +check_erroneous_kubebuilder_annotations() { # Makes sure there are not erroneous kubebuilder annotations that can # end up in CRDs as descriptions. if grep "// kubebuilder" ./* -r --exclude-dir=vendor --include=\*.go; then @@ -143,13 +143,30 @@ function check_erroneous_kubebuilder_annotations() { fi } -function check_incorrect_makefile_variable_brackets() { +check_incorrect_makefile_variable_brackets() { if find . -name "Makefile" | grep -v vendor | xargs grep "\${"; then echo -e "${RED}ERROR: Makefiles should NEVER contain curly brackets variables${NO_COLOR}" exit 1 fi } +update_jobs() { + # Update release.json first in case there is a newer version + time update_release_json + # We need to generate the values files first + time update_values_yaml_files + # The values files are used for generating the standalone yaml + time generate_standalone_yaml +} + +lint_code() { + scripts/evergreen/lint_code.sh +} + +# bg_job_ vars are global; capture_bg_job function is appending to them on each call +bg_job_pids=() +bg_job_pids_with_names=() + # Helper function to capture background job PID with job name capture_bg_job() { local job_name="$1" @@ -166,66 +183,52 @@ get_job_name() { echo "${match#*:}" # Remove everything up to and including the colon } -function pre_commit() { - # Array to store background job PIDs - # IMPORTANT: ensure each task is executed in it's own subshell following the pattern: - # time new_check 2>&1 | prepend "new_check" & - # capture_bg_job "new_check" - - # those bg_job_ vars are global and capture_bg_job is appending to them on each call - bg_job_pids=() - bg_job_pids_with_names=() - - { - # Update release.json first in case there is a newer version - time update_release_json - # We need to generate the values files first - time update_values_yaml_files - # The values files are used for generating the standalone yaml - time generate_standalone_yaml - } 2>&1 | prepend "update_jobs" & - capture_bg_job "update_jobs" - - time update_licenses 2>&1 | prepend "update_licenses" & - capture_bg_job "update_licenses" - - time scripts/evergreen/lint_code.sh 2>&1 | prepend "lint_code.sh" & - capture_bg_job "lint_code.sh" - - time start_shellcheck 2>&1 | prepend "shellcheck" & - capture_bg_job "shellcheck" - - time regenerate_public_rbac_multi_cluster 2>&1 | prepend "regenerate_public_rbac_multi_cluster" & - capture_bg_job "regenerate_public_rbac_multi_cluster" - - # Run black and isort on python files that have changed - time python_formatting 2>&1 | prepend "python_formatting" & - capture_bg_job "python_formatting" +# Executes function given on the first argument as background job. +# It's ensuring logs are properly prefixed by the name and +# the job's pid is captured in bg_jobs array in order to wait for completion. +run_job_in_background() { + job_name=$1 + time ${job_name} 2>&1 | prepend "${job_name}" & + capture_bg_job "${job_name}" +} - time check_erroneous_kubebuilder_annotations 2>&1 | prepend "check_erroneous_kubebuilder_annotations" & - capture_bg_job "check_erroneous_kubebuilder_annotations" +# Waits for all background jobs stored in bg_job_pids and check their exit codes. +wait_for_all_background_jobs() { + failures=() + for pid in "${bg_job_pids[@]}"; do + wait "${pid}" || { + job_name=$(get_job_name "${pid}") + failures+=(" ${RED}${job_name} (PID ${pid})${NO_COLOR}") + } + done - # add any other checks above + if [[ ${#failures[@]} -gt 0 ]]; then + echo -e "${RED}Some checks have failed:${NO_COLOR}" + for failure in "${failures[@]}"; do + echo -e "$failure" + done + echo -e "${RED}To see the details look for the job's logs by it's prefixed name (e.g. \"shellcheck:\").${NO_COLOR}" + return 1 + fi - # Wait for all background jobs and check their exit codes - failures=() - for pid in "${bg_job_pids[@]}"; do - wait "${pid}" || { - job_name=$(get_job_name "${pid}") - failures+=(" ${RED}${job_name} (PID ${pid})${NO_COLOR}") - } - done + return 0 +} - if [[ ${#failures[@]} -gt 0 ]]; then - echo -e "${RED}Some checks have failed:${NO_COLOR}" - for failure in "${failures[@]}"; do - echo -e "$failure" - done - echo -e "${RED}To see the details look for the job's logs by it's prefixed name (e.g. \"shellcheck:\").${NO_COLOR}" - exit 1 +pre_commit() { + run_job_in_background "update_jobs" + run_job_in_background "update_licenses" + run_job_in_background "lint_code" + run_job_in_background "start_shellcheck" + run_job_in_background "regenerate_public_rbac_multi_cluster" + run_job_in_background "python_formatting" + run_job_in_background "check_erroneous_kubebuilder_annotations" + + if wait_for_all_background_jobs; then + echo -e "${GREEN}pre-commit: All checks passed!${NO_COLOR}" + return 0 + else + return 1 fi - - echo -e "${GREEN}pre-commit: All checks passed!${NO_COLOR}" } # Function to run shellcheck on a single file @@ -237,7 +240,8 @@ run_shellcheck() { exit 1 fi } -# Export function so it's available in subshells + +# Export function so it's available in subshells (for xargs) export -f run_shellcheck start_shellcheck() { @@ -261,5 +265,5 @@ elif [[ "${cmd}" == "pre-commit" ]]; then elif [[ "${cmd}" == "shellcheck" ]]; then start_shellcheck elif [[ "${cmd}" == "lint" ]]; then - source scripts/evergreen/lint_code.sh + lint_code fi From 4110b53ee3b9cf86f46466824d7eb53e80da1271 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Sierant?= Date: Mon, 25 Aug 2025 12:20:29 +0200 Subject: [PATCH 4/4] Review fixes --- .githooks/pre-commit | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/.githooks/pre-commit b/.githooks/pre-commit index fe8c09355..b645eee39 100755 --- a/.githooks/pre-commit +++ b/.githooks/pre-commit @@ -163,19 +163,10 @@ lint_code() { scripts/evergreen/lint_code.sh } -# bg_job_ vars are global; capture_bg_job function is appending to them on each call +# bg_job_ vars are global; run_job_in_background function is appending to them on each call bg_job_pids=() bg_job_pids_with_names=() -# Helper function to capture background job PID with job name -capture_bg_job() { - local job_name="$1" - local job_pid=$! - bg_job_pids+=("${job_pid}") - bg_job_pids_with_names+=("${job_pid}:${job_name}") - echo "Started ${job_name} with PID: ${job_pid}" -} - get_job_name() { local search_pid="$1" local match @@ -189,7 +180,11 @@ get_job_name() { run_job_in_background() { job_name=$1 time ${job_name} 2>&1 | prepend "${job_name}" & - capture_bg_job "${job_name}" + + local job_pid=$! + bg_job_pids+=("${job_pid}") + bg_job_pids_with_names+=("${job_pid}:${job_name}") + echo "Started ${job_name} with PID: ${job_pid}" } # Waits for all background jobs stored in bg_job_pids and check their exit codes.