Skip to content

Commit 1c82495

Browse files
marcelolyncheliasjudin
authored andcommitted
ci: Retry logic for linting step (leanprover-community#34059)
We should continue our investigation in the separate mirror pipeline and do this to avoid blocking our users: for this, let's make the linting invocation auto-retry after a 20 minute timeout. We still keep a 60min step timeout for good measure, but it shouldn't be necessary.
1 parent 044b37d commit 1c82495

File tree

4 files changed

+100
-20
lines changed

4 files changed

+100
-20
lines changed

.github/build.in.yml

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -512,13 +512,18 @@ jobs:
512512
- name: lint mathlib
513513
if: ${{ always() && steps.build.outcome == 'success' || steps.build.outcome == 'failure' }}
514514
id: lint
515-
timeout-minutes: 30
515+
timeout-minutes: 40
516516
run: |
517517
cd pr-branch
518+
set -o pipefail
518519
# Try running with --trace; if it fails due to argument parsing, the PR needs to merge master
519520
# We use .lake/ for the output file because landrun restricts /tmp access
520-
if ! env LEAN_ABORT_ON_PANIC=1 stdbuf -oL lake exe runLinter --trace Mathlib 2>&1 | tee .lake/lint_output.txt; then
521-
if grep -q "cannot parse arguments" .lake/lint_output.txt; then
521+
for attempt in 1 2; do
522+
if timeout 15m env LEAN_ABORT_ON_PANIC=1 stdbuf -oL lake exe runLinter --trace Mathlib 2>&1 | tee ".lake/lint_output_attempt_${attempt}.txt"; then
523+
break
524+
fi
525+
status=$?
526+
if grep -q "cannot parse arguments" ".lake/lint_output_attempt_${attempt}.txt"; then
522527
echo ""
523528
echo "=============================================================================="
524529
echo "ERROR: Your branch uses an older version of runLinter that doesn't support"
@@ -531,9 +536,24 @@ jobs:
531536
echo " git push"
532537
echo "=============================================================================="
533538
echo ""
539+
exit 1
534540
fi
535-
exit 1
536-
fi
541+
if [ "$status" -eq 124 ]; then
542+
echo "runLinter timed out (attempt $attempt)."
543+
if [ "$attempt" -lt 2 ]; then
544+
echo "Checking for runLinter processes..."
545+
if ps -eo pid,lstart,command | grep -F runLinter | grep -v grep; then
546+
echo "Killing runLinter processes..."
547+
pkill -f runLinter || true
548+
else
549+
echo "No stray runLinter processes found."
550+
fi
551+
echo "Retrying runLinter after timeout..."
552+
continue
553+
fi
554+
fi
555+
exit $status
556+
done
537557
538558
- name: end gh-problem-match-wrap for shake and lint steps
539559
uses: leanprover-community/gh-problem-matcher-wrap@20007cb926a46aa324653a387363b52f07709845 # 2025-04-23

.github/workflows/bors.yml

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -522,13 +522,18 @@ jobs:
522522
- name: lint mathlib
523523
if: ${{ always() && steps.build.outcome == 'success' || steps.build.outcome == 'failure' }}
524524
id: lint
525-
timeout-minutes: 30
525+
timeout-minutes: 40
526526
run: |
527527
cd pr-branch
528+
set -o pipefail
528529
# Try running with --trace; if it fails due to argument parsing, the PR needs to merge master
529530
# We use .lake/ for the output file because landrun restricts /tmp access
530-
if ! env LEAN_ABORT_ON_PANIC=1 stdbuf -oL lake exe runLinter --trace Mathlib 2>&1 | tee .lake/lint_output.txt; then
531-
if grep -q "cannot parse arguments" .lake/lint_output.txt; then
531+
for attempt in 1 2; do
532+
if timeout 15m env LEAN_ABORT_ON_PANIC=1 stdbuf -oL lake exe runLinter --trace Mathlib 2>&1 | tee ".lake/lint_output_attempt_${attempt}.txt"; then
533+
break
534+
fi
535+
status=$?
536+
if grep -q "cannot parse arguments" ".lake/lint_output_attempt_${attempt}.txt"; then
532537
echo ""
533538
echo "=============================================================================="
534539
echo "ERROR: Your branch uses an older version of runLinter that doesn't support"
@@ -541,9 +546,24 @@ jobs:
541546
echo " git push"
542547
echo "=============================================================================="
543548
echo ""
549+
exit 1
544550
fi
545-
exit 1
546-
fi
551+
if [ "$status" -eq 124 ]; then
552+
echo "runLinter timed out (attempt $attempt)."
553+
if [ "$attempt" -lt 2 ]; then
554+
echo "Checking for runLinter processes..."
555+
if ps -eo pid,lstart,command | grep -F runLinter | grep -v grep; then
556+
echo "Killing runLinter processes..."
557+
pkill -f runLinter || true
558+
else
559+
echo "No stray runLinter processes found."
560+
fi
561+
echo "Retrying runLinter after timeout..."
562+
continue
563+
fi
564+
fi
565+
exit $status
566+
done
547567
548568
- name: end gh-problem-match-wrap for shake and lint steps
549569
uses: leanprover-community/gh-problem-matcher-wrap@20007cb926a46aa324653a387363b52f07709845 # 2025-04-23

.github/workflows/build.yml

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -528,13 +528,18 @@ jobs:
528528
- name: lint mathlib
529529
if: ${{ always() && steps.build.outcome == 'success' || steps.build.outcome == 'failure' }}
530530
id: lint
531-
timeout-minutes: 30
531+
timeout-minutes: 40
532532
run: |
533533
cd pr-branch
534+
set -o pipefail
534535
# Try running with --trace; if it fails due to argument parsing, the PR needs to merge master
535536
# We use .lake/ for the output file because landrun restricts /tmp access
536-
if ! env LEAN_ABORT_ON_PANIC=1 stdbuf -oL lake exe runLinter --trace Mathlib 2>&1 | tee .lake/lint_output.txt; then
537-
if grep -q "cannot parse arguments" .lake/lint_output.txt; then
537+
for attempt in 1 2; do
538+
if timeout 15m env LEAN_ABORT_ON_PANIC=1 stdbuf -oL lake exe runLinter --trace Mathlib 2>&1 | tee ".lake/lint_output_attempt_${attempt}.txt"; then
539+
break
540+
fi
541+
status=$?
542+
if grep -q "cannot parse arguments" ".lake/lint_output_attempt_${attempt}.txt"; then
538543
echo ""
539544
echo "=============================================================================="
540545
echo "ERROR: Your branch uses an older version of runLinter that doesn't support"
@@ -547,9 +552,24 @@ jobs:
547552
echo " git push"
548553
echo "=============================================================================="
549554
echo ""
555+
exit 1
550556
fi
551-
exit 1
552-
fi
557+
if [ "$status" -eq 124 ]; then
558+
echo "runLinter timed out (attempt $attempt)."
559+
if [ "$attempt" -lt 2 ]; then
560+
echo "Checking for runLinter processes..."
561+
if ps -eo pid,lstart,command | grep -F runLinter | grep -v grep; then
562+
echo "Killing runLinter processes..."
563+
pkill -f runLinter || true
564+
else
565+
echo "No stray runLinter processes found."
566+
fi
567+
echo "Retrying runLinter after timeout..."
568+
continue
569+
fi
570+
fi
571+
exit $status
572+
done
553573
554574
- name: end gh-problem-match-wrap for shake and lint steps
555575
uses: leanprover-community/gh-problem-matcher-wrap@20007cb926a46aa324653a387363b52f07709845 # 2025-04-23

.github/workflows/build_fork.yml

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -526,13 +526,18 @@ jobs:
526526
- name: lint mathlib
527527
if: ${{ always() && steps.build.outcome == 'success' || steps.build.outcome == 'failure' }}
528528
id: lint
529-
timeout-minutes: 30
529+
timeout-minutes: 40
530530
run: |
531531
cd pr-branch
532+
set -o pipefail
532533
# Try running with --trace; if it fails due to argument parsing, the PR needs to merge master
533534
# We use .lake/ for the output file because landrun restricts /tmp access
534-
if ! env LEAN_ABORT_ON_PANIC=1 stdbuf -oL lake exe runLinter --trace Mathlib 2>&1 | tee .lake/lint_output.txt; then
535-
if grep -q "cannot parse arguments" .lake/lint_output.txt; then
535+
for attempt in 1 2; do
536+
if timeout 15m env LEAN_ABORT_ON_PANIC=1 stdbuf -oL lake exe runLinter --trace Mathlib 2>&1 | tee ".lake/lint_output_attempt_${attempt}.txt"; then
537+
break
538+
fi
539+
status=$?
540+
if grep -q "cannot parse arguments" ".lake/lint_output_attempt_${attempt}.txt"; then
536541
echo ""
537542
echo "=============================================================================="
538543
echo "ERROR: Your branch uses an older version of runLinter that doesn't support"
@@ -545,9 +550,24 @@ jobs:
545550
echo " git push"
546551
echo "=============================================================================="
547552
echo ""
553+
exit 1
548554
fi
549-
exit 1
550-
fi
555+
if [ "$status" -eq 124 ]; then
556+
echo "runLinter timed out (attempt $attempt)."
557+
if [ "$attempt" -lt 2 ]; then
558+
echo "Checking for runLinter processes..."
559+
if ps -eo pid,lstart,command | grep -F runLinter | grep -v grep; then
560+
echo "Killing runLinter processes..."
561+
pkill -f runLinter || true
562+
else
563+
echo "No stray runLinter processes found."
564+
fi
565+
echo "Retrying runLinter after timeout..."
566+
continue
567+
fi
568+
fi
569+
exit $status
570+
done
551571
552572
- name: end gh-problem-match-wrap for shake and lint steps
553573
uses: leanprover-community/gh-problem-matcher-wrap@20007cb926a46aa324653a387363b52f07709845 # 2025-04-23

0 commit comments

Comments
 (0)