Skip to content

Handling outdated and misleading comments in maybeHandleDelayedWriteBookieFailure #8

Handling outdated and misleading comments in maybeHandleDelayedWriteBookieFailure

Handling outdated and misleading comments in maybeHandleDelayedWriteBookieFailure #8

Workflow file for this run

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Description:
# This GitHub Actions workflow enables rerunning CI via PR/Issue comments using the /bkbot command.
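# Supported commands: /bkbot rerun [keyword], /bkbot stop, /bkbot cancel
# - /bkbot rerun => Rerun, in full, the latest run of each workflow for the PR's head SHA, limited to completed runs with a conclusion of failure/cancelled/timed_out/skipped. Runs that are still in progress are skipped.
# - /bkbot rerun <keyword> => Regardless of workflow/job status, fetch all jobs in the latest runs, match jobs whose name contains the keyword, and rerun each matching job.
# - /bkbot stop | /bkbot cancel => Request cancellation of the latest workflow runs for the PR's head SHA that have not completed yet.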
# Logging notes:
# - In keyword mode, jobs are listed from every latest workflow run (including runs still in progress), so jobs that have already failed or been skipped in a running workflow can still be matched and a rerun requested.
# Triggering condition: When a new comment is created containing /bkbot.
# Workflow that executes /bkbot commands posted as comments.
name: BookKeeper Bot
# Triggered only when a new comment is created (issue_comment event, 'created' type).
on:
issue_comment:
types: [created]
# Token permissions: the script below re-runs and cancels workflow runs through the
# Actions API (actions: write); repository contents are only read.
permissions:
actions: write
contents: read
jobs:
bkbot:
runs-on: ubuntu-24.04
# Upper bound on the job's runtime.
timeout-minutes: 10
# Coarse pre-filter: skip the job unless the comment body contains '/bkbot' somewhere.
# The script performs the stricter check that the comment starts with the command.
if: github.event_name == 'issue_comment' && contains(github.event.comment.body, '/bkbot')
steps:
- name: Execute bkbot command
uses: actions/github-script@v7
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
# Inline JavaScript executed by actions/github-script.
script: |
// Supported commands:
// - /bkbot rerun
// Reruns all completed workflows with conclusions of failure/timed_out/skipped/cancelled
// If workflow is still running, cannot rerun whole workflow, just suggest using "/bkbot rerun jobname"
// - /bkbot rerun jobname
// Matches job.name by keyword, reruns matching jobs (regardless of current state, failures are logged)
// - /bkbot stop or /bkbot cancel
// Cancels all still running (queued/in_progress) workflow runs associated with the current PR
const commentBody = context.payload.comment.body.trim();
const prefix = '/bkbot';
if (!commentBody.startsWith(prefix)) {
console.log('Not a bkbot command, skipping ...');
return;
}
if (!context.payload.issue || !context.payload.issue.pull_request) {
console.error('This comment is not on a Pull Request. bkbot only works on PRs.');
return;
}
const parts = commentBody.split(/\s+/);
const sub = (parts[1] || '').toLowerCase();
const arg = parts.length > 2 ? parts.slice(2).join(' ') : '';
const supported = ['rerun', 'stop', 'cancel'];
if (!supported.includes(sub)) {
console.log(`Unsupported command '${sub}'. Supported: '/bkbot rerun [jobName?]', '/bkbot stop', '/bkbot cancel'.`);
return;
}
const prNum = context.payload.issue.number;
// Get PR info
let pr;
try {
({ data: pr } = await github.rest.pulls.get({
owner: context.repo.owner,
repo: context.repo.repo,
pull_number: prNum
}));
} catch (e) {
console.error(`Failed to fetch PR #${prNum}: ${e.message}`);
return;
}
const headSha = pr.head.sha;
const prBranch = pr.head.ref;
const prUser = (pr.head && pr.head.user && pr.head.user.login) ? pr.head.user.login : pr.user.login;
const prUrl = pr.html_url;
console.log(`bkbot handling PR #${prNum} ${prUrl}`);
console.log(`PR branch='${prBranch}', headSha='${headSha}', author='${prUser}'`);
console.log(`Command parsed => sub='${sub}', arg='${arg || ''}'`);
// Fetch workflow runs in this repo triggered by this user on this branch, then filter by headSha
let page = 1;
const allRunsRaw = [];
while (true) {
const { data } = await github.rest.actions.listWorkflowRunsForRepo({
owner: context.repo.owner,
repo: context.repo.repo,
actor: prUser,
branch: prBranch,
per_page: 100,
page
});
const wr = data.workflow_runs || [];
if (wr.length === 0) break;
allRunsRaw.push(...wr);
if (wr.length < 100) break;
page++;
}
const runsAtHead = allRunsRaw.filter(r => r.head_sha === headSha);
if (runsAtHead.length === 0) {
console.error(`No workflow runs found for head SHA ${headSha} on branch ${prBranch}.`);
return;
}
// Only keep the latest run for each workflow_id
runsAtHead.sort((a, b) => {
if (a.workflow_id !== b.workflow_id) return a.workflow_id - b.workflow_id;
return new Date(b.created_at) - new Date(a.created_at);
});
const latestRuns = [];
const seen = new Set();
for (const r of runsAtHead) {
if (!seen.has(r.workflow_id)) {
seen.add(r.workflow_id);
latestRuns.push(r);
}
}
// Formats a workflow run as a single log line: run id, name (or a
// placeholder when empty), status, conclusion (or '-') and the run's URL.
function runKey(r) {
  const fields = [
    `[run_id=${r.id}] ${r.name || '(unnamed)'}`,
    `status=${r.status}`,
    `conclusion=${r.conclusion || '-'}`,
    `${r.html_url}`,
  ];
  return fields.join(' | ');
}
// Log the selected runs (one per workflow) so the step output shows what the command acts on.
console.log('--- Latest workflow runs for this PR headSHA (one per workflow) ---');
latestRuns.forEach((wfRun) => {
  console.log('- ' + runKey(wfRun));
});
// Utility: collect every job of a workflow run.
// Requests pages of 100 jobs, starting at page 1, and stops at the first page
// that comes back with fewer than 100 entries (including an empty page).
// Returns the jobs of all pages concatenated in request order.
async function listAllJobs(runId) {
  const PAGE_SIZE = 100;
  const collected = [];
  for (let pageNo = 1; ; pageNo++) {
    const response = await github.rest.actions.listJobsForWorkflowRun({
      owner: context.repo.owner,
      repo: context.repo.repo,
      run_id: runId,
      per_page: PAGE_SIZE,
      page: pageNo,
    });
    const batch = response.data.jobs || [];
    collected.push(...batch);
    if (batch.length < PAGE_SIZE) {
      return collected;
    }
  }
}
// Utility: request a re-run of one job.
// Uses the typed REST method when the client exposes it and otherwise issues
// the raw POST request. Returns true when the request went through, false
// when it threw; the outcome is logged either way and errors are not rethrown.
async function rerunJob(job, run) {
  try {
    const hasRestMethod = Boolean(github.rest.actions.reRunJobForWorkflowRun);
    const target = {
      owner: context.repo.owner,
      repo: context.repo.repo,
      job_id: job.id,
    };
    if (!hasRestMethod) {
      await github.request('POST /repos/{owner}/{repo}/actions/jobs/{job_id}/rerun', target);
    } else {
      await github.rest.actions.reRunJobForWorkflowRun(target);
    }
    console.log(`Re-ran job '${job.name}' (job_id=${job.id}) in run '${run.name}' | ${run.html_url}`);
    return true;
  } catch (err) {
    console.log(`Failed to re-run job '${job.name}' (job_id=${job.id}) in run '${run.name}': ${err.message}`);
    return false;
  }
}
// Command 1: /bkbot rerun
if (sub === 'rerun' && !arg) {
const targetConclusions = new Set(['failure', 'timed_out', 'cancelled', 'skipped']);
let fullRerunCount = 0;
let skippedRunning = 0;
let skippedConclusion = 0;
console.log('Mode: full workflow re-run for completed runs with conclusions in [failure,timed_out,cancelled,skipped].');
for (const r of latestRuns) {
if (r.status !== 'completed') {
console.log(`Skip (still running) ${runKey(r)}. Cannot re-run whole workflow. Consider '/bkbot rerun <jobName>' for single job.`);
skippedRunning++;
continue;
}
if (!targetConclusions.has(r.conclusion)) {
console.log(`Skip (conclusion not eligible) ${runKey(r)}`);
skippedConclusion++;
continue;
}
try {
await github.rest.actions.reRunWorkflow({
owner: context.repo.owner,
repo: context.repo.repo,
run_id: r.id
});
console.log(`Triggered full re-run for ${runKey(r)}`);
fullRerunCount++;
} catch (e) {
console.log(`Failed to trigger full re-run for ${runKey(r)}: ${e.message}`);
}
}
if (fullRerunCount === 0) {
console.error(`No eligible workflow runs to re-run. Skipped running=${skippedRunning}, skipped by conclusion=${skippedConclusion}.`);
} else {
console.log(`Finished. Triggered full re-run for ${fullRerunCount} workflow run(s). Skipped running=${skippedRunning}, skipped by conclusion=${skippedConclusion}.`);
}
return;
}
// Command 2: /bkbot rerun jobname
if (sub === 'rerun' && arg) {
const keyword = arg.trim();
console.log(`Mode: job-level re-run. keyword='${keyword}'`);
let matchedJobs = 0;
let successJobs = 0;
for (const r of latestRuns) {
let jobs = [];
try {
jobs = await listAllJobs(r.id);
} catch (e) {
console.log(`Failed to list jobs for ${runKey(r)}: ${e.message}`);
continue;
}
for (const j of jobs) {
if (j.name && j.name.includes(keyword)) {
matchedJobs++;
const ok = await rerunJob(j, r);
if (ok) successJobs++;
}
}
}
if (matchedJobs === 0) {
console.error(`No jobs matched keyword '${keyword}' among latest runs for this PR head.`);
} else {
console.log(`Finished. Matched ${matchedJobs} job(s); successfully requested re-run for ${successJobs} job(s).`);
}
return;
}
// Command 3: /bkbot stop or /bkbot cancel
if (sub === 'stop' || sub === 'cancel') {
console.log('Mode: cancel running workflow runs (queued/in_progress).');
let cancelCount = 0;
let alreadyCompleted = 0;
for (const r of latestRuns) {
if (r.status === 'completed') {
console.log(`Skip (already completed) ${runKey(r)}`);
alreadyCompleted++;
continue;
}
try {
await github.rest.actions.cancelWorkflowRun({
owner: context.repo.owner,
repo: context.repo.repo,
run_id: r.id
});
console.log(`Cancel requested for ${runKey(r)}`);
cancelCount++;
} catch (e) {
console.log(`Failed to cancel ${runKey(r)}: ${e.message}`);
}
}
if (cancelCount === 0) {
console.error(`No running workflow runs to cancel. Already completed: ${alreadyCompleted}.`);
} else {
console.log(`Finished. Requested cancel for ${cancelCount} running workflow run(s). Already completed: ${alreadyCompleted}.`);
}
return;
}