# Workflow file captured from the run page for:
# "Improvements to documentation with regards to remote OAuth issue" (#821)
# PR Review workflow: drives an AI (OpenCode) code review on pull requests.
# Triggered by PR lifecycle events, a "/mirrobot-review" comment, or manual
# dispatch with an explicit PR number.
name: PR Review

# One review run per PR at a time; queued runs wait rather than cancel.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.event.issue.number || github.event.inputs.prNumber }}
  cancel-in-progress: false

on:
  pull_request_target:
    types: [opened, synchronize, ready_for_review]
  issue_comment:
    types: [created]
  workflow_dispatch:
    inputs:
      prNumber:
        description: 'The number of the PR to review manually'
        required: true
        type: string

jobs:
  review-pr:
    # Bot check is in the issue_comment branch - workflow shows "skipped" for bot comments
    if: |
      github.event_name == 'workflow_dispatch' ||
      (github.event.action == 'opened' && github.event.pull_request.draft == false) ||
      github.event.action == 'ready_for_review' ||
      (github.event.action == 'synchronize' && contains(github.event.pull_request.labels.*.name, 'Agent Monitored')) ||
      (
        github.event_name == 'issue_comment' &&
        github.event.issue.pull_request &&
        github.event.comment.user.login != 'mirrobot' &&
        github.event.comment.user.login != 'mirrobot-agent' &&
        github.event.comment.user.login != 'mirrobot-agent[bot]' &&
        (contains(github.event.comment.body, '/mirrobot-review') || contains(github.event.comment.body, '/mirrobot_review'))
      )
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: write
    env:
      # Resolves across all three trigger shapes (PR event, comment event, dispatch).
      PR_NUMBER: ${{ github.event.pull_request.number || github.event.issue.number || inputs.prNumber }}
      BOT_NAMES_JSON: '["mirrobot", "mirrobot-agent", "mirrobot-agent[bot]"]'
      IGNORE_BOT_NAMES_JSON: '["ellipsis-dev"]'
      COMMENT_FETCH_LIMIT: '20'
      REVIEW_FETCH_LIMIT: '30'
      REVIEW_THREAD_FETCH_LIMIT: '40'
      THREAD_COMMENT_FETCH_LIMIT: '5'
    steps:
      # ========================================================================
      # COMMENT VALIDATION STEP (only for issue_comment events)
      # ========================================================================
      # Validates that trigger words are in actual content (not in quotes/code)
      # If validation fails, subsequent steps are skipped
      # ========================================================================
      - name: Validate comment trigger
        id: validate
        if: github.event_name == 'issue_comment'
        env:
          COMMENT_BODY: ${{ github.event.comment.body }}
        run: |
          set -e
          # Save comment to temp file for processing
          TEMP_FILE=$(mktemp)
          echo "$COMMENT_BODY" > "$TEMP_FILE"
          # Remove fenced code blocks (```...```)
          CLEAN_BODY=$(awk '
            /^```/ { in_code = !in_code; next }
            !in_code { print }
          ' "$TEMP_FILE")
          # Remove inline code (`...`)
          CLEAN_BODY=$(echo "$CLEAN_BODY" | sed 's/`[^`]*`//g')
          # Remove quoted lines (lines starting with >)
          CLEAN_BODY=$(echo "$CLEAN_BODY" | grep -v '^[[:space:]]*>' || true)
          rm -f "$TEMP_FILE"
          echo "Clean body after stripping quotes/code:"
          echo "$CLEAN_BODY"
          echo "---"
          # Check for trigger words in clean text
          # Trigger: /mirrobot-review or /mirrobot_review
          if echo "$CLEAN_BODY" | grep -qE '/mirrobot[-_]review'; then
            echo "::notice::Valid trigger found in non-quoted, non-code text."
            echo "should_proceed=true" >> $GITHUB_OUTPUT
          else
            echo "::notice::Trigger only found in quotes/code blocks. Skipping."
            echo "should_proceed=false" >> $GITHUB_OUTPUT
          fi

      - name: Checkout repository
        if: github.event_name != 'issue_comment' || steps.validate.outputs.should_proceed == 'true'
        uses: actions/checkout@v4

      - name: Bot Setup
        if: github.event_name != 'issue_comment' || steps.validate.outputs.should_proceed == 'true'
        id: setup
        uses: ./.github/actions/bot-setup
        with:
          bot-app-id: ${{ secrets.BOT_APP_ID }}
          bot-private-key: ${{ secrets.BOT_PRIVATE_KEY }}
          opencode-api-key: ${{ secrets.OPENCODE_API_KEY }}
          opencode-model: ${{ secrets.OPENCODE_MODEL }}
          opencode-fast-model: ${{ secrets.OPENCODE_FAST_MODEL }}
          custom-providers-json: ${{ secrets.CUSTOM_PROVIDERS_JSON }}

      # Delete any stale PENDING reviews left by the bot so a new review can
      # be submitted cleanly.
      - name: Clear pending bot review
        if: github.event_name != 'issue_comment' || steps.validate.outputs.should_proceed == 'true'
        env:
          GH_TOKEN: ${{ steps.setup.outputs.token }}
          BOT_NAMES_JSON: ${{ env.BOT_NAMES_JSON }}
        run: |
          pending_review_ids=$(gh api --paginate \
            "/repos/${GITHUB_REPOSITORY}/pulls/${{ env.PR_NUMBER }}/reviews" \
            | jq -r --argjson bots "$BOT_NAMES_JSON" '.[]? | select((.state // "") == "PENDING" and (((.user.login // "") as $login | $bots | index($login)))) | .id' \
            | sort -u)
          if [ -z "$pending_review_ids" ]; then
            echo "No pending bot reviews to clear."
            exit 0
          fi
          while IFS= read -r review_id; do
            [ -z "$review_id" ] && continue
            if gh api \
              --method DELETE \
              -H "Accept: application/vnd.github+json" \
              "/repos/${GITHUB_REPOSITORY}/pulls/${{ env.PR_NUMBER }}/reviews/$review_id"; then
              echo "Cleared pending review $review_id"
            else
              echo "::warning::Failed to clear pending review $review_id"
            fi
          done <<< "$pending_review_ids"

      # An 'eyes' reaction signals that the review request was picked up.
      - name: Add reaction to PR
        if: github.event_name != 'issue_comment' || steps.validate.outputs.should_proceed == 'true'
        env:
          GH_TOKEN: ${{ steps.setup.outputs.token }}
          BOT_NAMES_JSON: ${{ env.BOT_NAMES_JSON }}
          IGNORE_BOT_NAMES_JSON: ${{ env.IGNORE_BOT_NAMES_JSON }}
        run: |
          gh api \
            --method POST \
            -H "Accept: application/vnd.github+json" \
            /repos/${{ github.repository }}/issues/${{ env.PR_NUMBER }}/reactions \
            -f content='eyes'

      # Builds the PULL_REQUEST_CONTEXT env block consumed by the AI prompt:
      # PR metadata, comments, reviews, review threads (via GraphQL), linked
      # issues, and cross-references, with bot/hidden/outdated items filtered.
      - name: Fetch and Format Full PR Context
        if: github.event_name != 'issue_comment' || steps.validate.outputs.should_proceed == 'true'
        id: pr_meta
        env:
          GH_TOKEN: ${{ steps.setup.outputs.token }}
        run: |
          # Fetch core PR metadata (comments and reviews fetched via GraphQL below)
          pr_json=$(gh pr view ${{ env.PR_NUMBER }} --repo ${{ github.repository }} --json author,title,body,createdAt,state,headRefName,baseRefName,headRefOid,additions,deletions,commits,files,closingIssuesReferences,headRepository)
          # Fetch timeline data to find cross-references
          timeline_data=$(gh api "/repos/${{ github.repository }}/issues/${{ env.PR_NUMBER }}/timeline")
          repo_owner="${GITHUB_REPOSITORY%/*}"
          repo_name="${GITHUB_REPOSITORY#*/}"
          GRAPHQL_QUERY='query($owner:String!, $name:String!, $number:Int!, $commentLimit:Int!, $reviewLimit:Int!, $threadLimit:Int!, $threadCommentLimit:Int!) {
            repository(owner: $owner, name: $name) {
              pullRequest(number: $number) {
                comments(last: $commentLimit) {
                  nodes {
                    databaseId
                    author { login }
                    body
                    createdAt
                    isMinimized
                    minimizedReason
                  }
                }
                reviews(last: $reviewLimit) {
                  nodes {
                    databaseId
                    author { login }
                    body
                    state
                    submittedAt
                    isMinimized
                    minimizedReason
                  }
                }
                reviewThreads(last: $threadLimit) {
                  nodes {
                    id
                    isResolved
                    isOutdated
                    comments(last: $threadCommentLimit) {
                      nodes {
                        databaseId
                        author { login }
                        body
                        createdAt
                        path
                        line
                        originalLine
                        diffHunk
                        isMinimized
                        minimizedReason
                        pullRequestReview {
                          databaseId
                          isMinimized
                          minimizedReason
                        }
                      }
                    }
                  }
                }
              }
            }
          }'
          discussion_data=$(gh api graphql \
            -F owner="$repo_owner" \
            -F name="$repo_name" \
            -F number=${{ env.PR_NUMBER }} \
            -F commentLimit=${{ env.COMMENT_FETCH_LIMIT }} \
            -F reviewLimit=${{ env.REVIEW_FETCH_LIMIT }} \
            -F threadLimit=${{ env.REVIEW_THREAD_FETCH_LIMIT }} \
            -F threadCommentLimit=${{ env.THREAD_COMMENT_FETCH_LIMIT }} \
            -f query="$GRAPHQL_QUERY")
          # Debug: Output pr_json and the discussion GraphQL payload for inspection
          echo "$pr_json" > pr_json.txt
          echo "$discussion_data" > discussion_data.txt
          # Prepare metadata
          author=$(echo "$pr_json" | jq -r .author.login)
          created_at=$(echo "$pr_json" | jq -r .createdAt)
          base_branch=$(echo "$pr_json" | jq -r .baseRefName)
          head_branch=$(echo "$pr_json" | jq -r .headRefName)
          state=$(echo "$pr_json" | jq -r .state)
          additions=$(echo "$pr_json" | jq -r .additions)
          deletions=$(echo "$pr_json" | jq -r .deletions)
          total_commits=$(echo "$pr_json" | jq -r '.commits | length')
          changed_files_count=$(echo "$pr_json" | jq -r '.files | length')
          title=$(echo "$pr_json" | jq -r .title)
          body=$(echo "$pr_json" | jq -r '.body // "(No description provided)"')
          # Build changed files list with correct jq interpolations for additions and deletions
          # Previous pattern had a missing backslash before the deletions interpolation, leaving a literal '((.deletions))'.
          changed_files_list=$(echo "$pr_json" | jq -r '.files[] | "- \(.path) (MODIFIED) +\((.additions))/-\((.deletions))"')
          comments=$(echo "$discussion_data" | jq -r --argjson ignored "$IGNORE_BOT_NAMES_JSON" '
            ((.data.repository.pullRequest.comments.nodes // [])
              | map(select((.isMinimized != true) and (((.author.login? // "unknown") as $login | $ignored | index($login)) | not))))
            | if length > 0 then
                map("- " + (.author.login? // "unknown") + " at " + (.createdAt // "N/A") + ":\n" + ((.body // "") | tostring) + "\n")
                | join("")
              else
                "No general comments."
              end')
          # ===== ACCURATE FILTERING & COUNTING (Fixed math logic) =====
          # Calculate all stats using jq integers directly to avoid grep/text parsing errors
          stats_json=$(echo "$discussion_data" | jq -r --argjson ignored "$IGNORE_BOT_NAMES_JSON" '
            # Define filter logic
            def is_valid_review:
              (.author.login? // "unknown") as $login | $ignored | index($login) | not
              and (.isMinimized != true);
            def is_valid_comment:
              .isResolved != true
              and .isOutdated != true
              and (((.comments.nodes // []) | first | .isMinimized) != true)
              and ((((.comments.nodes // []) | first | .pullRequestReview.isMinimized) // false) != true);
            def is_valid_inline:
              .isMinimized != true
              and ((.pullRequestReview.isMinimized // false) != true)
              and (((.author.login? // "unknown") as $login | $ignored | index($login)) | not);
            # Calculate Reviews
            def raw_reviews: (.data.repository.pullRequest.reviews.nodes // []);
            def total_reviews: (raw_reviews | length);
            def included_reviews: ([raw_reviews[]? | select(is_valid_review)] | length);
            # Calculate Review Comments
            def raw_threads: (.data.repository.pullRequest.reviewThreads.nodes // []);
            def valid_threads: (raw_threads | map(select(is_valid_comment)));
            def all_valid_comments: (valid_threads | map(.comments.nodes // []) | flatten | map(select(is_valid_inline)));
            # We count total comments as "active/unresolved threads comments"
            def total_review_comments: (raw_threads | map(select(.isResolved != true and .isOutdated != true)) | map(.comments.nodes // []) | flatten | length);
            def included_review_comments: (all_valid_comments | length);
            {
              total_reviews: total_reviews,
              included_reviews: included_reviews,
              excluded_reviews: (total_reviews - included_reviews),
              total_review_comments: total_review_comments,
              included_review_comments: included_review_comments,
              excluded_comments: (total_review_comments - included_review_comments)
            }
          ')
          # Export stats to env vars
          filtered_reviews=$(echo "$stats_json" | jq .included_reviews)
          excluded_reviews=$(echo "$stats_json" | jq .excluded_reviews)
          filtered_comments=$(echo "$stats_json" | jq .included_review_comments)
          excluded_comments=$(echo "$stats_json" | jq .excluded_comments)
          echo "✓ Filtered reviews: $filtered_reviews included, $excluded_reviews excluded (ignored bots/hidden)"
          echo "✓ Filtered review comments: $filtered_comments included, $excluded_comments excluded (outdated/hidden)"
          # Generate Text Content (using same filters as stats)
          # Reviews Text
          review_filter_err=$(mktemp 2>/dev/null || echo "/tmp/review_filter_err.log")
          if reviews=$(echo "$discussion_data" | jq -r --argjson ignored "$IGNORE_BOT_NAMES_JSON" '
            if ((((.data.repository.pullRequest.reviews.nodes // []) | length) > 0)) then
              ((.data.repository.pullRequest.reviews.nodes // [])[]?
                | select(
                    ((.author.login? // "unknown") as $login | $ignored | index($login) | not)
                    and (.isMinimized != true)
                  )
                | "- " + (.author.login? // "unknown") + " at " + (.submittedAt // "N/A") + ":\n  - Review body: " + (.body // "(No summary comment)") + "\n  - State: " + (.state // "UNKNOWN") + "\n")
            else
              "No formal reviews."
            end' 2>"$review_filter_err"); then
            if [ -s "$review_filter_err" ]; then
              echo "::debug::jq stderr (reviews) emitted output:"
              cat "$review_filter_err"
            fi
          else
            echo "::warning::Review formatting failed, using unfiltered data"
            reviews="Error processing reviews."
            echo "FILTER_ERROR_REVIEWS=true" >> $GITHUB_ENV
          fi
          rm -f "$review_filter_err" || true
          # Review Comments Text
          review_comment_filter_err=$(mktemp 2>/dev/null || echo "/tmp/review_comment_filter_err.log")
          if review_comments=$(echo "$discussion_data" | jq -r --argjson ignored "$IGNORE_BOT_NAMES_JSON" '
            ((.data.repository.pullRequest.reviewThreads.nodes // [])
              | map(select(
                  .isResolved != true and .isOutdated != true
                  and (((.comments.nodes // []) | first | .isMinimized) != true)
                  and ((((.comments.nodes // []) | first | .pullRequestReview.isMinimized) // false) != true)
                ))
              | map(.comments.nodes // [])
              | flatten
              | map(select((.isMinimized != true)
                  and ((.pullRequestReview.isMinimized // false) != true)
                  and (((.author.login? // "unknown") as $login | $ignored | index($login)) | not))))
            | if length > 0 then
                map("- " + (.author.login? // "unknown") + " at " + (.createdAt // "N/A") + " (" + (.path // "Unknown file") + ":" + ((.line // .originalLine // "N/A") | tostring) + "):\n  " + ((.body // "") | tostring) + "\n")
                | join("")
              else
                "No inline review comments."
              end' 2>"$review_comment_filter_err"); then
            if [ -s "$review_comment_filter_err" ]; then
              echo "::debug::jq stderr (review comments) emitted output:"
              cat "$review_comment_filter_err"
            fi
          else
            echo "::warning::Review comment formatting failed"
            review_comments="Error processing review comments."
            echo "FILTER_ERROR_COMMENTS=true" >> $GITHUB_ENV
          fi
          rm -f "$review_comment_filter_err" || true
          # Store filtering statistics
          echo "EXCLUDED_REVIEWS=$excluded_reviews" >> $GITHUB_ENV
          echo "EXCLUDED_COMMENTS=$excluded_comments" >> $GITHUB_ENV
          # Prepare linked issues robustly by fetching each one individually
          linked_issues_content=""
          issue_numbers=$(echo "$pr_json" | jq -r '.closingIssuesReferences[].number')
          if [ -z "$issue_numbers" ]; then
            linked_issues="No issues are formally linked for closure by this PR."
          else
            for number in $issue_numbers; do
              issue_details_json=$(gh issue view "$number" --repo "${{ github.repository }}" --json title,body 2>/dev/null || echo "{}")
              issue_title=$(echo "$issue_details_json" | jq -r '.title // "Title not available"')
              issue_body=$(echo "$issue_details_json" | jq -r '.body // "Body not available"')
              linked_issues_content+=$(printf "<issue>\n <number>#%s</number>\n <title>%s</title>\n <body>\n%s\n</body>\n</issue>\n" "$number" "$issue_title" "$issue_body")
            done
            linked_issues=$linked_issues_content
          fi
          # Prepare cross-references from timeline data
          references=$(echo "$timeline_data" | jq -r '.[] | select(.event == "cross-referenced") | .source.issue | "- Mentioned in \(.html_url | if contains("/pull/") then "PR" else "Issue" end): #\(.number) - \(.title)"')
          if [ -z "$references" ]; then references="This PR has not been mentioned in other issues or PRs."; fi
          # Build filtering summary for AI context
          # Ensure numeric fallbacks so blanks never appear if variables are empty
          filter_summary="Context filtering applied: ${excluded_reviews:-0} reviews and ${excluded_comments:-0} review comments excluded from this context."
          if [ "${FILTER_ERROR_REVIEWS}" = "true" ] || [ "${FILTER_ERROR_COMMENTS}" = "true" ]; then
            filter_summary="$filter_summary"$'\n'"Warning: Some filtering operations encountered errors. Context may include items that should have been filtered."
          fi
          # Assemble the final context block
          CONTEXT_DELIMITER="GH_PR_CONTEXT_DELIMITER_$(openssl rand -hex 8)"
          echo "PULL_REQUEST_CONTEXT<<$CONTEXT_DELIMITER" >> "$GITHUB_ENV"
          echo "Author: $author" >> "$GITHUB_ENV"
          echo "Created At: $created_at" >> "$GITHUB_ENV"
          echo "Base Branch (target): $base_branch" >> "$GITHUB_ENV"
          echo "Head Branch (source): $head_branch" >> "$GITHUB_ENV"
          echo "State: $state" >> "$GITHUB_ENV"
          echo "Additions: $additions" >> "$GITHUB_ENV"
          echo "Deletions: $deletions" >> "$GITHUB_ENV"
          echo "Total Commits: $total_commits" >> "$GITHUB_ENV"
          echo "Changed Files: $changed_files_count files" >> "$GITHUB_ENV"
          echo "<pull_request_body>" >> "$GITHUB_ENV"
          echo "$title" >> "$GITHUB_ENV"
          echo "---" >> "$GITHUB_ENV"
          echo "$body" >> "$GITHUB_ENV"
          echo "</pull_request_body>" >> "$GITHUB_ENV"
          echo "<pull_request_comments>" >> "$GITHUB_ENV"
          echo "$comments" >> "$GITHUB_ENV"
          echo "</pull_request_comments>" >> "$GITHUB_ENV"
          echo "<pull_request_reviews>" >> "$GITHUB_ENV"
          echo "$reviews" >> "$GITHUB_ENV"
          echo "</pull_request_reviews>" >> "$GITHUB_ENV"
          echo "<pull_request_review_comments>" >> "$GITHUB_ENV"
          echo "$review_comments" >> "$GITHUB_ENV"
          echo "</pull_request_review_comments>" >> "$GITHUB_ENV"
          echo "<pull_request_changed_files>" >> "$GITHUB_ENV"
          echo "$changed_files_list" >> "$GITHUB_ENV"
          echo "</pull_request_changed_files>" >> "$GITHUB_ENV"
          echo "<linked_issues>" >> "$GITHUB_ENV"
          echo "$linked_issues" >> "$GITHUB_ENV"
          echo "</linked_issues>" >> "$GITHUB_ENV"
          echo "<cross_references>" >> "$GITHUB_ENV"
          echo "$references" >> "$GITHUB_ENV"
          echo "</cross_references>" >> "$GITHUB_ENV"
          echo "<filtering_summary>" >> "$GITHUB_ENV"
          echo "$filter_summary" >> "$GITHUB_ENV"
          echo "</filtering_summary>" >> "$GITHUB_ENV"
          echo "$CONTEXT_DELIMITER" >> "$GITHUB_ENV"
          echo "PR_HEAD_SHA=$(echo "$pr_json" | jq -r .headRefOid)" >> $GITHUB_ENV
          echo "PR_AUTHOR=$author" >> $GITHUB_ENV
          echo "BASE_BRANCH=$base_branch" >> $GITHUB_ENV

      - name: Determine Review Type and Last Reviewed SHA
        if: github.event_name != 'issue_comment' || steps.validate.outputs.should_proceed == 'true'
        id: review_type
        env:
          GH_TOKEN: ${{ steps.setup.outputs.token }}
          BOT_NAMES_JSON: ${{ env.BOT_NAMES_JSON }}
        run: |
          # Robust last summary detection:
          # 1) Find latest bot-authored item with phrase "This review was generated by an AI assistant."
          # 2) Find latest bot-authored item containing the marker <!-- last_reviewed_sha:... -->
          # 3) If the marker item is the latest, use its SHA. Otherwise, try to obtain commit_id from the latest bot review via REST.
          # 4) If still not possible, leave SHA empty and log that the agent should locate the last summary in-session.
          pr_summary_payload=$(gh pr view ${{ env.PR_NUMBER }} --repo ${{ github.repository }} --json comments,reviews)
          detect_json=$(echo "$pr_summary_payload" | jq -c --argjson bots "$BOT_NAMES_JSON" '
            def items:
              [ (.comments[]? | {type:"comment", body:(.body//""), ts:(.updatedAt // .createdAt // ""), author:(.author.login // "unknown")} ),
                (.reviews[]? | {type:"review", body:(.body//""), ts:(.submittedAt // .updatedAt // .createdAt // ""), author:(.author.login // "unknown")} )
              ] | map(select((.author as $a | $bots | index($a))));
            def latest(testexpr):
              (items | map(select(.body | test(testexpr))) | sort_by(.ts) | last) // {};
            { latest_phrase: latest("This review was generated by an AI assistant\\.?"),
              latest_marker: latest("<!-- last_reviewed_sha:[a-f0-9]{7,40} -->") }
          ')
          latest_phrase_ts=$(echo "$detect_json" | jq -r '.latest_phrase.ts // ""')
          latest_phrase_type=$(echo "$detect_json" | jq -r '.latest_phrase.type // ""')
          latest_phrase_body=$(echo "$detect_json" | jq -r '.latest_phrase.body // ""')
          latest_marker_ts=$(echo "$detect_json" | jq -r '.latest_marker.ts // ""')
          latest_marker_body=$(echo "$detect_json" | jq -r '.latest_marker.body // ""')
          # Default outputs
          echo "is_first_review=false" >> $GITHUB_OUTPUT
          resolved_sha=""
          if [ -z "$latest_phrase_ts" ] && [ -z "$latest_marker_ts" ]; then
            echo "No prior bot summaries found. Treating as first review."
            echo "is_first_review=true" >> $GITHUB_OUTPUT
          fi
          # Prefer the marker if it is the most recent
          if [ -n "$latest_marker_ts" ] && { [ -z "$latest_phrase_ts" ] || [ "$latest_marker_ts" \> "$latest_phrase_ts" ] || [ "$latest_marker_ts" = "$latest_phrase_ts" ]; }; then
            resolved_sha=$(printf '%s' "$latest_marker_body" | sed -n 's/.*<!-- last_reviewed_sha:\([a-f0-9]\{7,40\}\) -->.*/\1/p')
            if [ -n "$resolved_sha" ]; then
              echo "Using latest marker SHA: $resolved_sha"
            fi
          fi
          # If marker not chosen or empty, attempt to resolve from the latest review commit_id
          if [ -z "$resolved_sha" ] && [ -n "$latest_phrase_ts" ]; then
            echo "Latest summary lacks marker; attempting commit_id from latest bot review..."
            reviews_rest=$(gh api "/repos/${{ github.repository }}/pulls/${{ env.PR_NUMBER }}/reviews" || echo '[]')
            resolved_sha=$(echo "$reviews_rest" | jq -r --argjson bots "$BOT_NAMES_JSON" '
              map(select((.user.login as $u | $bots | index($u))))
              | sort_by(.submitted_at)
              | last
              | .commit_id // ""
            ')
            if [ -n "$resolved_sha" ]; then
              echo "Resolved from latest bot review commit_id: $resolved_sha"
            fi
          fi
          if [ -n "$resolved_sha" ]; then
            echo "last_reviewed_sha=$resolved_sha" >> $GITHUB_OUTPUT
            echo "$resolved_sha" > last_review_sha.txt
            # Keep is_first_review as previously set (default false unless none found)
          else
            # NOTE(review): a previous revision referenced this step's own outputs
            # here, which are never available while the step is still running;
            # that no-op guard has been removed.
            echo "Could not determine last reviewed SHA automatically. Agent will need to identify the last summary in-session."
            echo "last_reviewed_sha=" >> $GITHUB_OUTPUT
            echo "" > last_review_sha.txt
          fi

      # Copy the prompt from the trusted base-branch checkout BEFORE checking
      # out the (untrusted) PR head, so the PR cannot tamper with the prompt.
      - name: Save secure prompt from base branch
        if: github.event_name != 'issue_comment' || steps.validate.outputs.should_proceed == 'true'
        run: cp .github/prompts/pr-review.md /tmp/pr-review.md

      - name: Checkout PR head
        if: github.event_name != 'issue_comment' || steps.validate.outputs.should_proceed == 'true'
        uses: actions/checkout@v4
        with:
          ref: ${{ env.PR_HEAD_SHA }}
          token: ${{ steps.setup.outputs.token }}
          fetch-depth: 0  # Full history needed for diff generation

      - name: Generate PR Diff for First Review
        if: (github.event_name != 'issue_comment' || steps.validate.outputs.should_proceed == 'true') && steps.review_type.outputs.is_first_review == 'true'
        id: first_review_diff
        env:
          BASE_BRANCH: ${{ env.BASE_BRANCH }}
        run: |
          BASE_BRANCH="${{ env.BASE_BRANCH }}"
          CURRENT_SHA="${PR_HEAD_SHA}"
          DIFF_CONTENT=""
          # Ensure dedicated diff folder exists in the workspace (hidden to avoid accidental use)
          mkdir -p "$GITHUB_WORKSPACE/.mirrobot_files"
          echo "Generating full PR diff against base branch: $BASE_BRANCH"
          # Fetch the base branch to ensure we have it
          if git fetch origin "$BASE_BRANCH":refs/remotes/origin/"$BASE_BRANCH" 2>/dev/null; then
            echo "Successfully fetched base branch $BASE_BRANCH."
            # Find merge base (common ancestor)
            if MERGE_BASE=$(git merge-base origin/"$BASE_BRANCH" "$CURRENT_SHA" 2>/dev/null); then
              echo "Found merge base: $MERGE_BASE"
              # Generate diff from merge base to current commit
              if DIFF_CONTENT=$(git diff --patch "$MERGE_BASE".."$CURRENT_SHA" 2>/dev/null); then
                DIFF_SIZE=${#DIFF_CONTENT}
                DIFF_LINES=$(echo "$DIFF_CONTENT" | wc -l)
                echo "Generated PR diff: $DIFF_LINES lines, $DIFF_SIZE characters"
                # Truncate if too large (500KB limit to avoid context overflow)
                if [ "$DIFF_SIZE" -gt 500000 ]; then
                  echo "::warning::PR diff is very large ($DIFF_SIZE chars). Truncating to 500KB."
                  TRUNCATION_MSG=$'\n\n[DIFF TRUNCATED - PR is very large. Showing first 500KB only. Review scaled to high-impact areas.]'
                  DIFF_CONTENT="${DIFF_CONTENT:0:500000}${TRUNCATION_MSG}"
                fi
                # Write diff directly into the repository workspace in the dedicated folder
                echo "$DIFF_CONTENT" > "$GITHUB_WORKSPACE/.mirrobot_files/first_review_diff.txt"
              else
                echo "::warning::Could not generate diff. Using changed files list only."
                DIFF_CONTENT="(Diff generation failed. Please refer to the changed files list above.)"
                # Write fallback diff directly into the workspace folder
                echo "$DIFF_CONTENT" > "$GITHUB_WORKSPACE/.mirrobot_files/first_review_diff.txt"
              fi
            else
              echo "::warning::Could not find merge base between $BASE_BRANCH and $CURRENT_SHA."
              DIFF_CONTENT="(No common ancestor found. This might be a new branch or orphaned commits.)"
              # Write fallback diff content directly into the repository workspace folder
              echo "$DIFF_CONTENT" > "$GITHUB_WORKSPACE/.mirrobot_files/first_review_diff.txt"
            fi
          else
            echo "::warning::Could not fetch base branch $BASE_BRANCH. Using changed files list only."
            DIFF_CONTENT="(Base branch not available for diff. Please refer to the changed files list above.)"
            # Write error-case diff directly into the repository workspace folder
            echo "$DIFF_CONTENT" > "$GITHUB_WORKSPACE/.mirrobot_files/first_review_diff.txt"
          fi

      - name: Generate Incremental Diff
        if: (github.event_name != 'issue_comment' || steps.validate.outputs.should_proceed == 'true') && steps.review_type.outputs.is_first_review == 'false' && steps.review_type.outputs.last_reviewed_sha != ''
        id: incremental_diff
        run: |
          # Quoted to be safe under word-splitting even if the output is empty.
          LAST_SHA="${{ steps.review_type.outputs.last_reviewed_sha }}"
          CURRENT_SHA="${PR_HEAD_SHA}"
          DIFF_CONTENT=""
          # Ensure dedicated diff folder exists in the workspace (hidden to avoid accidental use)
          mkdir -p "$GITHUB_WORKSPACE/.mirrobot_files"
          echo "Attempting to generate incremental diff from $LAST_SHA to $CURRENT_SHA"
          # Fetch the last reviewed commit, handle potential errors (e.g., rebased/force-pushed commit)
          # First try fetching from origin
          if git fetch origin "$LAST_SHA" 2>/dev/null || git cat-file -e "$LAST_SHA^{commit}" 2>/dev/null; then
            echo "Successfully located $LAST_SHA."
            # Generate diff, fallback to empty if git diff fails (e.g., no common ancestor)
            if DIFF_CONTENT=$(git diff --patch "$LAST_SHA".."$CURRENT_SHA" 2>/dev/null); then
              DIFF_SIZE=${#DIFF_CONTENT}
              DIFF_LINES=$(echo "$DIFF_CONTENT" | wc -l)
              echo "Generated incremental diff: $DIFF_LINES lines, $DIFF_SIZE characters"
              # Truncate if too large (500KB limit)
              if [ "$DIFF_SIZE" -gt 500000 ]; then
                echo "::warning::Incremental diff is very large ($DIFF_SIZE chars). Truncating to 500KB."
                TRUNCATION_MSG=$'\n\n[DIFF TRUNCATED - Changes are very large. Showing first 500KB only.]'
                DIFF_CONTENT="${DIFF_CONTENT:0:500000}${TRUNCATION_MSG}"
              fi
              # Write incremental diff directly into the repository workspace folder
              echo "$DIFF_CONTENT" > "$GITHUB_WORKSPACE/.mirrobot_files/incremental_diff.txt"
            else
              echo "::warning::Could not generate diff between $LAST_SHA and $CURRENT_SHA. Possible rebase/force-push. AI will perform full review."
              # Ensure an empty incremental diff file exists in the workspace folder as fallback
              echo "" > "$GITHUB_WORKSPACE/.mirrobot_files/incremental_diff.txt"
            fi
          else
            echo "::warning::Failed to fetch last reviewed SHA: $LAST_SHA. This can happen if the commit was part of a force-push or rebase. The AI will perform a full review as a fallback."
            # Ensure an empty incremental diff file exists in the workspace folder when last-SHA fetch fails
            echo "" > "$GITHUB_WORKSPACE/.mirrobot_files/incremental_diff.txt"
          fi
          # Ensure workspace diff files exist even on edge cases (in the hidden folder)
          [ -f "$GITHUB_WORKSPACE/.mirrobot_files/first_review_diff.txt" ] || touch "$GITHUB_WORKSPACE/.mirrobot_files/first_review_diff.txt"
          [ -f "$GITHUB_WORKSPACE/.mirrobot_files/incremental_diff.txt" ] || touch "$GITHUB_WORKSPACE/.mirrobot_files/incremental_diff.txt"

      - name: Assemble Review Prompt
        if: github.event_name != 'issue_comment' || steps.validate.outputs.should_proceed == 'true'
        env:
          REVIEW_TYPE: ${{ steps.review_type.outputs.is_first_review == 'true' && 'FIRST' || 'FOLLOW-UP' }}
          PR_AUTHOR: ${{ env.PR_AUTHOR }}
          IS_FIRST_REVIEW: ${{ steps.review_type.outputs.is_first_review }}
          PR_NUMBER: ${{ env.PR_NUMBER }}
          GITHUB_REPOSITORY: ${{ github.repository }}
          PR_HEAD_SHA: ${{ env.PR_HEAD_SHA }}
          PULL_REQUEST_CONTEXT: ${{ env.PULL_REQUEST_CONTEXT }}
        run: |
          # Build DIFF_FILE_PATH pointing to the generated diff in the repository workspace
          if [ "${{ steps.review_type.outputs.is_first_review }}" = "true" ]; then
            DIFF_FILE_PATH="$GITHUB_WORKSPACE/.mirrobot_files/first_review_diff.txt"
          else
            DIFF_FILE_PATH="$GITHUB_WORKSPACE/.mirrobot_files/incremental_diff.txt"
          fi
          # Substitute variables, embedding PR context and diff file path; DIFF_FILE_PATH kept local to this process
          TMP_DIR="${RUNNER_TEMP:-/tmp}"
          VARS='${REVIEW_TYPE} ${PR_AUTHOR} ${IS_FIRST_REVIEW} ${PR_NUMBER} ${GITHUB_REPOSITORY} ${PR_HEAD_SHA} ${PULL_REQUEST_CONTEXT} ${DIFF_FILE_PATH}'
          DIFF_FILE_PATH="$DIFF_FILE_PATH" envsubst "$VARS" < /tmp/pr-review.md > "$TMP_DIR/assembled_prompt.txt"
          # Immediately clear large env after use
          echo "PULL_REQUEST_CONTEXT=" >> "$GITHUB_ENV"
          # Clear small, now-redundant flags included in the context summary
          echo "EXCLUDED_REVIEWS=" >> "$GITHUB_ENV" || true
          echo "EXCLUDED_COMMENTS=" >> "$GITHUB_ENV" || true
          echo "FILTER_ERROR_REVIEWS=" >> "$GITHUB_ENV" || true
          echo "FILTER_ERROR_COMMENTS=" >> "$GITHUB_ENV" || true

      - name: Review PR with OpenCode
        if: github.event_name != 'issue_comment' || steps.validate.outputs.should_proceed == 'true'
        env:
          GITHUB_TOKEN: ${{ steps.setup.outputs.token }}
          # Tool allowlist for the agent: gh/git/jq only; no web fetches.
          OPENCODE_PERMISSION: |
            {
              "bash": {
                "gh*": "allow",
                "git*": "allow",
                "jq*": "allow"
              },
              "external_directory": "allow",
              "webfetch": "deny"
            }
          REVIEW_TYPE: ${{ steps.review_type.outputs.is_first_review == 'true' && 'FIRST' || 'FOLLOW-UP' }}
          PR_AUTHOR: ${{ env.PR_AUTHOR }}
          IS_FIRST_REVIEW: ${{ steps.review_type.outputs.is_first_review }}
          PR_NUMBER: ${{ env.PR_NUMBER }}
          GITHUB_REPOSITORY: ${{ github.repository }}
          PR_HEAD_SHA: ${{ env.PR_HEAD_SHA }}
        run: |
          TMP_DIR="${RUNNER_TEMP:-/tmp}"
          opencode run --share - < "$TMP_DIR/assembled_prompt.txt"

      # Post-check: the AI's submitted review must carry the signature footer
      # and the last_reviewed_sha marker; auto-corrects the review body if not.
      - name: Verify AI Review Footers
        if: always() && (github.event_name != 'issue_comment' || steps.validate.outputs.should_proceed == 'true')
        continue-on-error: true
        env:
          GH_TOKEN: ${{ steps.setup.outputs.token }}
          BOT_NAMES_JSON: ${{ env.BOT_NAMES_JSON }}
          PR_NUMBER: ${{ env.PR_NUMBER }}
          PR_HEAD_SHA: ${{ env.PR_HEAD_SHA }}
        run: |
          set -e  # Fail fast on errors
          # Wait briefly for API consistency
          sleep 5
          echo "Verifying latest bot review for required footers..."
          # 1. Define a cutoff timestamp (e.g., 2 minutes ago)
          cutoff_ts=$(date -u -d "2 minutes ago" +"%Y-%m-%dT%H:%M:%SZ")
          echo "Looking for reviews submitted after: $cutoff_ts"
          # Retry loop to handle API eventual consistency
          MAX_RETRIES=3
          RETRY_DELAY=5
          latest_review_json=""
          for ((i=1; i<=MAX_RETRIES; i++)); do
            echo "Attempt $i: Fetching reviews..."
            if ! reviews=$(gh api "/repos/${{ github.repository }}/pulls/${{ env.PR_NUMBER }}/reviews" --paginate); then
              echo "::warning::Failed to fetch reviews on attempt $i"
              sleep $RETRY_DELAY
              continue
            fi
            # Extract latest bot review (id and body).
            # NOTE: -s/add concatenates --paginate page arrays before filtering,
            # and the REST review object exposes "id" (GraphQL's "databaseId"
            # does not exist here — the previous query always yielded null).
            latest_review_json=$(echo "$reviews" | jq -c -s --argjson bots "$BOT_NAMES_JSON" --arg cutoff "$cutoff_ts" '
              add
              | map(select(.user.login as $u | $bots | index($u)))
              | map(select(.submitted_at > $cutoff))
              | sort_by(.submitted_at)
              | last
              | if . == null then null else {id: .id, body: (.body // "")} end
            ')
            if [ -n "$latest_review_json" ] && [ "$latest_review_json" != "null" ]; then
              echo "Found recent review."
              break
            fi
            echo "No recent review found yet. Waiting ${RETRY_DELAY}s..."
            sleep $RETRY_DELAY
          done
          if [ -z "$latest_review_json" ] || [ "$latest_review_json" == "null" ]; then
            echo "::warning::No recent bot review found (within last 2 mins) after $MAX_RETRIES attempts. The AI may have decided not to review, or failed."
            exit 0
          fi
          review_id=$(echo "$latest_review_json" | jq -r .id)
          current_body=$(echo "$latest_review_json" | jq -r .body)
          # Define expected footers
          EXPECTED_SIGNATURE="_This review was generated by an AI assistant._"
          EXPECTED_MARKER="<!-- last_reviewed_sha:${PR_HEAD_SHA} -->"
          needs_fix=false
          # Check 1: Signature
          if [[ "$current_body" != *"$EXPECTED_SIGNATURE"* ]]; then
            echo "::warning::Missing or malformed AI signature footer."
            needs_fix=true
          else
            echo "✓ Found correct AI signature."
          fi
          # Check 2: SHA Marker
          if [[ "$current_body" != *"$EXPECTED_MARKER"* ]]; then
            echo "::warning::Missing or malformed last_reviewed_sha footer."
            needs_fix=true
          else
            echo "✓ Found correct SHA marker."
          fi
          if [ "$needs_fix" = true ]; then
            echo "Attempting to auto-correct review $review_id..."
            # Remove existing/malformed footers using regex (in perl mode for robustness)
            # 1. Remove signature
            clean_body=$(echo "$current_body" | perl -0777 -pe 's/\Q_This review was generated by an AI assistant._\E//g')
            # 2. Remove any sha marker
            clean_body=$(echo "$clean_body" | perl -0777 -pe 's/<!-- last_reviewed_sha:[a-f0-9]+ -->//g')
            # 3. Trim trailing whitespace
            clean_body=$(echo "$clean_body" | sed -e :a -e '/^\n*$/{$d;N;};/\n$/ba')
            # Construct new body (continuation lines at scalar-base indent so
            # the footers carry no leading whitespace in the posted body)
            new_body="${clean_body}
          ${EXPECTED_SIGNATURE}
          ${EXPECTED_MARKER}"
            # Update review
            if gh api --method PUT "/repos/${{ github.repository }}/pulls/${{ env.PR_NUMBER }}/reviews/$review_id" -f body="$new_body"; then
              echo "::notice::Successfully auto-corrected review footers."
              exit 0
            else
              echo "::error::Failed to auto-correct review footers."
              exit 1
            fi
          else
            echo "Verification passed! No corrections needed."
          fi