Skip to content

Commit 7d98027

Browse files
authored
build: add gh tool and script to generate PRs from commits (#424)
(cherry picked from commit 5c82a40)
1 parent b81c3d3 commit 7d98027

File tree

2 files changed

+248
-0
lines changed

2 files changed

+248
-0
lines changed

build-tools/final/Dockerfile

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,19 @@ ENV PATH="${HOME}/.cargo/bin:${PATH}"
9696
# Preprocessor for BPF used by cmake
9797
RUN pip3 install --break-system-packages pcpp
9898

99+
# Install GitHub's gh CLI from the official apt repository.
#  - apt-get is used instead of apt: apt's command line is not a stable
#    interface for scripts and prints a warning when used non-interactively.
#  - The keyring is downloaded to a temp file which is removed again so it
#    does not linger in the image layer.
#  - apt caches and package lists are cleaned in the same layer to keep the
#    image small.
RUN (type -p wget >/dev/null || (sudo apt-get update && sudo apt-get install -y wget)) \
    && sudo mkdir -p -m 755 /etc/apt/keyrings \
    && out=$(mktemp) \
    && wget -nv -O "$out" https://cli.github.com/packages/githubcli-archive-keyring.gpg \
    && sudo tee /etc/apt/keyrings/githubcli-archive-keyring.gpg < "$out" > /dev/null \
    && rm -f "$out" \
    && sudo chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg \
    && sudo mkdir -p -m 755 /etc/apt/sources.list.d \
    && echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null \
    && sudo apt-get update \
    && sudo apt-get install -y gh \
    && sudo apt-get clean \
    && sudo rm -rf /var/lib/apt/lists/*
111+
99112
# add a script to setup build inside of container
100113
# to be run after we build the image.
101114
RUN ln -s $HOME/src/dev/benv-build.sh build.sh

dev/commits_to_prs.sh

Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
#!/usr/bin/env bash
2+
3+
# Converts each commit on top of UPSTREAM/BASE into its own PR.
4+
# For each commit (oldest to newest):
5+
# - Creates a branch from upstream base
6+
# - Cherry-picks the commit onto that branch
7+
# - Pushes the branch to origin
8+
# - Opens a PR against upstream/base with title/body from the commit
9+
#
10+
# Defaults assume a typical fork setup with remotes:
11+
# origin -> your fork (push target)
12+
# upstream -> canonical repo (PR base)
13+
#
14+
# Requirements:
15+
# - git and gh installed
16+
# - gh authenticated (gh auth login)
17+
# - remotes configured: an 'upstream' remote pointing to the canonical repo
18+
#
19+
# Environment variables (optional):
20+
# UPSTREAM_REMOTE Remote name for upstream (default: upstream)
21+
# ORIGIN_REMOTE Remote name for origin (default: origin)
22+
# BASE_BRANCH Base branch on upstream (default: main)
23+
# BRANCH_PREFIX Prefix for created branches (default: pr)
24+
# DRY_RUN If set (non-empty), only print actions
25+
# INCLUDE_MERGES If set (non-empty), attempt to include merge commits (default: skip merges)
26+
# OVERWRITE_LOCAL If set, overwrite existing local branch of same name
27+
#
28+
# Notes:
29+
# - Merge commits are skipped by default because cherry-picking merges
30+
# requires selecting a parent (-m) and can be ambiguous.
31+
# - On conflicts during cherry-pick, the script stops. Resolve conflicts and
32+
# re-run; it will skip already-created branches. If a commit was already
33+
# applied (empty cherry-pick), it is detected and skipped.
34+
35+
# Strict mode: exit on an unhandled command failure (-e), treat expansion of
# an unset variable as an error (-u), and make a pipeline fail when any of its
# stages fails (pipefail).
set -euo pipefail
36+
37+
# Configuration. Every setting can be supplied through the environment; an
# unset or empty value falls back to the default shown here.
: "${UPSTREAM_REMOTE:=upstream}"
: "${ORIGIN_REMOTE:=origin}"
: "${BASE_BRANCH:=main}"
: "${BRANCH_PREFIX:=pr}"

# Toggles: empty means "off", any non-empty value means "on". They are
# assigned explicitly so later expansions are safe under 'set -u'.
: "${DRY_RUN:=}"
: "${INCLUDE_MERGES:=}"
: "${OVERWRITE_LOCAL:=}"
44+
45+
# Print an informational message, tagged with the script name, to stdout.
log() {
  printf '%s\n' "[commits_to_prs] $*"
}

# Print an error message, tagged with the script name, to stderr and
# terminate the script with exit status 1.
die() {
  printf '%s\n' "[commits_to_prs][error] $*" >&2
  exit 1
}
47+
48+
# Both tools are hard requirements; stop before anything is modified.
if ! command -v git >/dev/null 2>&1; then
  die "git is required"
fi
if ! command -v gh >/dev/null 2>&1; then
  die "gh is required (https://cli.github.com/)"
fi

# Verify gh credentials up front so a run does not stop half-way through
# because of missing authentication.
gh auth status >/dev/null 2>&1 \
  || die "gh auth not configured. Run 'gh auth login' first."
55+
56+
#######################################
# Determine the upstream repository slug ("owner/repo").
#
# The slug is parsed from the URL of the upstream remote so that gh does not
# need a resolvable slug. Supported URL shapes:
#   scheme://[user@]host[:port]/owner/repo[.git][/]   (https, http, ssh, git)
#   [user@]host:owner/repo[.git]                      (scp-like)
# If the remote has no URL, falls back to asking gh about the current repo.
#
# Globals:   UPSTREAM_REMOTE (read)
# Arguments: none
# Outputs:   the slug on stdout without a trailing newline; empty when it
#            cannot be determined
#######################################
determine_upstream_repo() {
  local upstream_repo=""
  local upstream_url
  # A missing remote is not fatal here; the caller checks for an empty result.
  upstream_url=$(git remote get-url "$UPSTREAM_REMOTE" 2>/dev/null || true)
  if [ -n "$upstream_url" ]; then
    # 1) strip "scheme://authority/", 2) strip scp-like "[user@]host:",
    # 3) drop trailing slashes, 4) drop a trailing ".git".
    upstream_repo=$(printf '%s\n' "$upstream_url" \
      | sed -E 's#^[A-Za-z][A-Za-z0-9+.-]*://[^/]+/##; s#^([^/@:]+@)?[^/:]+:##; s#/+$##; s#\.git$##')
  fi
  if [ -z "$upstream_repo" ]; then
    upstream_repo=$(gh repo view --json nameWithOwner -q .nameWithOwner 2>/dev/null || true)
  fi
  printf "%s" "$upstream_repo"
}
71+
72+
# Resolve the upstream slug once; everything below depends on it.
UPSTREAM_REPO=$(determine_upstream_repo)
if [ -z "$UPSTREAM_REPO" ]; then
  die "Unable to determine upstream repo. Ensure remote '$UPSTREAM_REMOTE' exists."
fi
74+
75+
#######################################
# Determine the owner (user or organisation) of the origin/fork repository.
# The owner is used to refer to fork branches as "owner:branch".
#
# The owner is parsed from the URL of the origin remote. Supported URL shapes:
#   scheme://[user@]host[:port]/owner/repo[.git][/]   (https, http, ssh, git)
#   [user@]host:owner/repo[.git]                      (scp-like)
# If the remote has no URL, falls back to asking gh about the current repo.
#
# Globals:   ORIGIN_REMOTE (read)
# Arguments: none
# Outputs:   the owner on stdout without a trailing newline; empty when it
#            cannot be determined
#######################################
determine_origin_owner() {
  local origin_owner=""
  local origin_url
  # A missing remote is not fatal here; the caller checks for an empty result.
  origin_url=$(git remote get-url "$ORIGIN_REMOTE" 2>/dev/null || true)
  if [ -n "$origin_url" ]; then
    # 1) strip "scheme://authority/", 2) strip scp-like "[user@]host:",
    # 3) keep only the first path component (the owner).
    origin_owner=$(printf '%s\n' "$origin_url" \
      | sed -E 's#^[A-Za-z][A-Za-z0-9+.-]*://[^/]+/##; s#^([^/@:]+@)?[^/:]+:##; s#/.*$##')
  fi
  if [ -z "$origin_owner" ]; then
    origin_owner=$(gh repo view --json owner -q .owner.login 2>/dev/null || true)
  fi
  printf "%s" "$origin_owner"
}
90+
91+
# Resolve the fork owner once; it is needed to address fork branches.
ORIGIN_OWNER=$(determine_origin_owner)
if [ -z "$ORIGIN_OWNER" ]; then
  die "Unable to determine origin owner. Ensure remote '$ORIGIN_REMOTE' exists."
fi
93+
94+
#######################################
# Report whether the upstream repository already has a PR for a branch.
# Several lookups are tried in turn to be robust across gh versions:
#   1. open PRs whose head is "<origin owner>:<branch>"
#   2. open PRs whose headRefName equals the branch name
#   3. PRs in any state whose headRefName equals the branch name
# Errors from gh are ignored; a failed lookup simply counts as "not found".
#
# Globals:   UPSTREAM_REPO (read), ORIGIN_OWNER (read)
# Arguments: $1 - branch name
# Returns:   0 if any lookup printed a PR number, 1 otherwise
#######################################
pr_exists_for_branch() {
  local branch_name="$1"
  local found state
  # jq program selecting the first PR whose head branch has the given name.
  local by_ref_name='map(select(.headRefName=="'"$branch_name"'")) | .[0].number'

  # Lookup 1: let gh filter on the fully qualified head.
  found=$(gh pr list --repo "$UPSTREAM_REPO" --state open --head "$ORIGIN_OWNER:$branch_name" --json number --jq '.[0].number' 2>/dev/null || true)
  if [ -n "$found" ]; then
    return 0
  fi

  # Lookups 2 and 3: filter by branch name ourselves, open PRs first and
  # then all states (covers a state mismatch).
  for state in open all; do
    found=$(gh pr list --repo "$UPSTREAM_REPO" --state "$state" --json number,headRefName --jq "$by_ref_name" 2>/dev/null || true)
    if [ -n "$found" ]; then
      return 0
    fi
  done

  return 1
}
109+
110+
# Refresh the upstream base branch (skipped in dry-run mode).
log "Fetching $UPSTREAM_REMOTE/$BASE_BRANCH"
[ -n "$DRY_RUN" ] || git fetch "$UPSTREAM_REMOTE" "$BASE_BRANCH" --tags --prune

BASE_REF="$UPSTREAM_REMOTE/$BASE_BRANCH"

# Collect the commits reachable from HEAD but not from the base, oldest
# first. Merge commits are filtered out unless INCLUDE_MERGES is set.
log "Computing commits on top of $BASE_REF"
rev_list_opts=(--reverse)
if [ -z "$INCLUDE_MERGES" ]; then
  rev_list_opts+=(--no-merges)
fi
COMMITS=$(git rev-list "${rev_list_opts[@]}" "$BASE_REF"..HEAD)

# Nothing on top of the base: report it and finish successfully.
[ -n "$COMMITS" ] || {
  log "No commits found on top of $BASE_REF. Nothing to do."
  exit 0
}
130+
131+
#######################################
# Turn arbitrary text into a slug suitable for use in a branch name.
# Lower-cases the input, replaces every run of characters outside [a-z0-9]
# with a single '-', trims leading/trailing dashes and limits the result to
# 50 characters. Dashes are trimmed again after truncation so the cut can
# never leave a slug ending in '-'.
#
# LC_ALL=C pins the character classes and ranges to ASCII so the result does
# not depend on the caller's locale.
#
# Arguments: none (reads the text from stdin)
# Outputs:   the slug on stdout (may be empty)
#######################################
slugify() {
  LC_ALL=C tr '[:upper:]' '[:lower:]' \
    | LC_ALL=C sed -E 's/[^a-z0-9]+/-/g; s/^-+//; s/-+$//' \
    | cut -c1-50 \
    | sed -E 's/-+$//'
}
138+
139+
# For each commit, create branch, cherry-pick, push, and open PR
140+
for SHA in $COMMITS; do
141+
PARENTS=$(git show -s --format=%P "$SHA")
142+
if [ -z "$INCLUDE_MERGES" ] && [ "$(wc -w <<<"$PARENTS")" -gt 1 ]; then
143+
log "Skipping merge commit $SHA (set INCLUDE_MERGES=1 to include)"
144+
continue
145+
fi
146+
147+
SUBJECT=$(git show -s --format=%s "$SHA")
148+
BODY=$(git show -s --format=%b "$SHA")
149+
SLUG=$(printf '%s' "$SUBJECT" | slugify)
150+
SHORTSHA=$(printf '%.7s' "$SHA")
151+
BRANCH="$BRANCH_PREFIX/$SHORTSHA-$SLUG"
152+
153+
log "Processing $SHA → branch '$BRANCH'"
154+
155+
# Check local branch existence
156+
if git show-ref --verify --quiet "refs/heads/$BRANCH"; then
157+
if [ -z "$OVERWRITE_LOCAL" ]; then
158+
log "Local branch $BRANCH exists; skipping branch creation."
159+
else
160+
log "Overwriting local branch $BRANCH from $BASE_REF"
161+
if [ -z "$DRY_RUN" ]; then
162+
git branch -f "$BRANCH" "$BASE_REF"
163+
fi
164+
fi
165+
else
166+
log "Creating branch $BRANCH from $BASE_REF"
167+
if [ -z "$DRY_RUN" ]; then
168+
git branch "$BRANCH" "$BASE_REF"
169+
fi
170+
fi
171+
172+
# Switch to branch
173+
if [ -z "$DRY_RUN" ]; then
174+
git switch "$BRANCH"
175+
fi
176+
177+
# Cherry-pick the commit
178+
log "Cherry-picking $SHA onto $BRANCH"
179+
if [ -z "$DRY_RUN" ]; then
180+
if ! git cherry-pick -x "$SHA"; then
181+
# If the cherry-pick failed, it can be either a real conflict or an
182+
# empty pick because the commit is already applied to this branch.
183+
# Detect the latter and skip it so we can continue.
184+
if git rev-parse -q --verify CHERRY_PICK_HEAD >/dev/null 2>&1; then
185+
# If there are no unmerged files, the failure is likely an empty pick.
186+
if [ -z "$(git diff --name-only --diff-filter=U)" ]; then
187+
log "Cherry-pick produced no changes (already applied). Skipping $SHA."
188+
# Skip this pick in the sequencer and move on.
189+
git cherry-pick --skip >/dev/null 2>&1 || true
190+
else
191+
die "Cherry-pick conflicted for $SHA. Resolve conflicts and re-run."
192+
fi
193+
else
194+
die "Cherry-pick failed unexpectedly for $SHA."
195+
fi
196+
fi
197+
fi
198+
199+
# Push to origin
200+
log "Pushing $BRANCH to $ORIGIN_REMOTE"
201+
if [ -z "$DRY_RUN" ]; then
202+
git push -u "$ORIGIN_REMOTE" "$BRANCH"
203+
fi
204+
205+
# Create PR with title/body from commit
206+
log "Creating PR against $UPSTREAM_REPO:$BASE_BRANCH"
207+
if [ -z "$DRY_RUN" ]; then
208+
# If a PR for this fork branch already exists against upstream, skip creation
209+
if pr_exists_for_branch "$BRANCH"; then
210+
log "PR already exists for branch $BRANCH; skipping creation."
211+
else
212+
BODY_FILE=$(mktemp)
213+
cleanup() { rm -f "$BODY_FILE" 2>/dev/null || true; }
214+
trap cleanup EXIT
215+
printf '%s' "$BODY" > "$BODY_FILE"
216+
217+
if ! gh pr create \
218+
--repo "$UPSTREAM_REPO" \
219+
--base "$BASE_BRANCH" \
220+
--title "$SUBJECT" \
221+
--body-file "$BODY_FILE"; then
222+
# Race, or 'already exists' case; treat as success if detectable
223+
if pr_exists_for_branch "$BRANCH"; then
224+
log "Detected existing PR after creation attempt; continuing."
225+
else
226+
die "Failed to create PR for $SHA (branch $BRANCH)"
227+
fi
228+
fi
229+
fi
230+
fi
231+
232+
log "Done with $SHA"
233+
done
234+
235+
log "All done."

0 commit comments

Comments
 (0)