Skip to content

Commit f3c540f

Browse files
committed
refactor: replace git interpret-trailers with Python implementation
replace the call to git interpret-trailers in ghstack/submit.py with this Python implementation attached. ```git-revs 341015d (Base revision) 00e18c6 Create trailers.py module with the Python implementation of Git's interpret-trailers functionality c24a648 Import the trailers module in submit.py HEAD Replace git interpret-trailers call with Python implementation ``` codemcp-id: 2-refactor-replace-git-interpret-trailers-with-pytho ghstack-source-id: 5a99973 ghstack-comment-id: 2816324521 Pull-Request-resolved: #287
1 parent 589472a commit f3c540f

12 files changed

+233
-39
lines changed

src/ghstack/land.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,9 @@ def main(
168168
# TODO: regex here so janky
169169
base_ref = re.sub(r"/orig$", "/base", orig_ref)
170170
head_ref = re.sub(r"/orig$", "/head", orig_ref)
171-
sh.git("push", remote_name, f"{remote_name}/{head_ref}:refs/heads/{base_ref}")
171+
sh.git(
172+
"push", remote_name, f"{remote_name}/{head_ref}:refs/heads/{base_ref}"
173+
)
172174
github.notify_merged(pr_resolved)
173175

174176
# All good! Push!

src/ghstack/submit.py

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import ghstack.gpg_sign
1616
import ghstack.logs
1717
import ghstack.shell
18+
import ghstack.trailers
1819
from ghstack.types import GhNumber, GitCommitHash, GitHubNumber, GitHubRepositoryId
1920

2021
# Either "base", "head" or "orig"; which of the ghstack generated
@@ -963,20 +964,18 @@ def process_commit(
963964
commit_msg = self._update_source_id(diff.summary, elab_diff)
964965
else:
965966
# Need to insert metadata for the first time
966-
# TODO: Probably quicker if we reimplement reinterpret-trailers
967-
# in Python
968-
commit_msg = self.sh.git(
969-
"interpret-trailers",
970-
"--trailer",
971-
f"ghstack-source-id: {diff.source_id}",
972-
*(
973-
["--trailer", f"ghstack-comment-id: {elab_diff.comment_id}"]
974-
if self.direct
975-
else []
976-
),
977-
"--trailer",
978-
f"Pull-Request-resolved: {pull_request_resolved.url()}",
979-
input=strip_mentions(diff.summary.rstrip()),
967+
# Using our Python implementation of interpret-trailers
968+
trailers_to_add = [f"ghstack-source-id: {diff.source_id}"]
969+
970+
if self.direct:
971+
trailers_to_add.append(f"ghstack-comment-id: {elab_diff.comment_id}")
972+
973+
trailers_to_add.append(
974+
f"Pull-Request-resolved: {pull_request_resolved.url()}"
975+
)
976+
977+
commit_msg = ghstack.trailers.interpret_trailers(
978+
strip_mentions(diff.summary.rstrip()), trailers_to_add
980979
)
981980

982981
return DiffMeta(

src/ghstack/trailers.py

Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
#!/usr/bin/env python3
2+
3+
import re
4+
from typing import List, Tuple
5+
6+
# Compile regexes once at module level for better performance
7+
TRAILER_RE = re.compile(r"^([A-Za-z0-9_-]+)(\s*:\s*)(.*)$")
8+
CONTINUATION_RE = re.compile(r"^\s+\S.*$")
9+
10+
# Git-generated trailer prefixes
11+
GIT_GENERATED_PREFIXES = ["Signed-off-by: ", "(cherry picked from commit "]
12+
13+
14+
def parse_message(message: str) -> Tuple[str, str, str]:
15+
"""
16+
Parse a Git commit message into subject, body, and trailers.
17+
18+
According to the Git documentation, trailers are:
19+
- A group of one or more lines that is all trailers, or contains at least one
20+
Git-generated or user-configured trailer and consists of at least 25% trailers.
21+
- The group must be preceded by one or more empty (or whitespace-only) lines.
22+
- The group must either be at the end of the message or be the last non-whitespace
23+
lines before a line that starts with "---" (the "divider").
24+
25+
Args:
26+
message: The commit message to parse.
27+
28+
Returns:
29+
A tuple containing:
30+
- subject: The first line of the message
31+
- body: The body of the message (may be empty)
32+
- trailers: The trailer block as a raw string (may be empty)
33+
"""
34+
if not message:
35+
return "", "", ""
36+
37+
# Split into lines and get the subject (first line)
38+
lines = message.splitlines()
39+
subject = lines[0] if lines else ""
40+
41+
if len(lines) <= 1:
42+
return subject, "", ""
43+
44+
# Remove subject
45+
message_lines = lines[1:]
46+
47+
if not message_lines:
48+
return subject, "", ""
49+
50+
# Find where the trailer block starts
51+
trailer_start = find_trailer_block_start(message_lines)
52+
53+
if trailer_start == -1:
54+
# No trailer block found, everything after subject is body
55+
body = "\n".join(message_lines).strip()
56+
return subject, body, ""
57+
58+
# Body is everything between subject and trailers (with empty lines trimmed)
59+
body = "\n".join(message_lines[:trailer_start]).strip()
60+
61+
# Keep trailers as a raw string
62+
trailers = "\n".join(message_lines[trailer_start:]).strip()
63+
64+
return subject, body, trailers
65+
66+
67+
def find_trailer_block_start(lines: List[str]) -> int:
68+
"""
69+
Find the start index of the trailer block in a list of lines.
70+
71+
Args:
72+
lines: List of message lines (without subject and divider).
73+
74+
Returns:
75+
Index of the first line of the trailer block, or -1 if no trailer block is found.
76+
"""
77+
# Remove trailing empty lines
78+
trimmed_lines = list(reversed([line for line in reversed(lines) if line.strip()]))
79+
80+
if not trimmed_lines:
81+
return -1
82+
83+
# Find the last non-empty block
84+
block_indices = [-1] + [i for i, line in enumerate(lines) if not line.strip()]
85+
86+
# Try blocks from last to first
87+
for i in range(len(block_indices) - 1, -1, -1):
88+
start_idx = block_indices[i] + 1
89+
# If we're at the beginning or checking the whole message
90+
if i == 0 or start_idx == 0:
91+
# Check if the whole remaining content is a trailer block
92+
if is_trailer_block(lines[start_idx:]):
93+
return start_idx
94+
# No more blocks to check
95+
return -1
96+
97+
# Check if the block after this blank line is a trailer block
98+
end_idx = block_indices[i + 1] if i + 1 < len(block_indices) else len(lines)
99+
if is_trailer_block(lines[start_idx:end_idx]):
100+
return start_idx
101+
102+
return -1
103+
104+
105+
def is_trailer_block(lines: List[str]) -> bool:
106+
"""
107+
Determine if the given lines form a trailer block.
108+
109+
A block is a trailer block if:
110+
1. All lines are trailers, or
111+
2. At least one Git-generated trailer exists and at least 25% of lines are trailers
112+
113+
Args:
114+
lines: List of lines to check.
115+
116+
Returns:
117+
True if the lines form a trailer block, False otherwise.
118+
"""
119+
# Filter out empty lines
120+
content_lines = [line for line in lines if line.strip()]
121+
122+
if not content_lines:
123+
return False
124+
125+
trailer_lines = 0
126+
non_trailer_lines = 0
127+
has_git_generated_trailer = False
128+
129+
i = 0
130+
while i < len(content_lines):
131+
line = content_lines[i]
132+
133+
# Skip continuation lines (they belong to the previous trailer)
134+
if CONTINUATION_RE.match(line):
135+
i += 1
136+
continue
137+
138+
# Check if it's a git-generated trailer
139+
if any(line.startswith(prefix) for prefix in GIT_GENERATED_PREFIXES):
140+
has_git_generated_trailer = True
141+
trailer_lines += 1
142+
elif TRAILER_RE.match(line):
143+
# Regular trailer
144+
trailer_lines += 1
145+
else:
146+
# Not a trailer line
147+
non_trailer_lines += 1
148+
149+
i += 1
150+
151+
# A block is a trailer block if all lines are trailers OR
152+
# it has at least one git-generated trailer and >= 25% of lines are trailers
153+
return (trailer_lines > 0 and non_trailer_lines == 0) or (
154+
has_git_generated_trailer and trailer_lines * 3 >= non_trailer_lines
155+
)
156+
157+
158+
def interpret_trailers(message: str, trailers_to_add: List[str]) -> str:
159+
"""
160+
Add trailers to a commit message, mimicking git interpret-trailers.
161+
162+
Args:
163+
message: The commit message to add trailers to
164+
trailers_to_add: List of trailers to add in the format "Key: Value"
165+
166+
Returns:
167+
The commit message with trailers added
168+
"""
169+
subject, body, existing_trailers = parse_message(message)
170+
171+
# Create a new list with all trailers (existing + new)
172+
all_trailers = []
173+
if existing_trailers:
174+
all_trailers.append(existing_trailers)
175+
176+
all_trailers.extend(trailers_to_add)
177+
178+
# Build the new message
179+
new_message = subject
180+
181+
if body:
182+
new_message += "\n\n" + body
183+
184+
if all_trailers:
185+
if body or (not body and existing_trailers):
186+
new_message += "\n"
187+
if not existing_trailers:
188+
new_message += "\n"
189+
new_message += "\n" + "\n".join(all_trailers)
190+
191+
return new_message

test/land/default_branch_change.py.test

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ assert_expected_inline(
4848
assert_expected_inline(
4949
get_upstream_sh().git("log", "--oneline", "main"),
5050
"""\
51-
6f717c4 Commit A
51+
19fa5fa Commit A
5252
dc8bfe4 Initial commit""",
5353
)
5454

@@ -82,15 +82,15 @@ assert_github_state(
8282

8383
This is commit B
8484

85-
* 53a3584 Initial 2
85+
* f5135dc Initial 2
8686

8787
Repository state:
8888

89-
* 53a3584 (gh/ezyang/2/head)
89+
* f5135dc (gh/ezyang/2/head)
9090
| Initial 2
91-
* c55ff15 (gh/ezyang/2/base)
91+
* 61e5c2a (gh/ezyang/2/base)
9292
| Initial 2 (base update)
93-
* 6f717c4 (main, gh/ezyang/1/orig)
93+
* 19fa5fa (main, gh/ezyang/1/orig)
9494
| Commit A
9595
| * 36fcfdf (gh/ezyang/1/head)
9696
| | Initial 1
@@ -107,13 +107,13 @@ gh_land(diff2.pr_url)
107107
assert_expected_inline(
108108
get_upstream_sh().git("log", "--oneline", "master"),
109109
"""\
110-
35747c0 Commit B
111-
b2ec056 Commit A
110+
19e7996 Commit B
111+
cb4e674 Commit A
112112
dc8bfe4 Initial commit""",
113113
)
114114
assert_expected_inline(
115115
get_upstream_sh().git("log", "--oneline", "main"),
116116
"""\
117-
6f717c4 Commit A
117+
19fa5fa Commit A
118118
dc8bfe4 Initial commit""",
119119
)

test/land/ff.py.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ gh_land(pr_url)
1111
assert_expected_inline(
1212
get_upstream_sh().git("log", "--oneline", "master"),
1313
"""\
14-
6f717c4 Commit A
14+
19fa5fa Commit A
1515
dc8bfe4 Initial commit""",
1616
)
1717

test/land/ff_stack.py.test

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ gh_land(pr_url)
1616
assert_expected_inline(
1717
get_upstream_sh().git("log", "--oneline", "master"),
1818
"""\
19-
2f2f3f6 Commit B
20-
3db0d3b Commit A
19+
3c9c5eb Commit B
20+
6eb4d4f Commit A
2121
dc8bfe4 Initial commit""",
2222
)

test/land/ff_stack_two_phase.py.test

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ gh_land(pr_url2)
1818
assert_expected_inline(
1919
get_upstream_sh().git("log", "--oneline", "master"),
2020
"""\
21-
2f2f3f6 Commit B
22-
3db0d3b Commit A
21+
3c9c5eb Commit B
22+
6eb4d4f Commit A
2323
dc8bfe4 Initial commit""",
2424
)

test/land/invalid_resubmit.py.test

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,15 +44,15 @@ else:
4444

4545
This is commit A
4646

47-
* f014df7 New PR
47+
* c461f2f New PR
4848

4949
Repository state:
5050

51-
* f014df7 (gh/ezyang/1/head)
51+
* c461f2f (gh/ezyang/1/head)
5252
| New PR
53-
* 69e4b60 (gh/ezyang/1/base)
53+
* 58e6f57 (gh/ezyang/1/base)
5454
| New PR (base update)
55-
* 6f717c4 (HEAD -> master)
55+
* 19fa5fa (HEAD -> master)
5656
| Commit A
5757
* dc8bfe4
5858
Initial commit

test/land/non_ff.py.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ gh_land(pr_url)
1717
assert_expected_inline(
1818
get_upstream_sh().git("log", "--oneline", "master"),
1919
"""\
20-
b327b28 Commit A
20+
1d53e04 Commit A
2121
38808c0 Commit U
2222
dc8bfe4 Initial commit""",
2323
)

test/land/non_ff_stack_two_phase.py.test

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ gh_land(pr_url2)
2222
assert_expected_inline(
2323
get_upstream_sh().git("log", "--oneline", "master"),
2424
"""\
25-
c86afe5 Commit B
26-
30e22bc Commit A
25+
613567d Commit B
26+
4dfc330 Commit A
2727
a8ca27f Commit C
2828
dc8bfe4 Initial commit""",
2929
)

0 commit comments

Comments
 (0)