Skip to content

Commit 29a3223

Browse files
Feat: Create standalone get_pr_review_comments script
- Creates a new script `get_pr_review_comments_standalone.py`. - Inlines necessary functions from `firebase_github.py` to make it standalone. - Adds functionality to auto-determine GitHub owner/repo from git remote origin. - Allows specifying repository via `--url` or `--owner`/`--repo` arguments. - Prioritizes URL > owner/repo args > git remote detection. - Improves argument parsing and error handling for repository specification.
1 parent 5ea2a16 commit 29a3223

File tree

1 file changed

+380
-0
lines changed

1 file changed

+380
-0
lines changed
Lines changed: 380 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,380 @@
1+
#!/usr/bin/env python3
2+
# Copyright 2025 Google LLC
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
"""Fetches and formats review comments from a GitHub Pull Request."""
17+
18+
import argparse
19+
import os
20+
import sys
21+
import datetime
22+
from datetime import timezone, timedelta
23+
import requests
24+
import json
25+
import re
26+
import subprocess
27+
from requests.adapters import HTTPAdapter
28+
from requests.packages.urllib3.util.retry import Retry
29+
from absl import logging
30+
31+
# Retry and timeout settings, carried over from firebase_github.py.
RETRIES = 3
BACKOFF = 5
RETRY_STATUS = (403, 500, 502, 504)
TIMEOUT = 5
# Not used by the functions copied into this script; kept in case more are added.
TIMEOUT_LONG = 20

# Repository coordinates. OWNER, REPO and GITHUB_API_URL start empty and are
# filled in at runtime by set_repo_url_standalone().
OWNER = ''
REPO = ''
BASE_URL = 'https://api.github.com'
GITHUB_API_URL = ''
42+
43+
# Set absl logging to INFO at import time so the per-page logging.info() call
# made while fetching review comments is emitted.
logging.set_verbosity(logging.INFO)
44+
45+
46+
def set_repo_url_standalone(owner_name, repo_name):
    """Record the target repository in the module-level globals.

    Stores ``owner_name`` and ``repo_name`` in ``OWNER`` and ``REPO`` and
    rebuilds ``GITHUB_API_URL`` as ``<BASE_URL>/repos/<owner>/<repo>``.

    Args:
        owner_name: Repository owner (user or organization).
        repo_name: Repository name.

    Returns:
        Always True; no validation of the arguments is performed.
    """
    global OWNER, REPO, GITHUB_API_URL
    OWNER, REPO = owner_name, repo_name
    GITHUB_API_URL = f'{BASE_URL}/repos/{OWNER}/{REPO}'
    return True
52+
53+
54+
def requests_retry_session(retries=RETRIES,
                           backoff_factor=BACKOFF,
                           status_forcelist=RETRY_STATUS):
    """Build a ``requests.Session`` whose requests are retried automatically.

    Args:
        retries: Used as the total, read and connect retry limit.
        backoff_factor: Backoff factor handed to urllib3's ``Retry``.
        status_forcelist: HTTP status codes that trigger a retry.

    Returns:
        A new session with one retrying ``HTTPAdapter`` mounted for both
        ``http://`` and ``https://``.
    """
    retry_policy = Retry(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
    )
    retrying_adapter = HTTPAdapter(max_retries=retry_policy)

    http = requests.Session()
    # The same adapter instance serves both schemes.
    for scheme in ('http://', 'https://'):
        http.mount(scheme, retrying_adapter)
    return http
67+
68+
69+
def get_pull_request_review_comments(token, pull_number, since=None):
    """Fetch every review comment on a pull request, following pagination.

    https://docs.github.com/en/rest/pulls/comments#list-review-comments-on-a-pull-request

    Args:
        token: GitHub token sent in the ``Authorization`` header.
        pull_number: Number of the pull request to query.
        since: Optional timestamp string; when truthy it is forwarded as the
            ``since`` query parameter.

    Returns:
        A list of the decoded JSON comment objects from all pages fetched.
        If a request fails, the error is logged and the comments collected
        so far (possibly none) are returned.
    """
    url = f'{GITHUB_API_URL}/pulls/{pull_number}/comments'
    headers = {'Accept': 'application/vnd.github.v3+json', 'Authorization': f'token {token}'}

    per_page = 100
    base_params = {'per_page': per_page}
    if since:
        base_params['since'] = since

    results = []
    page = 1
    # One session for the whole pagination run: connections are reused across
    # pages and the session is closed on exit instead of being leaked per page.
    with requests_retry_session() as session:
        while True:
            current_page_params = dict(base_params, page=page)

            try:
                with session.get(url, headers=headers, params=current_page_params,
                                 stream=True, timeout=TIMEOUT) as response:
                    response.raise_for_status()
                    logging.info("get_pull_request_review_comments: %s params %s response: %s",
                                 url, current_page_params, response)
                    current_page_results = response.json()
            except requests.exceptions.RequestException as e:
                logging.error(
                    "Error fetching review comments (page %s, params: %s) for PR %s: %s",
                    page, current_page_params, pull_number, e)
                break  # Stop paging on error; return what was collected so far.

            if not current_page_results:
                break  # Empty page: nothing more to fetch.

            results.extend(current_page_results)

            # A short page means this was the last one.
            if len(current_page_results) < per_page:
                break

            page += 1
    return results
110+
111+
112+
_STATUS_IRRELEVANT = "[IRRELEVANT]"
_STATUS_OLD = "[OLD]"
_STATUS_CURRENT = "[CURRENT]"

# Matches https://github.com/owner/repo[.git][/] and git@github.com:owner/repo[.git].
# The repo group allows dots so names such as "socket.io" are captured whole;
# the lazy quantifier leaves a trailing ".git" to the optional suffix group.
_REPO_URL_PATTERN = re.compile(
    r"(?:(?:https?://github\.com/)|(?:git@github\.com:))([^/]+)/([^/]+?)(?:\.git)?/?$"
)


def _parse_repo_url(url_string):
    """Return ``(owner, repo)`` parsed from a GitHub URL, or ``(None, None)``."""
    url_match = _REPO_URL_PATTERN.search(url_string.strip())
    if url_match:
        return url_match.group(1), url_match.group(2)
    return None, None


def _detect_repo_from_git():
    """Best-effort ``(owner, repo)`` lookup from the git remote named 'origin'.

    Failures are reported on stderr and yield ``(None, None)``; they never abort.
    """
    try:
        git_url_bytes = subprocess.check_output(
            ["git", "remote", "get-url", "origin"], stderr=subprocess.PIPE)
        git_url = git_url_bytes.decode().strip()
    except (subprocess.CalledProcessError, FileNotFoundError, UnicodeDecodeError) as e:
        sys.stderr.write(f"Could not automatically determine repository from git remote: {e}\n")
        return None, None
    except Exception as e:  # Detection is optional; never let it crash the tool.
        sys.stderr.write(f"An unexpected error occurred while determining repository: {e}\n")
        return None, None

    owner, repo = _parse_repo_url(git_url)
    if owner and repo:
        sys.stderr.write(f"Determined repository: {owner}/{repo} from git remote.\n")
    return owner, repo


def _build_arg_parser(determined_owner, determined_repo):
    """Create the CLI parser; --owner/--repo default to the git-detected values."""
    parser = argparse.ArgumentParser(
        description="Fetch review comments from a GitHub PR and format into simple text output.\n"
                    "Repository can be specified via --url, or --owner AND --repo, or auto-detected from git remote 'origin'.",
        formatter_class=argparse.RawTextHelpFormatter
    )
    parser.add_argument(  # This is always required
        "--pull_number",
        type=int,
        required=True,
        help="Pull request number."
    )
    parser.add_argument(
        "--url",
        type=str,
        default=None,
        help="Full GitHub repository URL (e.g., https://github.com/owner/repo or git@github.com:owner/repo.git). Overrides --owner/--repo and git detection."
    )
    # The group is attached to the parser itself: argparse does not support
    # nesting an argument group inside a mutually exclusive group.
    owner_repo_group = parser.add_argument_group(
        'owner_repo_pair', 'Specify owner and repository name (used if --url is not provided)')
    owner_repo_group.add_argument(
        "--owner",
        type=str,
        default=determined_owner,
        help=f"Repository owner. {'Default: ' + determined_owner if determined_owner else 'Required if --url is not used and not determinable from git.'}"
    )
    owner_repo_group.add_argument(
        "--repo",
        type=str,
        default=determined_repo,
        help=f"Repository name. {'Default: ' + determined_repo if determined_repo else 'Required if --url is not used and not determinable from git.'}"
    )
    parser.add_argument(
        "--token",
        type=str,
        default=os.environ.get("GITHUB_TOKEN"),
        help="GitHub token. Can also be set via GITHUB_TOKEN env var."
    )
    parser.add_argument(
        "--context-lines",
        type=int,
        default=10,
        help="Number of context lines from the diff hunk. 0 for full hunk. If > 0, shows header (if any) and last N lines of the remaining hunk. Default: 10."
    )
    parser.add_argument(
        "--since",
        type=str,
        default=None,
        help="Only show comments updated at or after this ISO 8601 timestamp (e.g., YYYY-MM-DDTHH:MM:SSZ)."
    )
    parser.add_argument(
        "--exclude-old",
        action="store_true",
        default=False,
        help="Exclude comments marked [OLD] (where line number has changed due to code updates but position is still valid)."
    )
    parser.add_argument(
        "--include-irrelevant",
        action="store_true",
        default=False,
        help="Include comments marked [IRRELEVANT] (where GitHub can no longer anchor the comment to the diff, i.e., position is null)."
    )
    return parser


def _classify_comment(comment):
    """Return ``(status_text, line_to_display)`` for one review comment dict."""
    current_pos = comment.get("position")
    current_line = comment.get("line")
    original_line = comment.get("original_line")

    if current_pos is None:
        status_text, line_to_display = _STATUS_IRRELEVANT, original_line
    elif original_line is not None and current_line != original_line:
        status_text, line_to_display = _STATUS_OLD, current_line
    else:
        status_text, line_to_display = _STATUS_CURRENT, current_line

    if line_to_display is None:
        line_to_display = "N/A"
    return status_text, line_to_display


def _parse_updated_at(updated_at_str):
    """Parse an ISO 8601 timestamp; warn on stderr and return None if invalid."""
    try:
        # datetime.fromisoformat() only accepts a trailing "Z" from Python 3.11;
        # the explicit offset form is accepted by every version that has it.
        return datetime.datetime.fromisoformat(updated_at_str.replace("Z", "+00:00"))
    except ValueError:
        sys.stderr.write(f"Warning: Could not parse updated_at timestamp: {updated_at_str}\n")
        return None


def _print_comment(comment, user, body, status_text, line_to_display, context_lines):
    """Print one comment (metadata, diff context, body) as Markdown to stdout."""
    created_at_str = comment.get("created_at")
    path = comment.get("path", "N/A")
    diff_hunk = comment.get("diff_hunk")
    html_url = comment.get("html_url", "N/A")
    comment_id = comment.get("id")
    in_reply_to_id = comment.get("in_reply_to_id")

    reply_note = f" (In Reply To: `{in_reply_to_id}`)" if in_reply_to_id else ""
    print(f"## Comment by: **{user}** (ID: `{comment_id}`){reply_note}\n")
    if created_at_str:
        print(f"* **Timestamp**: `{created_at_str}`")
    print(f"* **Status**: `{status_text}`")
    print(f"* **File**: `{path}`")
    print(f"* **Line**: `{line_to_display}`")
    print(f"* **URL**: <{html_url}>\n")

    print("\n### Context:")
    print("```")  # Start of Markdown code block
    if diff_hunk and diff_hunk.strip():
        if context_lines == 0:  # Full hunk requested
            print(diff_hunk)
        else:  # Header (if any) plus the last N remaining lines
            hunk_lines = diff_hunk.split('\n')
            if hunk_lines and hunk_lines[0].startswith("@@ "):
                print(hunk_lines[0])
                hunk_lines = hunk_lines[1:]
            # If only a header was present the slice is empty and a blank
            # line is printed, which is acceptable.
            print("\n".join(hunk_lines[-context_lines:]))
    else:
        print("(No diff hunk available for this comment)")
    print("```")  # End of Markdown code block

    print("\n### Comment:")
    print(body)
    print("\n---")


def _build_next_command(latest_activity_dt):
    """Rebuild the current command line with --since moved past the newest comment."""
    import shlex  # Local import: only needed for this suggestion.

    # Normalize to UTC before adding the offset, then format for the CLI.
    next_since_dt = latest_activity_dt.astimezone(timezone.utc) + timedelta(seconds=2)
    next_since_str = next_since_dt.strftime('%Y-%m-%dT%H:%M:%SZ')

    new_cmd_args = [sys.executable, sys.argv[0]]
    skip_value = False
    for arg in sys.argv[1:]:
        if skip_value:  # This is the value that followed a bare --since
            skip_value = False
            continue
        if arg == "--since":
            skip_value = True
            continue
        if arg.startswith("--since="):
            continue
        new_cmd_args.append(arg)
    new_cmd_args.extend(["--since", next_since_str])
    # Quote each argument so the suggestion can be pasted into a shell as-is.
    return " ".join(shlex.quote(arg) for arg in new_cmd_args)


def main():
    """Command-line entry point: fetch a PR's review comments and print them.

    The repository is resolved with priority --url > --owner/--repo > git
    remote 'origin'. Formatted comments go to stdout; progress, warnings and
    a suggested follow-up command go to stderr. Exits with status 1 when the
    token or repository cannot be determined, and via ``parser.error`` when
    --context-lines is negative.
    """
    determined_owner, determined_repo = _detect_repo_from_git()
    parser = _build_arg_parser(determined_owner, determined_repo)
    args = parser.parse_args()

    if not args.token:
        sys.stderr.write("Error: GitHub token not provided. Set GITHUB_TOKEN or use --token.\n")
        sys.exit(1)

    if args.context_lines < 0:
        # A negative value would turn the "last N lines" slice into "skip the
        # first N lines", which is never what the option documents.
        parser.error("--context-lines must be 0 or a positive integer.")

    final_owner = None
    final_repo = None

    if args.url:
        parsed_owner, parsed_repo = _parse_repo_url(args.url)
        if parsed_owner and parsed_repo:
            final_owner = parsed_owner
            final_repo = parsed_repo
            sys.stderr.write(f"Using repository from --url: {final_owner}/{final_repo}\n")
        else:
            sys.stderr.write(f"Error: Invalid URL format provided: {args.url}. Expected https://github.com/owner/repo or git@github.com:owner/repo.git\n")
            parser.print_help()
            sys.exit(1)
    # Without --url, fall back to --owner/--repo, which default to the
    # git-detected values.
    elif args.owner and args.repo:
        final_owner = args.owner
        final_repo = args.repo
        # Announce the source whenever it is not simply the auto-detected pair
        # (which was already reported during detection).
        if (args.owner, args.repo) != (determined_owner, determined_repo):
            sys.stderr.write(f"Using repository from --owner/--repo args: {final_owner}/{final_repo}\n")
    elif args.owner or args.repo:  # Only one of owner/repo is known
        sys.stderr.write("Error: Both --owner and --repo must be specified if one is provided and --url is not used.\n")
        parser.print_help()
        sys.exit(1)

    if not final_owner or not final_repo:
        sys.stderr.write("Error: Could not determine repository. Please specify --url, or both --owner and --repo, or ensure git remote 'origin' is configured correctly.\n")
        parser.print_help()
        sys.exit(1)

    if not set_repo_url_standalone(final_owner, final_repo):
        sys.stderr.write(f"Error: Could not set repository to {final_owner}/{final_repo}. Ensure owner/repo are correct.\n")
        sys.exit(1)

    sys.stderr.write(f"Fetching comments for PR #{args.pull_number} from {OWNER}/{REPO}...\n")
    if args.since:
        sys.stderr.write(f"Filtering comments updated since: {args.since}\n")

    comments = get_pull_request_review_comments(
        args.token,
        args.pull_number,
        since=args.since
    )

    if not comments:
        sys.stderr.write(f"No review comments found for PR #{args.pull_number} (or matching filters), or an error occurred.\n")
        return

    latest_activity_timestamp_obj = None
    processed_comments_count = 0
    print("# Review Comments\n\n")
    for comment in comments:
        status_text, line_to_display = _classify_comment(comment)

        if status_text == _STATUS_IRRELEVANT and not args.include_irrelevant:
            continue
        if status_text == _STATUS_OLD and args.exclude_old:
            continue

        # Track the latest 'updated_at' for the '--since' suggestion;
        # 'created_at' is only used for display.
        updated_at_str = comment.get("updated_at")
        if updated_at_str:
            current_comment_activity_dt = _parse_updated_at(updated_at_str)
            if current_comment_activity_dt is not None and (
                    latest_activity_timestamp_obj is None
                    or current_comment_activity_dt > latest_activity_timestamp_obj):
                latest_activity_timestamp_obj = current_comment_activity_dt

        # "user" and "body" may be present but null in the payload, so guard
        # against None rather than relying on the .get() default alone.
        user = (comment.get("user") or {}).get("login", "Unknown user")
        body = (comment.get("body") or "").strip()

        if not body:
            continue

        processed_comments_count += 1
        _print_comment(comment, user, body, status_text, line_to_display, args.context_lines)

    sys.stderr.write(f"\nPrinted {processed_comments_count} comments to stdout.\n")

    if latest_activity_timestamp_obj:
        try:
            suggested_cmd = _build_next_command(latest_activity_timestamp_obj)
            sys.stderr.write(f"\nTo get comments created after the last one in this batch, try:\n{suggested_cmd}\n")
        except Exception as e:  # The suggestion is a convenience; never fail on it.
            sys.stderr.write(f"\nWarning: Could not generate next command suggestion: {e}\n")
378+
379+
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)