Skip to content

Commit 99ea87f

Browse files
committed
Update gemini_review.py
1 parent ae76322 commit 99ea87f

File tree

2 files changed

+113
-49
lines changed

2 files changed

+113
-49
lines changed

.github/workflows/gemini-pr-review.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ jobs:
1111
steps:
1212
- name: Checkout code
1313
uses: actions/checkout@v3
14-
with:
15-
fetch-depth: 0
14+
# with:
15+
# fetch-depth: 0
1616

1717
- name: Set up Python
1818
uses: actions/setup-python@v4

hack/gemini_review.py

Lines changed: 111 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,15 @@
22
import os
33
from github import Github
44
from google.cloud import storage
5+
import re
6+
7+
# Set the maximum number of comments to post on the PR
8+
MAX_COMMENTS = 20
9+
10+
total_comments_posted = 0
511

612
def get_pr_latest_commit_diff_files(repo_name, pr_number, github_token):
7-
"""Retrieves diff information for each file in the latest commit of a PR, excluding test files."""
13+
"""Retrieves diff information for each file in the latest commit of a PR, excluding test files and generated files."""
814
g = Github(github_token)
915
repo = g.get_repo(repo_name)
1016
pr = repo.get_pull(pr_number)
@@ -16,7 +22,7 @@ def get_pr_latest_commit_diff_files(repo_name, pr_number, github_token):
1622
files = latest_commit.files
1723
diff_files = []
1824
for file in files:
19-
if not file.filename.endswith("_test.go") and not file.filename.endswith("_test.py") and not "/test/" in file.filename:
25+
if not file.filename.endswith("_test.go") and not file.filename.endswith("_test.py") and not "/test/" in file.filename and "_generated" not in file.filename:
2026
if file.patch:
2127
diff_files.append(file)
2228
return diff_files
@@ -41,117 +47,175 @@ def download_and_combine_guidelines(bucket_name, prefix):
4147

4248
except Exception as e:
4349
print(f"Error downloading or combining guidelines: {e}")
44-
return ""
4550

4651
def download_and_combine_pr_comments(bucket_name, prefix):
    """Returns the combined PR comment history stored under a GCS prefix.

    Combining is currently switched off (see the TODO below), so an empty
    string is returned both on success and on failure.
    """
    combined_comments = ""
    try:
        client = storage.Client()
        # Listing by prefix keeps the lookup limited to the relevant objects.
        blobs = client.bucket(bucket_name).list_blobs(prefix=prefix)
        # TODO: Skip for now, since it is too large
        # for blob in blobs:
        #     if blob.name.endswith(".txt"):
        #         combined_comments += blob.download_as_text() + "\n\n"
    except Exception as e:
        print(f"Error downloading or combining PR comments: {e}")
        return ""
    return combined_comments
6366

6467
def generate_gemini_review_with_annotations(diff_file, api_key, guidelines, pr_comments):
    """Generates a code review with line annotations using Gemini.

    Args:
        diff_file: File entry of the reviewed commit; its ``filename`` and
            ``patch`` attributes are read.
        api_key: Gemini API key handed to ``genai.configure``.
        guidelines: Guideline text embedded in the prompt. ``None`` or an
            empty string is treated as "no guidelines".
        pr_comments: Previous PR comment history. It is not part of the
            prompt at the moment; the parameter is kept so existing callers
            keep working.

    Returns:
        The raw review text (expected to contain ``line <n>: <comment>``
        entries), or ``None`` when there is no diff to review or Gemini
        returned no usable text.
    """
    diff = diff_file.patch
    if not diff:
        # Nothing to review (no textual patch); skip the API round trip.
        print(f"No diff content for {diff_file.filename}; skipping review.")
        return None

    genai.configure(api_key=api_key)
    model = genai.GenerativeModel('gemini-2.0-flash')

    max_diff_length = 100000
    if len(diff) > max_diff_length:
        diff = diff[:max_diff_length] + "\n... (truncated due to length limit)..."

    # A failed guideline download may hand us None; never interpolate the
    # literal text "None" into the prompt.
    guidelines = guidelines or ""

    prompt = f"""
You are an expert Kubernetes API reviewer. Follow these guidelines:

{guidelines}

Review the following code diff from `{diff_file.filename}`.

Your task is to identify potential issues and suggest concrete improvements.

Prioritize comments that highlight potential bugs, suggest improvements.

Avoid general comments that simply acknowledge correct code or good practices.

Provide your review comments in the following format:

```
line <line_number>: <comment>
line <line_number>: <comment>
...and so on
```

* **Adhere to Conventions:**
    * Duration fields use `fooSeconds`.
    * Condition types are `PascalCase`.
    * Constants are `CamelCase`.
    * No unsigned integers.
    * Floating-point values are avoided in `spec`.
    * Use `int32` unless `int64` is necessary.
    * `Reason` is a one-word, `CamelCase` category of cause.
    * `Message` is a human-readable phrase with specifics.
    * Label keys are lowercase with dashes.
    * Annotations are for tooling and extensions.
* **Compatibility:**
    * Added fields must have non-nil default values in all API versions.
    * New enum values must be handled safely by older clients.
    * Validation rules on spec fields cannot be relaxed nor strengthened.
    * Changes must be round-trippable with no loss of information.
* **Changes:**
    * New fields should be optional and added in a new API version if possible.
    * Singular fields should not be made plural without careful consideration of compatibility.
    * Avoid renaming fields within the same API version.
    * When adding new fields or enum values, use feature gates to control enablement and ensure compatibility with older API servers.

```diff
{diff}
```
"""
    response = model.generate_content(prompt)
    try:
        # The `.text` accessor raises ValueError when the response carries no
        # text part (for example when the candidate was blocked).
        review_text = response.text if response else None
    except ValueError as e:
        print(f"Gemini returned no usable text for {diff_file.filename}: {e}")
        return None

    if review_text:
        return review_text
    print("=== Gemini Response (Empty) ===")
    return None
95130

96131
def _parse_line_comments(review_comment):
    """Extracts ``(line_number, comment)`` pairs from ``line N: text`` entries."""
    parsed = []
    # \b keeps words such as "baseline 3:" from matching; case is ignored
    # because the model may capitalise "Line".
    for match in re.finditer(r"\bline\s+(\d+)\s*:\s*(.*)", review_comment, re.IGNORECASE):
        text = match.group(2).strip()
        if text:  # an empty body would be rejected by the API anyway
            parsed.append((int(match.group(1)), text))
    return parsed


def _right_side_line_numbers(patch):
    """Returns the set of new-file line numbers present in a unified diff patch."""
    line_numbers = set()
    current_line = 0
    in_hunk = False
    for diff_line in patch.splitlines():
        if diff_line.startswith("@@"):
            header = re.match(r"@@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@", diff_line)
            in_hunk = header is not None
            if in_hunk:
                # The header gives the number of the first right-side line.
                current_line = int(header.group(1)) - 1
        elif not in_hunk or diff_line.startswith(("-", "\\")):
            # Removed lines and "\ No newline at end of file" markers do not
            # exist on the right side, so they must not advance the counter.
            continue
        else:
            # Added ("+") and unchanged context lines both advance the
            # right-side line counter.
            current_line += 1
            line_numbers.add(current_line)
    return line_numbers


def post_github_review_comments(repo_name, pr_number, diff_file, review_comment, github_token):
    """Posts review comments to GitHub PR, annotating specific lines.

    Args:
        repo_name: Repository in ``owner/name`` form.
        pr_number: Number of the pull request to comment on.
        diff_file: File entry of the reviewed commit; its ``filename`` and
            ``patch`` attributes are read.
        review_comment: Gemini output containing ``line <n>: <comment>``
            entries. ``None`` or empty means there is nothing to post.
        github_token: Token used to authenticate against the GitHub API.

    At most ``MAX_COMMENTS`` line comments are posted per run; the running
    total is kept in the module-level ``total_comments_posted``.
    """
    global total_comments_posted  # Shared counter across all reviewed files

    if not review_comment:
        # Nothing to post, so do not spend API calls fetching the PR.
        print(f"Gemini returned no response for {diff_file.filename}.")
        return

    g = Github(github_token)
    repo = g.get_repo(repo_name)
    pr = repo.get_pull(pr_number)

    commits = list(pr.get_commits())
    if not commits:
        print(f"WARNING: No commits for PR {pr_number}. Posting general comment for {diff_file.filename}.")
        pr.create_issue_comment(f"Review for {diff_file.filename}:\n{review_comment}")
        return

    latest_commit = commits[-1]

    line_comments = _parse_line_comments(review_comment)
    if not line_comments:
        print(f"No line-specific comments found in Gemini review for {diff_file.filename}.")
        return

    # Built once per file instead of re-walking the patch for every comment.
    commentable_lines = _right_side_line_numbers(diff_file.patch or "")

    for line_num, comment in line_comments:
        if total_comments_posted >= MAX_COMMENTS:
            print("Comment limit reached.")
            break

        if line_num not in commentable_lines:
            print(f"WARNING: Could not find line {line_num} in {diff_file.filename}.")
            print(f"Diff file: {diff_file.filename}")
            print(f"Gemini comment: {comment}")
            continue

        try:
            pr.create_review_comment(
                body=comment,
                commit=latest_commit,
                path=diff_file.filename,
                line=line_num,
                side="RIGHT",
            )
        except Exception as e:
            # Best effort: one rejected comment must not stop the others.
            print(f"ERROR: Failed to create comment for line {line_num} in {diff_file.filename}: {e}")
            continue

        total_comments_posted += 1
        print(f"Review comments for {diff_file.filename} posted.")
133201

134202
def main():
135-
"""Main function to orchestrate the Gemini PR review with annotations."""
203+
"""Main function to orchestrate Gemini PR review."""
136204
api_key = os.environ.get('GEMINI_API_KEY')
137205
pr_number = int(os.environ.get('PR_NUMBER'))
138206
repo_name = os.environ.get('GITHUB_REPOSITORY')
139207
github_token = os.environ.get('GITHUB_TOKEN')
140208

141-
# Use the GCS client library
142209
guidelines = download_and_combine_guidelines("hackathon-2025-sme-code-review-train", "guidelines/")
143210
if not guidelines:
144-
print("Warning: No guidelines loaded. Review will proceed without guidelines.")
211+
print("Warning: No guidelines loaded.")
145212

146213
diff_files = get_pr_latest_commit_diff_files(repo_name, pr_number, github_token)
147-
148214
if diff_files is None:
149-
print("Failed to retrieve PR diff files from latest commit. Exiting.")
215+
print("Failed to retrieve PR diff files. Exiting.")
150216
return
151217

152218
pr_comments = download_and_combine_pr_comments("hackathon-2025-sme-code-review-train", "pr_comments/")
153-
if not pr_comments:
154-
print("Warning: No PR comments loaded. Review will proceed without PR comments history.")
155219

156220
for diff_file in diff_files:
157221
review_comment = generate_gemini_review_with_annotations(diff_file, api_key, guidelines, pr_comments)

0 commit comments

Comments
 (0)