2
2
import os
3
3
from github import Github
4
4
from google .cloud import storage
5
+ import re
6
+
7
# Set the maximum number of comments to post on the PR.
# post_github_review_comments stops creating inline comments once
# total_comments_posted reaches this value.
MAX_COMMENTS = 20

# Running count of inline review comments created so far. It is module-level
# (and declared `global` in post_github_review_comments) so the limit is
# shared by every call rather than reset for each file.
total_comments_posted = 0
5
11
6
12
def get_pr_latest_commit_diff_files (repo_name , pr_number , github_token ):
7
- """Retrieves diff information for each file in the latest commit of a PR, excluding test files."""
13
+ """Retrieves diff information for each file in the latest commit of a PR, excluding test files and generated files ."""
8
14
g = Github (github_token )
9
15
repo = g .get_repo (repo_name )
10
16
pr = repo .get_pull (pr_number )
@@ -16,7 +22,7 @@ def get_pr_latest_commit_diff_files(repo_name, pr_number, github_token):
16
22
files = latest_commit .files
17
23
diff_files = []
18
24
for file in files :
19
- if not file .filename .endswith ("_test.go" ) and not file .filename .endswith ("_test.py" ) and not "/test/" in file .filename :
25
+ if not file .filename .endswith ("_test.go" ) and not file .filename .endswith ("_test.py" ) and not "/test/" in file .filename and "_generated" not in file . filename :
20
26
if file .patch :
21
27
diff_files .append (file )
22
28
return diff_files
@@ -41,117 +47,175 @@ def download_and_combine_guidelines(bucket_name, prefix):
41
47
42
48
except Exception as e :
43
49
print (f"Error downloading or combining guidelines: { e } " )
44
- return ""
45
50
46
51
def download_and_combine_pr_comments(bucket_name, prefix):
    """Return the combined PR-comment history stored under ``prefix`` in GCS.

    Combining the blobs is currently switched off (see the TODO below), so a
    successful call returns an empty string.

    Args:
        bucket_name: Name of the GCS bucket to read from.
        prefix: Object-name prefix used to narrow the blob listing.

    Returns:
        The combined comment text (currently always ""). "" is also returned
        after any exception, which is reported on stdout.
    """
    try:
        client = storage.Client()
        comments_bucket = client.bucket(bucket_name)
        # Listing by prefix keeps the request limited to the relevant objects.
        comment_blobs = comments_bucket.list_blobs(prefix=prefix)
        combined = ""
        # TODO: Skip for now, since it is too large
        # for blob in comment_blobs:
        #     if blob.name.endswith(".txt"):
        #         combined += blob.download_as_text() + "\n\n"
    except Exception as err:
        print(f"Error downloading or combining PR comments: {err}")
        return ""
    else:
        return combined
63
66
64
67
# Diffs longer than this many characters are cut before being sent to Gemini.
_MAX_DIFF_LENGTH = 100000
_TRUNCATION_NOTICE = "\n... (truncated due to length limit)..."

# Static checklist appended to every review prompt.
_REVIEW_CONVENTIONS = """\
* **Adhere to Conventions:**
    * Duration fields use `fooSeconds`.
    * Condition types are `PascalCase`.
    * Constants are `CamelCase`.
    * No unsigned integers.
    * Floating-point values are avoided in `spec`.
    * Use `int32` unless `int64` is necessary.
    * `Reason` is a one-word, `CamelCase` category of cause.
    * `Message` is a human-readable phrase with specifics.
    * Label keys are lowercase with dashes.
    * Annotations are for tooling and extensions.
* **Compatibility:**
    * Added fields must have non-nil default values in all API versions.
    * New enum values must be handled safely by older clients.
    * Validation rules on spec fields cannot be relaxed nor strengthened.
    * Changes must be round-trippable with no loss of information.
* **Changes:**
    * New fields should be optional and added in a new API version if possible.
    * Singular fields should not be made plural without careful consideration of compatibility.
    * Avoid renaming fields within the same API version.
    * When adding new fields or enum values, use feature gates to control enablement and ensure compatibility with older API servers.
"""


def _build_review_prompt(filename, guidelines, diff):
    """Assemble the review prompt for one file's (possibly truncated) diff."""
    return f"""
You are an expert Kubernetes API reviewer. Follow these guidelines:

{guidelines}

Review the following code diff from `{filename}`.

Your task is to identify potential issues and suggest concrete improvements.

Prioritize comments that highlight potential bugs, suggest improvements.

Avoid general comments that simply acknowledge correct code or good practices.

Provide your review comments in the following format:

```
line <line_number>: <comment>
line <line_number>: <comment>
...and so on
```

`<line_number>` must be the line number in the new version of the file (the right side of the diff), derived from the `@@` hunk headers. Only comment on added or unchanged lines.

{_REVIEW_CONVENTIONS}
```diff
{diff}
```
"""


def generate_gemini_review_with_annotations(diff_file, api_key, guidelines, pr_comments):
    """Ask Gemini to review one file's diff and return its raw text reply.

    Args:
        diff_file: Object exposing ``filename`` and ``patch`` for the file.
        api_key: Gemini API key passed to ``genai.configure``.
        guidelines: Review guidelines text embedded in the prompt. A falsy
            value (for example None after a failed download) is treated as "".
        pr_comments: Accepted for interface compatibility; it is not used in
            the prompt.

    Returns:
        The response text, expected to contain ``line <n>: <comment>`` entries,
        or None when Gemini returns no usable text.
    """
    genai.configure(api_key=api_key)
    model = genai.GenerativeModel('gemini-2.0-flash')

    # A missing patch or missing guidelines must not show up as "None" in the prompt.
    diff = diff_file.patch or ""
    guidelines = guidelines or ""
    if len(diff) > _MAX_DIFF_LENGTH:
        diff = diff[:_MAX_DIFF_LENGTH] + _TRUNCATION_NOTICE

    prompt = _build_review_prompt(diff_file.filename, guidelines, diff)
    response = model.generate_content(prompt)

    try:
        # The `.text` accessor raises ValueError when the response carries no
        # usable parts (for example when the prompt or candidate was blocked).
        text = response.text if response else None
    except ValueError as err:
        print(f"=== Gemini Response (Blocked or Empty) for {diff_file.filename}: {err} ===")
        return None

    if text:
        return text
    print("=== Gemini Response (Empty) ===")
    return None
95
130
96
131
# Captures the starting new-file line number from a unified-diff hunk header,
# e.g. "@@ -16,7 +22,7 @@" -> 22 (the ",count" parts are optional).
_HUNK_HEADER_RE = re.compile(r"^@@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@")

# One "line <n>: <comment>" entry in Gemini's reply.
_LINE_COMMENT_RE = re.compile(r"line (\d+): (.*)")


def _right_side_line_numbers(patch):
    """Return the set of new-file line numbers present in ``patch``."""
    line_numbers = set()
    next_line = None  # new-file number of the next added/context line
    for diff_line in (patch or "").splitlines():
        header = _HUNK_HEADER_RE.match(diff_line)
        if header:
            next_line = int(header.group(1))
        elif next_line is None:
            # Nothing before the first hunk header maps to a file line.
            continue
        elif diff_line.startswith(("-", "\\")):
            # Removed lines and "\ No newline at end of file" markers do not
            # occupy a line in the new file, so they must not advance the count.
            continue
        else:
            # Added ("+") and unchanged context lines both exist on the right side.
            line_numbers.add(next_line)
            next_line += 1
    return line_numbers


def post_github_review_comments(repo_name, pr_number, diff_file, review_comment, github_token):
    """Posts review comments to GitHub PR, annotating specific lines.

    Each ``line <n>: <comment>`` entry found in ``review_comment`` is posted as
    an inline comment on the RIGHT side of the diff, provided line ``n``
    appears in ``diff_file.patch``. Posting stops once the module-level
    ``total_comments_posted`` reaches ``MAX_COMMENTS``.

    Args:
        repo_name: Repository in ``owner/name`` form.
        pr_number: Pull request number.
        diff_file: Object exposing ``filename`` and ``patch`` for the file.
        review_comment: Raw review text from Gemini; None or "" means there is
            nothing to post.
        github_token: Token used to authenticate with GitHub.

    Returns:
        None. Progress, warnings and per-comment failures are printed.
    """
    global total_comments_posted  # shared counter across all files

    if not review_comment:
        # Return before creating the client so no GitHub calls are made for nothing.
        print(f"Gemini returned no response for {diff_file.filename}.")
        return

    g = Github(github_token)
    repo = g.get_repo(repo_name)
    pr = repo.get_pull(pr_number)

    commits = list(pr.get_commits())
    if not commits:
        print(f"WARNING: No commits for PR {pr_number}. Posting general comment for {diff_file.filename}.")
        pr.create_issue_comment(f"Review for {diff_file.filename}:\n{review_comment}")
        return

    latest_commit = commits[-1]
    # Computed once per file instead of re-walking the diff for every comment.
    commentable_lines = _right_side_line_numbers(diff_file.patch)

    for match in _LINE_COMMENT_RE.finditer(review_comment):
        line_num = int(match.group(1))
        comment = match.group(2).strip()

        if total_comments_posted >= MAX_COMMENTS:
            print("Comment limit reached.")
            break

        if line_num not in commentable_lines:
            print(f"WARNING: Could not find line {line_num} in {diff_file.filename}.")
            print(f"Diff file: {diff_file.filename}")
            print(f"Gemini comment: {comment}")
            continue

        try:
            pr.create_review_comment(
                body=comment,
                commit=latest_commit,
                path=diff_file.filename,
                line=line_num,
                side="RIGHT",
            )
        except Exception as e:
            # One rejected comment must not stop the remaining ones.
            print(f"ERROR: Failed to create comment for line {line_num} in {diff_file.filename}: {e}")
        else:
            total_comments_posted += 1
            print(f"Review comments for {diff_file.filename} posted.")
133
201
134
202
def main ():
135
- """Main function to orchestrate the Gemini PR review with annotations ."""
203
+ """Main function to orchestrate Gemini PR review."""
136
204
api_key = os .environ .get ('GEMINI_API_KEY' )
137
205
pr_number = int (os .environ .get ('PR_NUMBER' ))
138
206
repo_name = os .environ .get ('GITHUB_REPOSITORY' )
139
207
github_token = os .environ .get ('GITHUB_TOKEN' )
140
208
141
- # Use the GCS client library
142
209
guidelines = download_and_combine_guidelines ("hackathon-2025-sme-code-review-train" , "guidelines/" )
143
210
if not guidelines :
144
- print ("Warning: No guidelines loaded. Review will proceed without guidelines. " )
211
+ print ("Warning: No guidelines loaded." )
145
212
146
213
diff_files = get_pr_latest_commit_diff_files (repo_name , pr_number , github_token )
147
-
148
214
if diff_files is None :
149
- print ("Failed to retrieve PR diff files from latest commit . Exiting." )
215
+ print ("Failed to retrieve PR diff files. Exiting." )
150
216
return
151
217
152
218
pr_comments = download_and_combine_pr_comments ("hackathon-2025-sme-code-review-train" , "pr_comments/" )
153
- if not pr_comments :
154
- print ("Warning: No PR comments loaded. Review will proceed without PR comments history." )
155
219
156
220
for diff_file in diff_files :
157
221
review_comment = generate_gemini_review_with_annotations (diff_file , api_key , guidelines , pr_comments )
0 commit comments