Skip to content

Commit f5022dc

Browse files
authored
Add generic PR file comment script, and use it to show docs errors. (#522)
Adds a generic pr_file_commenter.py script that takes warnings/errors from stdin and posts them as file comments in the PR. Use this script to add comments for errors in generated docs.
1 parent 26054b4 commit f5022dc

File tree

3 files changed

+282
-5
lines changed

3 files changed

+282
-5
lines changed

.github/workflows/checks.yml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,9 @@ jobs:
5454
with:
5555
python-version: 3.7
5656
- name: Install prerequisites
57-
run: python scripts/gha/install_prereqs_desktop.py
57+
run: |
58+
python scripts/gha/install_prereqs_desktop.py
59+
python -m pip install unidiff
5860
- name: Generate headers
5961
run: |
6062
mkdir build
@@ -73,7 +75,9 @@ jobs:
7375
run: |
7476
if grep -Eq "error:|warning:" doxygen_errors.txt; then
7577
# Grep for warnings and print them out (replacing \n with %0A for github log)
76-
grep -E "error:|warning:|^ parameter" doxygen_errors.txt | sed ':a;N;$!ba;s/\n/%0A/g' | sed 's/^/::error ::DOXYGEN ERRORS: %0A/'
78+
grep -E "error:|warning:|^ parameter" doxygen_errors.txt > doxygen_errors_filtered.txt
79+
cat doxygen_errors_filtered.txt | sed ':a;N;$!ba;s/\n/%0A/g' | sed 's/^/::error ::DOXYGEN ERRORS: %0A/'
80+
python scripts/gha/pr_file_commenter.py -t ${{ github.token }} -p ${{ github.event.pull_request.number }} -T hidden-doxygen-comment-tag -P '📝 __Documentation issue:__ ' -S '' -f 10 < doxygen_errors_filtered.txt || true
7781
exit 1
7882
fi
7983

docs/Doxyfile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,10 @@ WARN_IF_DOC_ERROR = YES
3131
# WARN_IF_INCOMPLETE_DOC = YES
3232

3333

34-
# Filter all header files to remove everything between <SWIG> and </SWIG> tags.
35-
# This is in lieu of performing a full header scrub.
34+
# Filter all header files to blank out all lines between <SWIG> and </SWIG>
35+
# tags, inclusively. This is in lieu of performing a full header scrub.
3636
FILTER_SOURCE_FILES = YES
37-
INPUT_FILTER = "sed '/<SWIG>/,/<\/SWIG>/d'"
37+
INPUT_FILTER = "sed '/<SWIG>/,/<\/SWIG>/ s/.*//'"
3838

3939
# Space-separated list of input files.
4040
# This is commented out because the Doxygen script will append the correct list of headers here.

scripts/gha/pr_file_commenter.py

Lines changed: 273 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,273 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright 2021 Google
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
"""Take warnings/error lines from stdin and turn them into PR file comments.
17+
18+
Usage:
19+
python pr_file_commenter.py -t github_token -p pr_number
20+
-T comment_tag [-r git_repo_url] [-C curl_command]
21+
[-P comment_prefix] [-S comment_suffix] [-d base_directory]
[-f fuzzy_lines] [-v]
22+
< COMMENT_LINES
23+
24+
COMMENT_LINES should be a series of lines with the following format:
25+
path/to/first_filename:line_number: comment text
26+
optional comment text continuation
27+
path/to/second_filename:line_number: comment text
28+
29+
This script will scan through the comments and post any that fall into the diff
30+
range of the given PR as file comments on that PR.
31+
32+
If -r is unspecified, uses the current repo.
33+
"""
34+
35+
import argparse
36+
from html import escape
37+
import json
38+
import os
39+
import re
40+
import subprocess
41+
import sys
42+
import time
43+
from unidiff import PatchSet
44+
import urllib.parse
45+
46+
def main():
    """Post warning/error lines read from stdin as file comments on a GitHub PR.

    The script performs these steps:

      1. Fetch the PR's head commit and diff to find the files and lines
         covered by the diff.
      2. Read the comments from stdin, drop duplicates, and keep only those
         that land on a line in the diff (or within --fuzzy_lines of one).
      3. Delete review comments posted by previous runs, identified by the
         hidden comment tag embedded in their body.
      4. Post the remaining comments as a single PR review.

    All GitHub requests are made by invoking the configured curl command.

    Exits with status 2 if the repository URL is not a usable
    https://github.com/ URL, 1 if GitHub returns an error response, and 0
    otherwise.
    """
    args = parse_cmdline_args()
    hidden_comment_tag = '<hidden value="%s"></hidden>' % args.comment_tag

    if args.repo is None:
        args.repo = subprocess.check_output(
            ['git', 'config', '--get', 'remote.origin.url']
        ).decode('utf-8').rstrip('\n').lower()
        if args.verbose:
            print('autodetected repo: %s' % args.repo)
    if not args.repo.startswith('https://github.com/'):
        print('Error, only https://github.com/ repositories are allowed.')
        sys.exit(2)
    # Accept an optional ".git" suffix and trailing path, but keep dots that
    # are part of the repository name itself (e.g. "owner/site.github.io").
    repo_match = re.match(
        r'https://github\.com/([^/]+)/([^/]+?)(?:\.git)?(?:/.*)?$', args.repo)
    if not repo_match:
        print('Error, could not parse owner and repository name from %s'
              % (args.repo,))
        sys.exit(2)
    (repo_owner, repo_name) = repo_match.groups()

    api_base = 'https://api.github.com/repos/%s/%s' % (repo_owner, repo_name)
    json_headers = [
        '-H', 'Accept: application/vnd.github.v3+json',
        '-H', 'Authorization: token %s' % args.token,
    ]
    verbose_flag = ['-v'] if args.verbose else []

    # Get the head commit for the pull request.
    # GET /repos/{owner}/{repo}/pulls/{pull_number}
    pull_url = '%s/pulls/%s' % (api_base, args.pr_number)
    pr_data = json.loads(subprocess.check_output(
        [args.curl, '-s', '-X', 'GET'] + json_headers + [pull_url]
        + verbose_flag).decode('utf-8').rstrip('\n'))
    if not isinstance(pr_data, dict) or 'head' not in pr_data:
        # GitHub error responses are objects carrying a "message" field
        # instead of the pull request data.
        detail = (pr_data.get('message', pr_data)
                  if isinstance(pr_data, dict) else pr_data)
        print('Error getting pull request %s: %s' % (args.pr_number, detail))
        sys.exit(1)

    commit_sha = pr_data['head']['sha']
    if args.verbose:
        print('Commit sha:', commit_sha)

    # Get the diff for the pull request: same endpoint, but a diff media type.
    # GET /repos/{owner}/{repo}/pulls/{pull_number}
    if args.verbose:
        print('request_url: %s' % pull_url)
    pr_diff = subprocess.check_output(
        [args.curl,
         '-s', '-o', '-', '-w', '\nHTTP status %{http_code}\n',
         '-X', 'GET',
         '-H', 'Accept: application/vnd.github.VERSION.diff',
         '-H', 'Authorization: token %s' % args.token,
         pull_url] + verbose_flag).decode('utf-8')

    # Parse the diff to determine which target lines each hunk covers.
    # Only comments that refer to lines inside the diff can be posted.
    # Information on what this means here:
    # https://docs.github.com/en/rest/reference/pulls#create-a-review-comment-for-a-pull-request
    valid_lines = {}
    for pr_patch_file in PatchSet(pr_diff):
        # Skip files that only remove code.
        if pr_patch_file.removed and not pr_patch_file.added:
            continue
        file_lines = valid_lines.setdefault(pr_patch_file.path, set())
        for hunk in pr_patch_file:
            # Every target line spanned by the hunk is a valid place for a
            # comment.
            file_lines.update(range(hunk.target_start,
                                    hunk.target_start + hunk.target_length))

    # Get the comments from stdin.
    # Match a line in this format:
    #   path/to/file:line#: Message goes here
    comment_line_re = re.compile(r'([^:]+):([0-9]+): *(.*)$')
    all_comments = []
    in_comment = False
    for line in sys.stdin:
        m = comment_line_re.match(line)
        if m:
            in_comment = True
            all_comments.append({
                'filename': os.path.relpath(m.group(1), args.base_directory),
                'line': int(m.group(2)),
                'text': '`%s`' % m.group(3),
            })
        elif in_comment and line.startswith(' '):
            # Subsequent lines starting with a space continue the comment.
            all_comments[-1]['text'] += '\n`%s`' % line.rstrip('\n')
        else:
            # If any line begins with anything other than "path:#: " or a
            # space, we are no longer inside a comment.
            in_comment = False

    pr_comments = []
    seen_comments = set()
    for comment in all_comments:
        comment_key = (comment['filename'], comment['line'], comment['text'])
        if comment_key in seen_comments:
            # Don't add any comments already present.
            continue
        seen_comments.add(comment_key)

        file_lines = valid_lines.get(comment['filename'])
        if file_lines is None:
            continue
        if comment['line'] in file_lines:
            pr_comments.append(comment)
            continue
        # Search within +/- fuzzy_lines for a valid line. Prefer +.
        for try_line in range(1, args.fuzzy_lines + 1):
            if comment['line'] + try_line in file_lines:
                comment['adjust'] = try_line
                pr_comments.append(comment)
                break
            if comment['line'] - try_line in file_lines:
                comment['adjust'] = -try_line
                pr_comments.append(comment)
                break

    if args.verbose:
        print('Got %d relevant comments' % len(pr_comments))

    # Next, get all existing review comments that we posted on the PR and
    # delete them.
    # GET /repos/{owner}/{repo}/pulls/{pull_number}/comments
    comments_to_delete = []
    page = 1
    per_page = 100
    while True:
        if args.verbose:
            print('Read page %d of comments' % page)
        list_url = '%s/pulls/%s/comments?per_page=%d&page=%d' % (
            api_base, args.pr_number, per_page, page)
        comments = json.loads(subprocess.check_output(
            [args.curl, '-s', '-X', 'GET'] + json_headers + [list_url]
        ).decode('utf-8').rstrip('\n'))
        if not isinstance(comments, list):
            # A successful response is a JSON array; anything else is an
            # error object from the API.
            detail = (comments.get('message', comments)
                      if isinstance(comments, dict) else comments)
            print('Error reading existing PR comments: %s' % (detail,))
            sys.exit(1)
        for comment in comments:
            if hidden_comment_tag in (comment.get('body') or ''):
                comments_to_delete.append(comment['id'])
        if len(comments) < per_page:
            # Stop once we've read less than a full page of comments.
            break
        page += 1

    if comments_to_delete:
        print('Delete previous comments:', comments_to_delete)
        for comment_id in comments_to_delete:
            # DELETE /repos/{owner}/{repo}/pulls/comments/{comment_id}
            delete_url = '%s/pulls/comments/%d' % (api_base, comment_id)
            subprocess.check_output(
                [args.curl, '-s', '-X', 'DELETE'] + json_headers + [delete_url])

    if pr_comments:
        comments_to_send = []
        for pr_comment in pr_comments:
            if 'adjust' in pr_comment:
                # The comment is attached to a nearby diff line, so say where
                # the reported line is relative to where the comment appears.
                pr_comment['text'] = '`[%d line%s %s] %s' % (
                    abs(pr_comment['adjust']),
                    '' if abs(pr_comment['adjust']) == 1 else 's',
                    'up' if pr_comment['adjust'] > 0 else 'down',
                    pr_comment['text'].lstrip('`'))
                pr_comment['line'] += pr_comment['adjust']
            comments_to_send.append({
                'body': (args.comment_prefix +
                         pr_comment['text'] +
                         args.comment_suffix +
                         hidden_comment_tag),
                'path': pr_comment['filename'],
                'line': pr_comment['line'],
            })

        # Post all comments together as one review.
        # POST /repos/{owner}/{repo}/pulls/{pull_number}/reviews
        review_url = '%s/pulls/%s/reviews' % (api_base, args.pr_number)
        json_text = json.dumps({
            'commit_id': commit_sha,
            'event': 'COMMENT',
            'comments': comments_to_send,
        })
        run_output = json.loads(subprocess.check_output(
            [args.curl, '-s', '-X', 'POST'] + json_headers
            + [review_url, '-d', json_text] + verbose_flag
        ).decode('utf-8').rstrip('\n'))
        if isinstance(run_output, dict) and 'message' in run_output:
            # Error responses always carry "message"; "errors" is optional
            # and its entries may be strings or objects.
            errors = run_output.get('errors') or []
            if isinstance(errors, str):
                errors = [errors]
            error_lines = [e if isinstance(e, str) else json.dumps(e)
                           for e in errors]
            print('%s error when posting comments:\n%s' %
                  (run_output['message'], '\n'.join(error_lines)))
            sys.exit(1)
        print('Posted %d PR file comments successfully' % len(pr_comments))
    else:
        print('No PR file comments to post.')
    sys.exit(0)
255+
256+
def parse_cmdline_args(argv=None):
    """Parse the command-line options for this script.

    Args:
      argv: Optional list of argument strings to parse. When None (the
        default), argparse reads the arguments from sys.argv[1:].

    Returns:
      An argparse.Namespace with the attributes token, pr_number, repo,
      verbose, curl, comment_prefix, comment_suffix, comment_tag,
      fuzzy_lines (an int) and base_directory.
    """
    parser = argparse.ArgumentParser(
        description='Add log warnings/errors as PR comments.')
    parser.add_argument(
        '-t', '--token', required=True,
        help='GitHub access token')
    parser.add_argument(
        '-p', '--pr_number', required=True,
        help='Pull request number')
    parser.add_argument(
        '-r', '--repo', metavar='URL',
        help='GitHub repo of the pull request, default is current repo')
    parser.add_argument(
        '-v', '--verbose', action='store_true',
        help='Enable verbose mode')
    parser.add_argument(
        '-C', '--curl', default='curl', metavar='COMMAND',
        help='Curl command to use for making request')
    parser.add_argument(
        '-P', '--comment_prefix', default='', metavar='TEXT',
        help='Prefix for comment')
    parser.add_argument(
        '-S', '--comment_suffix', default='', metavar='TEXT',
        help='Suffix for comment')
    parser.add_argument(
        '-T', '--comment_tag', required=True, metavar='TAG',
        help='Hidden text, used to identify and delete old comments')
    parser.add_argument(
        '-f', '--fuzzy_lines', default=0, metavar='COUNT', type=int,
        help='If comment lines are outside the diff, adjust them by up to this amount')
    parser.add_argument(
        '-d', '--base_directory', default=os.curdir, metavar='DIRECTORY',
        help='Base directory to use for file relative paths')
    return parser.parse_args(argv)
270+
271+
272+
if __name__ == '__main__':
273+
main()

0 commit comments

Comments
 (0)