|
| 1 | +#!/usr/bin/env python |
| 2 | + |
| 3 | +# Copyright 2021 Google |
| 4 | +# |
| 5 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | +# you may not use this file except in compliance with the License. |
| 7 | +# You may obtain a copy of the License at |
| 8 | +# |
| 9 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | +# |
| 11 | +# Unless required by applicable law or agreed to in writing, software |
| 12 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | +# See the License for the specific language governing permissions and |
| 15 | +# limitations under the License. |
| 16 | +"""Take warnings/error lines from stdin and turn them into PR file comments. |
| 17 | +
|
| 18 | +Usage: |
| 19 | +python pr_file_commenter.py -t github_token -p pr_number |
| 20 | + -T comment_tag [-r git_repo_url] [-C curl_command] |
| 21 | + [-P comment_prefix] [-S comment_suffix] [-d base_directory] |
| 22 | + [-f fuzzy_line_count] [-v] < COMMENT_LINES |
| 23 | +
|
| 24 | +COMMENT_LINES should be a series of lines with the following format: |
| 25 | +path/to/first_filename:line_number: comment text |
| 26 | + optional comment text continuation |
| 27 | +path/to/second_filename:line_number: comment text |
| 28 | +
|
| 29 | +This script will scan through the comments and post any that fall into the diff |
| 30 | +range of the given PR as file comments on that PR. |
| 31 | +
|
| 32 | +If -r is unspecified, uses the current repo. |
| 33 | +""" |
| 34 | + |
| 35 | +import argparse |
| 36 | +from html import escape |
| 37 | +import json |
| 38 | +import os |
| 39 | +import re |
| 40 | +import subprocess |
| 41 | +import sys |
| 42 | +import time |
| 43 | +from unidiff import PatchSet |
| 44 | +import urllib.parse |
| 45 | + |
# Default media type requested from the GitHub REST API.
_GITHUB_JSON_ACCEPT = 'application/vnd.github.v3+json'

# Trailer that curl's -w option appends to the diff response so that the HTTP
# status can be separated from the diff text.
_HTTP_STATUS_MARKER = '\nHTTP status '

# Matches "path/to/file:line_number: message".
_COMMENT_LINE_RE = re.compile(r'([^:]+):([0-9]+): *(.*)$')


def _curl(args, method, url, accept=_GITHUB_JSON_ACCEPT, extra_args=(),
          verbose=False):
    """Run curl against the GitHub API and return the decoded response body.

    Args:
      args: Parsed command-line arguments; `curl` and `token` are read.
      method: HTTP method passed to curl's -X option.
      url: Request URL.
      accept: Value of the Accept header.
      extra_args: Additional curl arguments appended after the URL.
      verbose: If true, append curl's -v flag.

    Returns:
      The response written to curl's stdout, decoded as UTF-8.

    Raises:
      subprocess.CalledProcessError: If curl exits with a non-zero status.
    """
    # NOTE(review): the token is passed on curl's command line, so it is
    # visible to other local users in the process list while curl runs.
    command = [
        args.curl, '-s', '-X', method,
        '-H', 'Accept: %s' % accept,
        '-H', 'Authorization: token %s' % args.token,
        url,
    ]
    command.extend(extra_args)
    if verbose:
        command.append('-v')
    return subprocess.check_output(command).decode('utf-8')


def _api_message(response):
    """Return the 'message' field of a GitHub error payload, or its repr."""
    if isinstance(response, dict) and 'message' in response:
        return response['message']
    return repr(response)


def _resolve_repo(args):
    """Return (owner, name) for the GitHub repository to comment on.

    If args.repo is None it is filled in from the current git checkout's
    remote.origin.url. Exits with status 2 if the URL is not an
    https://github.com/ URL or has no owner/name component.
    """
    if args.repo is None:
        args.repo = subprocess.check_output(
            ['git', 'config', '--get', 'remote.origin.url']
        ).decode('utf-8').rstrip('\n').lower()
        if args.verbose:
            print('autodetected repo: %s' % args.repo)
    if not args.repo.startswith('https://github.com/'):
        print('Error, only https://github.com/ repositories are allowed.')
        sys.exit(2)
    # The repo name may itself contain dots (e.g. "socket.io"), so only a
    # trailing ".git" suffix is stripped rather than everything after a dot.
    match = re.match(
        r'https://github\.com/([^/]+)/([^/]+?)(?:\.git)?(?:/|$)', args.repo)
    if match is None:
        print('Error, could not find owner/name in repo URL: %s' % args.repo)
        sys.exit(2)
    return match.groups()


def _diff_valid_lines(pr_diff):
    """Map each file path in the diff to the set of commentable line numbers.

    Every target-side line covered by a hunk is included. Files whose diff
    only removes lines are skipped entirely.
    """
    valid_lines = {}
    for patched_file in PatchSet(pr_diff):
        # Skip files that only remove code.
        if patched_file.removed and not patched_file.added:
            continue
        touched = set()
        for hunk in patched_file:
            touched.update(range(hunk.target_start,
                                 hunk.target_start + hunk.target_length))
        valid_lines[patched_file.path] = touched
    return valid_lines


def _parse_comment_lines(lines, base_directory):
    """Parse "path:line: text" lines into comment dicts.

    A line beginning with a space that directly follows a comment (or another
    continuation) is appended to that comment's text. Any other line ends the
    current comment and is ignored.

    Returns:
      A list of dicts with keys 'filename' (relative to base_directory),
      'line' (int) and 'text' (each text line wrapped in backticks).
    """
    comments = []
    in_comment = False
    for line in lines:
        match = _COMMENT_LINE_RE.match(line)
        if match:
            in_comment = True
            comments.append({
                'filename': os.path.relpath(match.group(1), base_directory),
                'line': int(match.group(2)),
                'text': '`%s`' % match.group(3)})
        elif in_comment and line.startswith(' '):
            comments[-1]['text'] += '\n`%s`' % line.rstrip('\n')
        else:
            in_comment = False
    return comments


def _select_comments(all_comments, valid_lines, fuzzy_lines):
    """Drop duplicates and keep only comments that land inside the diff.

    A comment whose line is not in the diff is moved to the nearest valid
    line within +/- fuzzy_lines (preferring the higher line number at equal
    distance); the offset is recorded under the 'adjust' key.
    """
    selected = []
    seen = set()
    for comment in all_comments:
        key = (comment['filename'], comment['line'], comment['text'])
        if key in seen:
            continue
        seen.add(key)

        file_lines = valid_lines.get(comment['filename'])
        if file_lines is None:
            continue
        if comment['line'] in file_lines:
            selected.append(comment)
            continue
        for offset in range(1, fuzzy_lines + 1):
            if comment['line'] + offset in file_lines:
                comment['adjust'] = offset
            elif comment['line'] - offset in file_lines:
                comment['adjust'] = -offset
            else:
                continue
            selected.append(comment)
            break
    return selected


def _review_comment(pr_comment, args, hidden_comment_tag):
    """Build the review-comment payload entry for one selected comment."""
    text = pr_comment['text']
    line = pr_comment['line']
    adjust = pr_comment.get('adjust')
    if adjust is not None:
        distance = abs(adjust)
        # The note says where the reported line is relative to the line the
        # comment is attached to: a positive adjustment attaches the comment
        # below the reported line, so the report is "up" from there.
        text = '`[%d line%s %s] %s' % (
            distance,
            '' if distance == 1 else 's',
            'up' if adjust > 0 else 'down',
            text.lstrip('`'))
        line += adjust
    return {
        'body': (args.comment_prefix + text + args.comment_suffix +
                 hidden_comment_tag),
        'path': pr_comment['filename'],
        'line': line,
    }


def _find_tagged_comment_ids(args, api_base, hidden_comment_tag):
    """Return ids of existing PR review comments containing the hidden tag."""
    tagged_ids = []
    per_page = 100
    page = 1
    while True:
        if args.verbose:
            print('Read page %d of comments' % page)
        # GET /repos/{owner}/{repo}/pulls/{pull_number}/comments
        request_url = '%s/pulls/%s/comments?per_page=%d&page=%d' % (
            api_base, args.pr_number, per_page, page)
        comments = json.loads(_curl(args, 'GET', request_url))
        if not isinstance(comments, list):
            # Errors come back as a JSON object rather than a list.
            print('Error listing existing PR comments: %s' %
                  _api_message(comments))
            sys.exit(1)
        tagged_ids.extend(comment['id'] for comment in comments
                          if hidden_comment_tag in comment['body'])
        if len(comments) < per_page:
            # Stop once we've read less than a full page of comments.
            return tagged_ids
        page += 1


def main():
    """Post warning/error lines read from stdin as file comments on a PR.

    Steps performed:
      1. Fetch the PR's head commit and diff to find which files and lines
         the PR touches.
      2. Read the comment lines from stdin, remove duplicates, and omit any
         comment that does not fall on a line in the diff (optionally moving
         it by up to --fuzzy_lines lines).
      3. Delete comments posted by previous runs, identified by the hidden
         comment tag.
      4. Post the remaining comments as a single review on the PR.

    Exits with status 0 on success, 1 on a GitHub API error, and 2 if the
    repository URL is not a usable https://github.com/ URL.
    """
    args = parse_cmdline_args()
    hidden_comment_tag = '<hidden value="%s"></hidden>' % args.comment_tag
    repo_owner, repo_name = _resolve_repo(args)
    api_base = 'https://api.github.com/repos/%s/%s' % (repo_owner, repo_name)

    # Get the head commit for the pull request.
    # GET /repos/{owner}/{repo}/pulls/{pull_number}
    request_url = '%s/pulls/%s' % (api_base, args.pr_number)
    pr_data = json.loads(_curl(args, 'GET', request_url, verbose=args.verbose))
    if not isinstance(pr_data, dict) or 'head' not in pr_data:
        print('Error fetching pull request %s: %s' %
              (args.pr_number, _api_message(pr_data)))
        sys.exit(1)
    commit_sha = pr_data['head']['sha']
    if args.verbose:
        print('Commit sha:', commit_sha)

    # Get the diff for the pull request: same endpoint, but requesting the
    # diff media type.
    if args.verbose:
        print('request_url: %s' % request_url)
    raw_diff = _curl(
        args, 'GET', request_url,
        accept='application/vnd.github.v3.diff',
        extra_args=['-o', '-', '-w', _HTTP_STATUS_MARKER + '%{http_code}\n'],
        verbose=args.verbose)
    # Split the status trailer off so it is not fed to the diff parser.
    pr_diff, marker, http_status = raw_diff.rpartition(_HTTP_STATUS_MARKER)
    if not marker:
        pr_diff, http_status = raw_diff, 'unknown'
    http_status = http_status.strip()
    if http_status not in ('200', 'unknown'):
        # The body is an error payload rather than a diff; treat the diff as
        # empty instead of aborting, so stale comments are still cleaned up.
        print('Warning: fetching the PR diff returned HTTP status %s; '
              'treating the diff as empty.' % http_status)
        pr_diff = ''

    # Parse the diff to determine whether each source line is touched.
    # Only comments on lines that are part of the diff can be posted; see
    # https://docs.github.com/en/rest/reference/pulls#create-a-review-comment-for-a-pull-request
    valid_lines = _diff_valid_lines(pr_diff)

    # Get the comments from stdin and keep those that fall within the diff.
    all_comments = _parse_comment_lines(sys.stdin.readlines(),
                                        args.base_directory)
    pr_comments = _select_comments(all_comments, valid_lines,
                                   args.fuzzy_lines)
    if args.verbose:
        print('Got %d relevant comments' % len(pr_comments))

    # Next, get all existing review comments that we posted on the PR and
    # delete them.
    comments_to_delete = _find_tagged_comment_ids(args, api_base,
                                                  hidden_comment_tag)
    if comments_to_delete:
        print('Delete previous comments:', comments_to_delete)
        for comment_id in comments_to_delete:
            # DELETE /repos/{owner}/{repo}/pulls/comments/{comment_id}
            request_url = '%s/pulls/comments/%d' % (api_base, comment_id)
            delete_output = _curl(args, 'DELETE', request_url).rstrip('\n')
            if delete_output:
                # A successful delete has an empty body.
                print('Warning: unexpected response deleting comment %d: %s' %
                      (comment_id, delete_output))

    if pr_comments:
        # Post all comments together as a single review.
        # POST /repos/{owner}/{repo}/pulls/{pull_number}/reviews
        request_url = '%s/pulls/%s/reviews' % (api_base, args.pr_number)
        request_body = {
            'commit_id': commit_sha,
            'event': 'COMMENT',
            'comments': [
                _review_comment(pr_comment, args, hidden_comment_tag)
                for pr_comment in pr_comments
            ],
        }
        run_output = json.loads(_curl(
            args, 'POST', request_url,
            extra_args=['-d', json.dumps(request_body)],
            verbose=args.verbose))
        if isinstance(run_output, dict) and 'message' in run_output:
            # Error payloads always carry 'message'; 'errors' is optional and
            # its entries may be strings or objects.
            error_details = run_output.get('errors') or []
            print('%s error when posting comments:\n%s' %
                  (run_output['message'],
                   '\n'.join(str(detail) for detail in error_details)))
            sys.exit(1)
        print('Posted %d PR file comments successfully' % len(pr_comments))
    else:
        print('No PR file comments to post.')

    sys.exit(0)
| 255 | + |
def parse_cmdline_args(argv=None):
    """Parse the script's command-line options.

    Args:
      argv: Optional list of argument strings to parse. When None (the
        default), argparse reads the arguments from sys.argv.

    Returns:
      An argparse.Namespace with the attributes token, pr_number, repo,
      verbose, curl, comment_prefix, comment_suffix, comment_tag,
      fuzzy_lines (int) and base_directory. token, pr_number and comment_tag
      are required; repo defaults to None.
    """
    parser = argparse.ArgumentParser(
        description='Add log warnings/errors as PR comments.')
    parser.add_argument('-t', '--token', required=True,
                        help='GitHub access token')
    parser.add_argument('-p', '--pr_number', required=True,
                        help='Pull request number')
    parser.add_argument('-r', '--repo', metavar='URL',
                        help='GitHub repo of the pull request, default is current repo')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Enable verbose mode')
    parser.add_argument('-C', '--curl', default='curl', metavar='COMMAND',
                        help='Curl command to use for making request')
    parser.add_argument('-P', '--comment_prefix', default='', metavar='TEXT',
                        help='Prefix for comment')
    parser.add_argument('-S', '--comment_suffix', default='', metavar='TEXT',
                        help='Suffix for comment')
    parser.add_argument('-T', '--comment_tag', required=True, metavar='TAG',
                        help='Hidden text, used to identify and delete old comments')
    parser.add_argument('-f', '--fuzzy_lines', default=0, metavar='COUNT',
                        type=int,
                        help='If comment lines are outside the diff, adjust them by up to this amount')
    parser.add_argument('-d', '--base_directory', default=os.curdir,
                        metavar='DIRECTORY',
                        help='Base directory to use for file relative paths')
    return parser.parse_args(argv)
| 270 | + |
| 271 | + |
# Run the commenter only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    main()
0 commit comments