from __future__ import annotations

# Copyright (c) 2022 Vanessa Sochat and Ayoub Malek
# This source code is licensed under the terms of the MIT license.
# For a copy, see <https://opensource.org/licenses/MIT>.

import argparse
import logging
import os
import re
from collections.abc import Sequence

from urlchecker.core.check import UrlChecker
from urlchecker.core.fileproc import remove_empty
from urlchecker.logger import print_failure

logger = logging.getLogger("urlchecker")


def get_parser():
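    """
    Build the argparse parser for the pre-commit hook. The options mirror
    the main urlchecker check command, flattened into a single parser.
    """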
    # Flatten parser to just be the check command
    parser = argparse.ArgumentParser(description="urlchecker python pre-commit")
    parser.add_argument(
        "path",
        help="the local path to check (repository urls are not supported by the pre-commit hook)",
    )

    parser.add_argument(
        "-b",
        "--branch",
        help="if cloning, specify a branch to use (defaults to main)",
        default="main",
    )

    parser.add_argument(
        "--subfolder",
        help="relative subfolder path within path (if not specified, we use root)",
    )
    parser.add_argument(
        "--cleanup",
        help="remove root folder after checking (defaults to False, no cleanup)",
        default=False,
        action="store_true",
    )
    parser.add_argument(
        "--force-pass",
        help="force successful pass (return code 0) regardless of result",
        default=False,
        action="store_true",
    )

    parser.add_argument(
        "--no-print",
        help="Skip printing results to the screen (defaults to printing to console).",
        default=False,
        action="store_true",
    )

    parser.add_argument(
        "--verbose",
        help="Print file names for failed urls in addition to the urls.",
        default=False,
        action="store_true",
    )

    parser.add_argument(
        "--file-types",
        dest="file_types",
        help="comma separated list of file extensions to check (defaults to .md,.py)",
        default=".md,.py",
    )
    # Here we separate out filenames (provided by pre-commit) from extra
    # patterns to filter them by (--patterns), which corresponds to --files
    # in the urlchecker client
    parser.add_argument("filenames", nargs="*")
    parser.add_argument(
        "--patterns",
        dest="patterns",
        help="comma separated list of regular expression patterns to filter filenames (no spaces)",
        default="",
    )
    parser.add_argument(
        "--exclude-urls",
        help="comma separated links to exclude (no spaces)",
        default="",
    )

    parser.add_argument(
        "--exclude-patterns",
        help="comma separated list of patterns to exclude (no spaces)",
        default="",
    )

    parser.add_argument(
        "--exclude-files",
        help="comma separated list of files and patterns to exclude (no spaces)",
        default="",
    )

    # Saving

    parser.add_argument(
        "--save",
        help="Path to a csv file to save results to.",
        default=None,
    )

    # Timeouts

    parser.add_argument(
        "--retry-count",
        help="number of attempts for a failing url (defaults to 2, meaning one retry).",
        type=int,
        default=2,
    )

    parser.add_argument(
        "--timeout",
        help="timeout (seconds) to provide to the requests library (defaults to 5)",
        type=int,
        default=5,
    )
    return parser


def check(args):
    """
    Main entrypoint for running a check. We expect an args namespace with
    arguments parsed by the main client. From here we determine the path
    to check and run the UrlChecker from core/check.py.

    Args:
        - args : the argparse Namespace with parsed args
    """
    path = args.path

    # Case 1: specify present working directory
    if not path or path == ".":
        path = os.getcwd()

    # Case 2: git clone isn't supported for a pre-commit hook
    elif re.search("^(git@|http)", path):
        logger.error("Repository url %s detected, not supported for pre-commit hook." % path)
        return 1

    # Add subfolder to path
    if args.subfolder:
        path = os.path.join(path, args.subfolder)

    # By the time we get here, a path must exist
    if not os.path.exists(path):
        logger.error("%s does not exist." % path)
        return 1

    logger.debug("Checking path %s" % path)

    # Parse file types, and excluded urls and files (includes absolute and patterns)
    file_types = args.file_types.split(",")
    exclude_urls = remove_empty(args.exclude_urls.split(","))
    exclude_patterns = remove_empty(args.exclude_patterns.split(","))
    exclude_files = remove_empty(args.exclude_files.split(","))

    # Do we have any patterns to filter (regular expressions)?
    patterns = None
    if args.patterns:
        logger.debug("Found patterns of files to filter to.")
        patterns = "(%s)" % "|".join(remove_empty(args.patterns.split(",")))
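        # e.g. --patterns "docs/,src/" becomes the regex "(docs/|src/)"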

    # Process the files
    files = []
    for filename in args.filenames:
        if not filename or not os.path.exists(filename):
            logger.error("%s does not exist, skipping." % filename)
            continue
        if patterns and not re.search(patterns, filename):
            continue
        files.append(filename)

    # Alert user about settings
    print("          original path: %s" % args.path)
    print("             final path: %s" % path)
    print("              subfolder: %s" % args.subfolder)
    print("                 branch: %s" % args.branch)
    print("                cleanup: %s" % args.cleanup)
    print("             file types: %s" % file_types)
    print("                  files: %s" % files)
    print("              print all: %s" % (not args.no_print))
    print("                verbose: %s" % args.verbose)
    print("          urls excluded: %s" % exclude_urls)
    print("  url patterns excluded: %s" % exclude_patterns)
    print(" file patterns excluded: %s" % exclude_files)
    print("             force pass: %s" % args.force_pass)
    print("            retry count: %s" % args.retry_count)
    print("                   save: %s" % args.save)
    print("                timeout: %s" % args.timeout)

    # Instantiate a new checker with provided arguments
    checker = UrlChecker(
        path=path,
        file_types=file_types,
        include_patterns=files,
        exclude_files=exclude_files,
        print_all=not args.no_print,
    )
    check_results = checker.run(
        exclude_urls=exclude_urls,
        exclude_patterns=exclude_patterns,
        retry_count=args.retry_count,
        timeout=args.timeout,
    )

    # Save results to file, if save indicated
    if args.save:
        checker.save_results(args.save)

    # Case 1: We didn't find any urls to check
    if not check_results["failed"] and not check_results["passed"]:
        print("\n\n\U0001F937 No urls were collected.")
        return 0

    # Case 2: We had errors, print them for the user
    if check_results["failed"]:
        print("\n\U0001F914 Uh oh... The following urls did not pass:")
        if args.verbose:
            for file_name, result in checker.checks.items():
                if result["failed"]:
                    print_failure(file_name + ":")
                    for url in result["failed"]:
                        print_failure("  " + url)
        else:
            for failed_url in check_results["failed"]:
                print_failure(failed_url)

    # If we have failures and it's not a force pass, exit with 1
    if not args.force_pass and check_results["failed"]:
        return 1

    # Finally, alert user if we are passing conditionally
    if check_results["failed"]:
        print("\n\U0001F928 Conditional pass: force pass is true.")
    else:
        print("\n\n\U0001F389 All URLs passed!")
    return 0


def main(argv: Sequence[str] | None = None) -> int:
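    """
    Entrypoint for pre-commit, which calls this hook with the staged
    filenames as positional arguments. Returns the exit code (0 on
    success) that pre-commit uses to pass or fail the hook.
    """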
    parser = get_parser()
    args = parser.parse_args(argv)

    # Get the return value to return to pre-commit
    return check(args)


if __name__ == "__main__":
    raise SystemExit(main())
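
# A minimal sketch of a .pre-commit-config.yaml entry for this hook. The
# rev and hook id below are assumptions: check the urlchecker-python
# repository for the published hook definition.
#
#   repos:
#     - repo: https://github.com/urlstechie/urlchecker-python
#       rev: <tag>
#       hooks:
#         - id: urlchecker-check
#           args: [--file-types, ".md,.py"]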