|
| 1 | +import re |
| 2 | +import subprocess |
| 3 | +import json |
| 4 | +import csv |
| 5 | +import sys |
| 6 | +import os |
| 7 | + |
| 8 | +""" |
| 9 | +This script collects CodeQL queries that are part of code scanning query packs, |
| 10 | +renders the accompanying query help as markdown, inserts some useful metadata |
| 11 | +into the help, and adds a link to the query in the CodeQL repo. |
| 12 | +
|
| 13 | +This script requires that 'git' and 'codeql' commands |
| 14 | +are on the PATH. It'll try to automatically set the CodeQL search path correctly, |
| 15 | +as long as you run the script from one of the following locations: |
| 16 | + - anywhere from within a clone of the CodeQL Git repo |
| 17 | + - from the parent directory of a clone of the CodeQL Git repo (assuming 'codeql' |
| 18 | + and 'codeql-go' directories both exist) |
| 19 | +""" |
| 20 | + |
# Languages for which query help is generated
languages = [
    "cpp",
    "csharp",
    "go",
    "java",
    "javascript",
    "python",
]

# Query suites to generate help for - the lgtm suite should cover the queries that users are interested in
packs = [
    "lgtm",
]
| 26 | + |
def prefix_repo_nwo(filename):
    """
    Replaces an absolute path prefix with a GitHub repository name with owner (NWO).
    This function relies on `git` being available.

    For example:
        /home/alice/git/ql/java/ql/src/MyQuery.ql
    becomes:
        github/codeql/java/ql/src/MyQuery.ql

    If we can't detect a known NWO (e.g. github/codeql, github/codeql-go), the
    path will be truncated to the root of the git repo:
        ql/java/ql/src/MyQuery.ql

    If the filename is not part of a Git repo, the return value is the
    same as the input value: the whole path.

    Args:
        filename: Absolute path to a file, typically a query inside a Git clone.

    Returns:
        The path with everything up to and including the Git top-level directory
        replaced by the detected prefix, or `filename` unchanged when
        `git rev-parse --show-toplevel` cannot be run for its directory.

    Raises:
        subprocess.CalledProcessError: if `git remote -v` fails inside a directory
            that `git rev-parse` accepted as part of a repository.
    """
    dirname = os.path.dirname(filename)

    try:
        git_toplevel_dir_subp = subprocess_run(
            ["git", "-C", dirname, "rev-parse", "--show-toplevel"])
    except (subprocess.CalledProcessError, OSError):
        # Not a Git repo (or the directory/git could not be accessed). Only these
        # failures are treated as "not a repo"; anything else (Ctrl-C, programming
        # errors) is allowed to propagate instead of being silently swallowed.
        return filename

    git_toplevel_dir = git_toplevel_dir_subp.stdout.strip()

    # Detect 'github/codeql' and 'github/codeql-go' repositories by checking the remote (it's a bit
    # of a hack but will work in most cases, as long as the remotes have 'codeql' and 'codeql-go'
    # in the URL).
    git_remotes = subprocess_run(
        ["git", "-C", dirname, "remote", "-v"]).stdout.strip()

    # 'codeql-go' must be tested first because 'codeql' is a substring of it
    if "codeql-go" in git_remotes:
        prefix = "github/codeql-go"
    elif "codeql" in git_remotes:
        prefix = "github/codeql"
    else:
        prefix = os.path.basename(git_toplevel_dir)

    # Drop the top-level directory and the path separator that follows it
    return os.path.join(prefix, filename[len(git_toplevel_dir)+1:])
| 68 | + |
| 69 | + |
def single_spaces(input):
    """
    Collapses every run of whitespace in `input` (spaces, tabs, newlines) into a
    single space and drops leading and trailing whitespace.

    Workaround for https://github.com/github/codeql-coreql-team/issues/470 which causes
    some metadata strings to contain newlines and spaces without a good reason.
    """
    # str.split() without arguments splits on whitespace runs and discards empty pieces
    words = input.split()
    return " ".join(words)
| 76 | + |
| 77 | + |
def get_query_metadata(key, metadata, queryfile):
    """
    Looks up `key` in the query's `metadata` mapping.

    Returns the value with whitespace normalised by `single_spaces`. When the key
    is absent, a warning naming the key, the query ID ('unknown' if the metadata
    has no 'id') and `queryfile` is printed to stderr and "" is returned.
    """
    if key not in metadata:
        query_id = metadata.get('id', 'unknown')
        warning = "Warning: no '%s' metadata for query with ID '%s' (%s)" % (
            key, query_id, queryfile)
        print(warning, file=sys.stderr)
        return ""

    return single_spaces(metadata[key])
| 86 | + |
| 87 | + |
def subprocess_run(cmd):
    """
    Runs `cmd` through subprocess.run with a copy of the current environment,
    capturing stdout/stderr as text, and returns the CompletedProcess.

    Because check=True is passed, an exit code != 0 raises an exception.
    """
    run_options = {
        "capture_output": True,
        "text": True,
        "env": os.environ.copy(),
        "check": True,
    }
    return subprocess.run(cmd, **run_options)
| 91 | + |
| 92 | + |
# Abort early when a required command-line tool cannot be invoked: each entry
# pairs the probe command with the message printed to stderr if the probe fails.
for _probe_cmd, _failure_message in (
    (["git", "--version"],
     "Error: couldn't invoke 'git'. Is it on the path? Aborting."),
    (["codeql", "--version"],
     "Error: couldn't invoke CodeQL CLI 'codeql'. Is it on the path? Aborting."),
):
    try:
        subprocess_run(_probe_cmd)
    except Exception as e:
        print(_failure_message, file=sys.stderr)
        raise e
| 104 | + |
# Define CodeQL search path so it'll find the CodeQL repositories:
#  - anywhere in the current Git clone (including current working directory)
#  - the 'codeql' subdirectory of the cwd
#
# (and assumes the codeql-go repo is in a similar location)

codeql_search_path = "./codeql:./codeql-go"  # will be extended further down

# Extend CodeQL search path by detecting root of the current Git repo (if any). This means that you
# can run this script from any location within the CodeQL git repository.
try:
    # Take the path string from stdout right away: concatenating the CompletedProcess
    # object itself raises a TypeError, which would leave the search path unextended.
    git_toplevel_dir = subprocess_run(
        ["git", "rev-parse", "--show-toplevel"]).stdout.strip()
except (subprocess.CalledProcessError, OSError):
    # git rev-parse --show-toplevel exited with non-zero exit code (or could not be
    # run). We're not in a Git repo, so keep the default search path.
    pass
else:
    # Current working directory is in a Git repo. Add it to the search path, just in case it's
    # the CodeQL repo, along with a sibling 'codeql-go' checkout next to it.
    codeql_search_path += ":" + git_toplevel_dir + ":" + git_toplevel_dir + "/../codeql-go"
| 124 | + |
# Root directory for all generated output:
#   <docs_dir>/<lang>/<query-id>.md   one markdown help file per query
#   <docs_dir>/toc-<lang>.rst         one Sphinx toctree per language
# Defined up front so that the toctree can be written even when no query was processed.
docs_dir = 'query-help'

# Suites whose membership is reported in each query's metadata block, in display order
listed_suites = ("code-scanning", "security-extended", "security-and-quality")


def _query_paths(resolve_output):
    """Splits the output of 'codeql resolve queries' into a list of non-blank query paths."""
    return [line.strip() for line in resolve_output.splitlines() if line.strip()]


# Iterate over all languages and packs, and resolve which queries are part of those packs
for lang in languages:

    # Resolve every listed suite once per language. Paths are kept in sets so that membership
    # is an exact path match rather than a substring search through the raw command output.
    suite_members = {}
    for suite in listed_suites:
        suite_members[suite] = set(_query_paths(subprocess_run(
            ["codeql", "resolve", "queries", "--search-path", codeql_search_path,
             "%s-%s.qls" % (lang, suite)]).stdout))

    # @name:filename pairs used to generate the alphabetically sorted Sphinx toctree. Created
    # once per language so that entries accumulate across all packs.
    index_file_dictionary = {}

    for pack in packs:
        # Get absolute paths to queries in this pack by using 'codeql resolve queries'
        try:
            queries_subp = subprocess_run(
                ["codeql", "resolve", "queries", "--search-path", codeql_search_path, "%s-%s.qls" % (lang, pack)])
        except Exception:
            # Resolving queries might go wrong if the github/codeql and github/codeql-go repositories are not
            # on the search path.
            print(
                "Warning: couldn't find query pack '%s' for language '%s'. Do you have the right repositories in the right places (search path: '%s')?" % (
                    pack, lang, codeql_search_path),
                file=sys.stderr
            )
            continue

        # Investigate metadata for every query by using 'codeql resolve metadata'
        for queryfile in _query_paths(queries_subp.stdout):
            query_metadata_json = subprocess_run(
                ["codeql", "resolve", "metadata", queryfile]).stdout.strip()
            meta = json.loads(query_metadata_json)

            # Turn an absolute path to a query file into an nwo-prefixed path (e.g. github/codeql/java/ql/src/....)
            queryfile_nwo = prefix_repo_nwo(queryfile)

            # Generate the query help for each query
            try:
                query_help = subprocess_run(
                    ["codeql", "generate", "query-help", "--format=markdown", "--warnings=error", queryfile]).stdout.strip()
            except Exception:
                # Print a message if generate query help fails, then move on to the next query.
                # 'except Exception' (not a bare except) so that Ctrl-C still stops the script.
                print("Failed to generate query help for '%s'" % (queryfile_nwo))
                continue

            # Pull out relevant query metadata properties that we want to display in the query help
            query_name_meta = get_query_metadata('name', meta, queryfile)
            # The description is not displayed; the lookup is kept so that a missing
            # '@description' is still reported on stderr.
            get_query_metadata('description', meta, queryfile)
            raw_query_id = get_query_metadata('id', meta, queryfile)
            query_id = "ID: " + raw_query_id + "\n"
            query_kind = "Kind: " + \
                get_query_metadata('kind', meta, queryfile) + "\n"
            query_severity = "Severity: " + \
                get_query_metadata('problem.severity', meta, queryfile) + "\n"
            query_precision = "Precision: " + \
                get_query_metadata('precision', meta, queryfile) + "\n"
            query_tags = "Tags:\n - " + \
                get_query_metadata('tags', meta, queryfile).replace(" ", "\n - ") + "\n"

            # Build a link to the query source file for display in the query help. Match on the
            # repository name 'codeql-go' (a bare "go" also matches unrelated path components)
            # and only rewrite its first occurrence, i.e. the repository part of the path.
            repo_name = "codeql-go" if "codeql-go" in queryfile_nwo else "codeql"
            transform_link = queryfile_nwo.replace(
                repo_name, repo_name + "/tree/main", 1).replace(" ", "%20").replace("\\", "/")
            query_link = "[Click to see the query in the CodeQL repository](https://github.com/" + \
                transform_link + ")\n"

            # List the suites that contain this query; empty when it is in none of them
            suite_lines = "".join(
                ' - ' + lang + '-' + suite + '.qls\n'
                for suite in listed_suites
                if queryfile in suite_members[suite])
            suites_list = "Query suites:\n" + suite_lines if suite_lines else ""

            # Join metadata into a literal block and add query link below
            meta_string = "\n"*2 + "```\n" + query_id + query_kind + query_severity + \
                query_precision + query_tags + suites_list + "```\n\n" + query_link + "\n"

            # Insert metadata block into query help directly under title
            full_help = query_help.replace("\n", meta_string, 1)

            # Use id property to make name for markdown file, replacing any "/" characters with "-"
            query_name = raw_query_id.replace("/", "-")

            # Populate index_file_dictionary with @name extracted from metadata and corresponding query filename
            index_file_dictionary[query_name_meta] = lang + "/" + query_name

            # Make paths for output of the form: query-help/<lang>/<queryfile>.md
            md_dir_path = os.path.join(docs_dir, lang)
            md_file_path = os.path.join(md_dir_path, query_name + ".md")

            # Make directories for output paths if they don't already exist
            os.makedirs(md_dir_path, exist_ok=True)

            # Generate query help at chosen path if output file doesn't already exist
            if not os.path.exists(md_file_path):
                with open(md_file_path, "x") as md_file:
                    md_file.write(full_help)

    # Sort index_file_dictionary alphabetically by @name key, and create column of filename values
    sorted_index = ("\n" + " ").join(
        filename for _, filename in sorted(index_file_dictionary.items()))

    # Add directives to make sorted_index a valid toctree for sphinx source files
    toc_directive = ".. toctree::\n :titlesonly:\n\n "
    toc_include = toc_directive + sorted_index

    # Write toctree to rst. The file is overwritten so that the script can be re-run; the index
    # is rebuilt in full on every run even when the markdown files already exist.
    os.makedirs(docs_dir, exist_ok=True)
    toc_file = os.path.join(docs_dir, "toc-" + lang + ".rst")
    with open(toc_file, "w") as toc:
        toc.write(toc_include)
0 commit comments