Skip to content

Commit a6ac2e7

Browse files
committed
Speed up generate-code-scanning-query-list.py
Use 'codeql execute cli-server' to avoid repeated JVM startup overhead
1 parent 4f79398 commit a6ac2e7

File tree

1 file changed

+104
-69
lines changed

1 file changed

+104
-69
lines changed

misc/scripts/generate-code-scanning-query-list.py

Lines changed: 104 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import subprocess
22
import json
33
import csv
4+
import shutil
45
import sys
56
import os
67
import argparse
@@ -30,6 +31,39 @@
3031
languages = [ "cpp", "csharp", "go", "java", "javascript", "python"]
3132
packs = [ "code-scanning", "security-and-quality", "security-extended" ]
3233

34+
class CodeQL:
    """Context manager around one long-running ``codeql execute cli-server`` process.

    Entering the context starts the server; ``command()`` then sends one
    request per call over the server's stdin and reads the reply from its
    stdout, so the CLI is started only once for many commands. Leaving the
    context asks the server to shut down and kills it if it does not exit
    within five seconds.

    Wire format, as used by this class: a request is a JSON array of
    command-line arguments followed by a NUL byte; a reply is UTF-8 text
    terminated by a NUL byte.
    """

    def __init__(self):
        # Set by __enter__; kept here so the attribute always exists.
        self.proc = None

    def __enter__(self):
        """Start the cli-server subprocess and return this object."""
        self.proc = subprocess.Popen(
            ['codeql', 'execute', 'cli-server'],
            executable=shutil.which('codeql'),
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=sys.stderr,
            env=os.environ.copy(),
        )
        return self

    def __exit__(self, type, value, tb):
        """Ask the server to shut down, then make sure the process is gone.

        Returns None, so an exception raised inside the ``with`` body is
        never suppressed.
        """
        try:
            self.proc.stdin.write(b'["shutdown"]\0')
            self.proc.stdin.close()
        except (OSError, ValueError):
            # The server already exited (broken pipe) or stdin is already
            # closed. Nothing left to tell it; fall through to reaping so the
            # original error, if any, is not masked by this one.
            pass
        try:
            self.proc.wait(5)
        except subprocess.TimeoutExpired:
            # Did not shut down in time: force it, then reap the child so no
            # zombie process is left behind.
            self.proc.kill()
            self.proc.wait()
        finally:
            self.proc.stdout.close()

    def command(self, args):
        """Send one command to the server and return its reply as text.

        Args:
            args: list of command-line arguments (without the leading
                ``codeql``), e.g. ``["resolve", "queries", ...]``.

        Returns:
            The reply decoded as UTF-8, without the terminating NUL byte.

        Raises:
            EOFError: the server closed its stdout before terminating the
                reply (for example because the process exited).
        """
        request = json.dumps(args).encode('utf-8') + b'\0'
        self.proc.stdin.write(request)
        self.proc.stdin.flush()

        # Read byte-by-byte so nothing beyond this reply's NUL terminator is
        # consumed; bytearray keeps the accumulation linear.
        reply = bytearray()
        while True:
            chunk = self.proc.stdout.read(1)
            if not chunk:
                # EOF: without this check the loop would spin forever on b''.
                raise EOFError(
                    "codeql cli-server closed its output before completing a reply "
                    "(partial output: %r)" % bytes(reply)
                )
            if chunk == b'\0':
                return reply.decode('utf-8')
            reply += chunk
3367

3468
def prefix_repo_nwo(filename):
3569
"""
@@ -98,73 +132,74 @@ def subprocess_run(cmd):
98132
print("Error: couldn't invoke 'git'. Is it on the path? Aborting.", file=sys.stderr)
99133
raise e
100134

101-
# Run every CodeQL command through a single long-lived cli-server process
# instead of spawning the CLI once per command.
with CodeQL() as codeql:
    try: # Check for `codeql` on path
        codeql.command(["--version"])
    except Exception:
        print("Error: couldn't invoke CodeQL CLI 'codeql'. Is it on the path? Aborting.", file=sys.stderr)
        raise

    # Define CodeQL search path so it'll find the CodeQL repositories:
    #  - anywhere in the current Git clone (including current working directory)
    #  - the 'codeql' subdirectory of the cwd
    #
    # (and assumes the codeql-go repo is in a similar location)
    codeql_search_path = "./codeql:./codeql-go:." # will be extended further down

    # Extend CodeQL search path by detecting root of the current Git repo (if any). This means that you
    # can run this script from any location within the CodeQL git repository.
    try:
        git_toplevel_dir = subprocess_run(["git","rev-parse","--show-toplevel"])

        # Current working directory is in a Git repo. Add it to the search path, just in case it's the CodeQL repo
        git_toplevel_dir = git_toplevel_dir.stdout.strip()
        codeql_search_path += ":" + git_toplevel_dir + ":" + git_toplevel_dir + "/../codeql-go"
    except Exception:
        # git rev-parse --show-toplevel exited with non-zero exit code. We're not in a Git repo.
        # (Exception rather than a bare except, so Ctrl-C and sys.exit still propagate.)
        pass

    # Create CSV writer and write CSV header to stdout
    csvwriter = csv.writer(sys.stdout)
    csvwriter.writerow([
        "Query filename", "Suite", "Query name", "Query ID",
        "Kind", "Severity", "Precision", "Tags"
    ])

    # Iterate over all languages and packs, and resolve which queries are part of those packs
    for lang in languages:
        for pack in packs:
            # Get absolute paths to queries in this pack by using 'codeql resolve queries'
            # NOTE(review): whether command() raises when the server reports a failed
            # resolve is not visible from here; confirm this handler still fires.
            try:
                queries_subp = codeql.command(["resolve","queries","--search-path", codeql_search_path, "%s-%s.qls" % (lang, pack)])
            except Exception:
                # Resolving queries might go wrong if the github/codeql and github/codeql-go repositories are not
                # on the search path.
                level = "Warning" if arguments.ignore_missing_query_packs else "Error"
                print(
                    "%s: couldn't find query pack '%s' for language '%s'. Do you have the right repositories in the right places (search path: '%s')?" % (level, pack, lang, codeql_search_path),
                    file=sys.stderr
                )
                if arguments.ignore_missing_query_packs:
                    continue
                else:
                    sys.exit("You can use '--ignore-missing-query-packs' to ignore this error")

            # Investigate metadata for every query by using 'codeql resolve metadata'
            for queryfile in queries_subp.splitlines():
                queryfile = queryfile.strip()
                if not queryfile:
                    # An empty reply would otherwise produce a single "" entry and
                    # a metadata lookup for a non-existent file.
                    continue

                query_metadata_json = codeql.command(["resolve","metadata",queryfile]).strip()

                # Turn an absolute path to a query file into an nwo-prefixed path (e.g. github/codeql/java/ql/src/....)
                queryfile_nwo = prefix_repo_nwo(queryfile)

                meta = json.loads(query_metadata_json)

                # Python's CSV writer will automatically quote fields if necessary
                csvwriter.writerow([
                    queryfile_nwo, pack,
                    get_query_metadata('name', meta, queryfile_nwo),
                    get_query_metadata('id', meta, queryfile_nwo),
                    get_query_metadata('kind', meta, queryfile_nwo),
                    get_query_metadata('problem.severity', meta, queryfile_nwo),
                    get_query_metadata('precision', meta, queryfile_nwo),
                    get_query_metadata('tags', meta, queryfile_nwo)
                ])

0 commit comments

Comments
 (0)