Skip to content

Commit 39a3623

Browse files
committed
MaD generator: change default thread and ram
The standalone MaD generator now defaults `--threads` to `0` (use all available threads) and, when `--ram` is not given, limits RAM to 2048 MB per thread. The hand-written argument parsing has also been replaced with `argparse`.
1 parent 7db31b0 commit 39a3623

File tree

2 files changed: 50 additions and 89 deletions.

misc/scripts/models-as-data/bulk_generate_mad.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -242,9 +242,9 @@ def generate_models(config, args, project: Project, database_dir: str) -> None:
242242
generator.generateSinks = should_generate_sinks(project)
243243
generator.generateSources = should_generate_sources(project)
244244
generator.generateSummaries = should_generate_summaries(project)
245-
generator.setenvironment(database=database_dir, folder=name)
246245
generator.threads = args.codeql_threads
247246
generator.ram = args.codeql_ram
247+
generator.setenvironment(database=database_dir, folder=name)
248248
generator.run()
249249

250250

@@ -527,7 +527,7 @@ def main(config, args) -> None:
527527
parser.add_argument(
528528
"--codeql-ram",
529529
type=int,
530-
help="What `--ram` value to pass to `codeql` while generating models (by default the flag is not passed)",
530+
help="What `--ram` value to pass to `codeql` while generating models (by default 2048 MB per thread)",
531531
default=None,
532532
)
533533
parser.add_argument(

misc/scripts/models-as-data/generate_mad.py

Lines changed: 48 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import sys
88
import tempfile
99
import re
10+
import argparse
1011

1112
def quote_if_needed(row):
1213
if row != "true" and row != "false":
@@ -26,112 +27,76 @@ def parseData(data):
2627
return rows
2728

2829

29-
def printHelp():
30-
print(f"""Usage:
31-
python3 generate_mad.py <library-database> [DIR] --language LANGUAGE [--with-sinks] [--with-sources] [--with-summaries] [--with-neutrals] [--with-typebased-summaries] [--dry-run]
32-
30+
description = """\
3331
This generates summary, source, sink and neutral models for the code in the database.
34-
The files will be placed in `LANGUAGE/ql/lib/ext/generated/DIR`
35-
36-
Which models are generated is controlled by the flags:
37-
--with-sinks
38-
--with-sources
39-
--with-summaries
40-
--with-neutrals
41-
--with-typebased-summaries (Experimental)
42-
If none of these flags are specified, all models are generated except for the type based models.
43-
44-
--dry-run: Only run the queries, but don't write to file.
32+
The files will be placed in `LANGUAGE/ql/lib/ext/generated/DIR`"""
4533

34+
epilog = """\
4635
Example invocations:
4736
$ python3 generate_mad.py /tmp/dbs/my_library_db
4837
$ python3 generate_mad.py /tmp/dbs/my_library_db --with-sinks
4938
$ python3 generate_mad.py /tmp/dbs/my_library_db --with-sinks my_directory
5039
51-
52-
Requirements: `codeql` should appear on your path.
53-
""")
40+
Requirements: `codeql` should appear on your path."""
5441

5542
class Generator:
56-
def __init__(self, language):
43+
generateSinks = False
44+
generateSources = False
45+
generateSummaries = False
46+
generateNeutrals = False
47+
generateTypeBasedSummaries = False
48+
dryRun = False
49+
dirname = "modelgenerator"
50+
ram = None
51+
threads = 0
52+
folder = ""
53+
54+
def __init__(self, language=None):
5755
self.language = language
58-
self.generateSinks = False
59-
self.generateSources = False
60-
self.generateSummaries = False
61-
self.generateNeutrals = False
62-
self.generateTypeBasedSummaries = False
63-
self.dryRun = False
64-
self.dirname = "modelgenerator"
65-
self.ram = 2**15
66-
self.threads = 8
67-
68-
69-
def setenvironment(self, database, folder):
56+
57+
def setenvironment(self, database=None, folder=None):
7058
self.codeQlRoot = subprocess.check_output(["git", "rev-parse", "--show-toplevel"]).decode("utf-8").strip()
71-
self.database = database
59+
self.database = database or self.database
60+
self.folder = folder or self.folder
7261
self.generatedFrameworks = os.path.join(
73-
self.codeQlRoot, f"{self.language}/ql/lib/ext/generated/{folder}")
62+
self.codeQlRoot, f"{self.language}/ql/lib/ext/generated/{self.folder}")
7463
self.workDir = tempfile.mkdtemp()
64+
if self.ram is None:
65+
threads = self.threads if self.threads > 0 else os.cpu_count()
66+
self.ram = 2048 * threads
7567
os.makedirs(self.generatedFrameworks, exist_ok=True)
7668

7769

7870
@staticmethod
7971
def make():
80-
# Create a generator instance based on command line arguments.
81-
if any(s == "--help" for s in sys.argv):
82-
printHelp()
83-
sys.exit(0)
84-
85-
if "--language" in sys.argv:
86-
language = sys.argv[sys.argv.index("--language") + 1]
87-
sys.argv.remove("--language")
88-
sys.argv.remove(language)
89-
else:
90-
printHelp()
91-
sys.exit(0)
92-
93-
generator = Generator(language=language)
94-
95-
if "--with-sinks" in sys.argv:
96-
sys.argv.remove("--with-sinks")
97-
generator.generateSinks = True
98-
99-
if "--with-sources" in sys.argv:
100-
sys.argv.remove("--with-sources")
101-
generator.generateSources = True
102-
103-
if "--with-summaries" in sys.argv:
104-
sys.argv.remove("--with-summaries")
105-
generator.generateSummaries = True
106-
107-
if "--with-neutrals" in sys.argv:
108-
sys.argv.remove("--with-neutrals")
109-
generator.generateNeutrals = True
110-
111-
if "--with-typebased-summaries" in sys.argv:
112-
sys.argv.remove("--with-typebased-summaries")
113-
generator.generateTypeBasedSummaries = True
114-
115-
if "--dry-run" in sys.argv:
116-
sys.argv.remove("--dry-run")
117-
generator.dryRun = True
72+
p = argparse.ArgumentParser(
73+
description=description,
74+
formatter_class=argparse.RawTextHelpFormatter,
75+
epilog=epilog)
76+
p.add_argument("database", help="Path to the CodeQL database")
77+
p.add_argument("folder", nargs="?", default="", help="Optional folder to place the generated files in")
78+
p.add_argument("--language", required=True, help="The language for which to generate models")
79+
p.add_argument("--with-sinks", action="store_true", help="Generate sink models", dest="generateSinks")
80+
p.add_argument("--with-sources", action="store_true", help="Generate source models", dest="generateSources")
81+
p.add_argument("--with-summaries", action="store_true", help="Generate summary models", dest="generateSummaries")
82+
p.add_argument("--with-neutrals", action="store_true", help="Generate neutral models", dest="generateNeutrals")
83+
p.add_argument("--with-typebased-summaries", action="store_true", help="Generate type-based summary models (experimental)", dest="generateTypeBasedSummaries")
84+
p.add_argument("--dry-run", action="store_true", help="Do not write the generated files, just print them to stdout", dest="dryRun")
85+
p.add_argument("--threads", type=int, default=Generator.threads, help="Number of threads to use for CodeQL queries (default %(default)s). `0` means use all available threads.")
86+
p.add_argument("--ram", type=int, help="Amount of RAM to use for CodeQL queries in MB. Default is to use 2048 MB per thread.")
87+
generator = p.parse_args(namespace=Generator())
11888

11989
if (not generator.generateSinks and
12090
not generator.generateSources and
12191
not generator.generateSummaries and
12292
not generator.generateNeutrals and
12393
not generator.generateTypeBasedSummaries):
124-
generator.generateSinks = generator.generateSources = generator.generateSummaries = generator.generateNeutrals = True
125-
126-
n = len(sys.argv)
127-
if n < 2:
128-
printHelp()
129-
sys.exit(1)
130-
elif n == 2:
131-
generator.setenvironment(sys.argv[1], "")
132-
else:
133-
generator.setenvironment(sys.argv[1], sys.argv[2])
94+
generator.generateSinks = True
95+
generator.generateSources = True
96+
generator.generateSummaries = True
97+
generator.generateNeutrals = True
13498

99+
generator.setenvironment()
135100
return generator
136101

137102

@@ -140,11 +105,7 @@ def runQuery(self, query):
140105
queryFile = os.path.join(self.codeQlRoot, f"{self.language}/ql/src/utils/{self.dirname}", query)
141106
resultBqrs = os.path.join(self.workDir, "out.bqrs")
142107

143-
cmd = ['codeql', 'query', 'run', queryFile, '--database', self.database, '--output', resultBqrs]
144-
if self.threads is not None:
145-
cmd += ["--threads", str(self.threads)]
146-
if self.ram is not None:
147-
cmd += ["--ram", str(self.ram)]
108+
cmd = ['codeql', 'query', 'run', queryFile, '--database', self.database, '--output', resultBqrs, "--threads", str(self.threads), "--ram", str(self.ram)]
148109
helpers.run_cmd(cmd, "Failed to generate " + query)
149110

150111
return helpers.readData(self.workDir, resultBqrs)
@@ -220,4 +181,4 @@ def run(self):
220181
self.save(typeBasedContent, ".typebased.model.yml")
221182

222183
if __name__ == '__main__':
223-
Generator.make().run()
184+
Generator.make().run()

0 commit comments

Comments
 (0)