Skip to content

Commit 2cd5881

Browse files
committed
Swift: skip QL code generation on untouched files
This is a developer QoL improvement, where running codegen will skip writing (and especially formatting) any files that were not changed. **Why?** While code generation in itself was pretty much instant, QL formatting of generated code was starting to take a long time. This made unconditionally running codegen quite annoying, for example before each test run as part of an IDE workflow or as part of the pre-commit hook. **How?** This was not completely straightforward as we could not work with the contents of the file prior to code generation as that was already post-processed by the QL formatting, so we had no chance of comparing the output of template rendering with that. We therefore store the hashes of the files _prior_ to QL formatting in a checked-in file (`swift/ql/.generated.list`). We can therefore load those hashes at the beginning of code generation, use them to compare the template rendering output and update them in this special registry file. **What else?** We also extend this mechanism to detect accidental modification of generated files in a more robust way. Before this patch, we were doing it with a rough regexp based heuristic. Now, we just store the hashes of the files _after_ QL formatting in the same checked file, so we can check that and stop generation if a generated file was modified, or a stub was modified without removing the `// generated` header.
1 parent 0796926 commit 2cd5881

File tree

9 files changed

+1323
-209
lines changed

9 files changed

+1323
-209
lines changed

.github/workflows/swift.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ jobs:
3939
- 'swift/ql/lib/codeql/swift/elements/**'
4040
- 'swift/ql/lib/codeql/swift/generated/**'
4141
- 'swift/ql/test/extractor-tests/generated/**'
42+
- 'swift/ql/.generated.list'
4243
ql:
4344
- 'github/workflows/swift.yml'
4445
- 'swift/**/*.ql'

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ repos:
4444

4545
- id: swift-codegen
4646
name: Run Swift checked in code generation
47-
files: ^swift/(schema.py$|codegen/|.*/generated/|ql/lib/(swift\.dbscheme$|codeql/swift/elements))
47+
files: ^swift/(schema.py$|codegen/|.*/generated/|ql/lib/(swift\.dbscheme$|codeql/swift/elements)|ql/\.generated.list)
4848
language: system
4949
entry: bazel run //swift/codegen -- --quiet
5050
pass_filenames: false

swift/codegen/codegen.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,13 @@ def _parse_args() -> argparse.Namespace:
4040
p.add_argument("--codeql-binary", default="codeql", help="command to use for QL formatting (default %(default)s)")
4141
p.add_argument("--cpp-output", type=_abspath,
4242
help="output directory for generated C++ files, required if trap or cpp is provided to --generate")
43+
p.add_argument("--generated-registry", type=_abspath, default=paths.swift_dir / "ql/.generated.list",
44+
help="registry file containing information about checked-in generated code")
4345
return p.parse_args()
4446

4547

46-
def _abspath(x: str) -> pathlib.Path:
47-
return pathlib.Path(x).resolve()
48+
def _abspath(x: str) -> typing.Optional[pathlib.Path]:
49+
return pathlib.Path(x).resolve() if x else None
4850

4951

5052
def run():
@@ -56,9 +58,8 @@ def run():
5658
else:
5759
log_level = logging.INFO
5860
logging.basicConfig(format="{levelname} {message}", style='{', level=log_level)
59-
exe_path = paths.exe_file.relative_to(opts.swift_dir)
6061
for target in opts.generate:
61-
generate(target, opts, render.Renderer(exe_path))
62+
generate(target, opts, render.Renderer(opts.swift_dir))
6263

6364

6465
if __name__ == "__main__":

swift/codegen/generators/qlgen.py

Lines changed: 83 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -216,32 +216,11 @@ def get_classes_used_by(cls: ql.Class) -> typing.List[str]:
216216
return sorted(set(t for t in get_types_used_by(cls) if t[0].isupper() and t != cls.name))
217217

218218

219-
_generated_stub_re = re.compile(r"\n*private import .*\n+class \w+ extends Generated::\w+ \{[ \n]?\}", re.MULTILINE)
220-
221-
222-
def _is_generated_stub(file: pathlib.Path) -> bool:
223-
with open(file) as contents:
224-
for line in contents:
225-
if not line.startswith("// generated"):
226-
return False
227-
break
228-
else:
229-
# no lines
230-
return False
231-
# we still do not detect modified synth constructors
232-
if not file.name.endswith("Constructor.qll"):
233-
# one line already read, if we can read 5 other we are past the normal stub generation
234-
line_threshold = 5
235-
first_lines = list(itertools.islice(contents, line_threshold))
236-
if len(first_lines) == line_threshold or not _generated_stub_re.match("".join(first_lines)):
237-
raise ModifiedStubMarkedAsGeneratedError(
238-
f"{file.name} stub was modified but is still marked as generated")
239-
return True
240-
241-
242219
def format(codeql, files):
243-
format_cmd = [codeql, "query", "format", "--in-place", "--"]
244-
format_cmd.extend(str(f) for f in files if f.suffix in (".qll", ".ql"))
220+
ql_files = [str(f) for f in files if f.suffix in (".qll", ".ql")]
221+
if not ql_files:
222+
return
223+
format_cmd = [codeql, "query", "format", "--in-place", "--"] + ql_files
245224
res = subprocess.run(format_cmd, stderr=subprocess.PIPE, text=True)
246225
if res.returncode:
247226
for line in res.stderr.splitlines():
@@ -307,11 +286,14 @@ def generate(opts, renderer):
307286
stub_out = opts.ql_stub_output
308287
test_out = opts.ql_test_output
309288
missing_test_source_filename = "MISSING_SOURCE.txt"
289+
include_file = stub_out.with_suffix(".qll")
290+
291+
generated = {q for q in out.rglob("*.qll")}
292+
generated.add(include_file)
293+
generated.update(q for q in test_out.rglob("*.ql"))
294+
generated.update(q for q in test_out.rglob(missing_test_source_filename))
310295

311-
existing = {q for q in out.rglob("*.qll")}
312-
existing |= {q for q in stub_out.rglob("*.qll") if _is_generated_stub(q)}
313-
existing |= {q for q in test_out.rglob("*.ql")}
314-
existing |= {q for q in test_out.rglob(missing_test_source_filename)}
296+
stubs = {q for q in stub_out.rglob("*.qll")}
315297

316298
data = schema.load_file(input)
317299

@@ -324,77 +306,75 @@ def generate(opts, renderer):
324306

325307
imports = {}
326308

327-
db_classes = [cls for cls in classes.values() if not cls.ipa]
328-
renderer.render(ql.DbClasses(db_classes), out / "Raw.qll")
329-
330-
classes_by_dir_and_name = sorted(classes.values(), key=lambda cls: (cls.dir, cls.name))
331-
for c in classes_by_dir_and_name:
332-
imports[c.name] = get_import(stub_out / c.path, opts.swift_dir)
333-
334-
for c in classes.values():
335-
qll = out / c.path.with_suffix(".qll")
336-
c.imports = [imports[t] for t in get_classes_used_by(c)]
337-
renderer.render(c, qll)
338-
stub_file = stub_out / c.path.with_suffix(".qll")
339-
if not stub_file.is_file() or _is_generated_stub(stub_file):
340-
stub = ql.Stub(
341-
name=c.name, base_import=get_import(qll, opts.swift_dir))
342-
renderer.render(stub, stub_file)
343-
344-
# for example path/to/elements -> path/to/elements.qll
345-
include_file = stub_out.with_suffix(".qll")
346-
renderer.render(ql.ImportList(list(imports.values())), include_file)
347-
348-
renderer.render(ql.GetParentImplementation(list(classes.values())), out / 'ParentChild.qll')
349-
350-
for c in data.classes.values():
351-
if _should_skip_qltest(c, data.classes):
352-
continue
353-
test_dir = test_out / c.group / c.name
354-
test_dir.mkdir(parents=True, exist_ok=True)
355-
if not any(test_dir.glob("*.swift")):
356-
log.warning(f"no test source in {test_dir.relative_to(test_out)}")
357-
renderer.render(ql.MissingTestInstructions(),
358-
test_dir / missing_test_source_filename)
359-
continue
360-
total_props, partial_props = _partition(_get_all_properties_to_be_tested(c, data.classes),
361-
lambda p: p.is_single or p.is_predicate)
362-
renderer.render(ql.ClassTester(class_name=c.name,
363-
properties=total_props,
364-
# in case of collapsed hierarchies we want to see the actual QL class in results
365-
show_ql_class="qltest_collapse_hierarchy" in c.pragmas),
366-
test_dir / f"{c.name}.ql")
367-
for p in partial_props:
368-
renderer.render(ql.PropertyTester(class_name=c.name,
369-
property=p), test_dir / f"{c.name}_{p.getter}.ql")
370-
371-
final_ipa_types = []
372-
non_final_ipa_types = []
373-
constructor_imports = []
374-
ipa_constructor_imports = []
375-
stubs = {}
376-
for cls in sorted(data.classes.values(), key=lambda cls: (cls.group, cls.name)):
377-
ipa_type = get_ql_ipa_class(cls)
378-
if ipa_type.is_final:
379-
final_ipa_types.append(ipa_type)
380-
if ipa_type.has_params:
381-
stub_file = stub_out / cls.group / f"{cls.name}Constructor.qll"
382-
if not stub_file.is_file() or _is_generated_stub(stub_file):
383-
# stub rendering must be postponed as we might not have yet all subtracted ipa types in `ipa_type`
384-
stubs[stub_file] = ql.Synth.ConstructorStub(ipa_type)
385-
constructor_import = get_import(stub_file, opts.swift_dir)
386-
constructor_imports.append(constructor_import)
387-
if ipa_type.is_ipa:
388-
ipa_constructor_imports.append(constructor_import)
389-
else:
390-
non_final_ipa_types.append(ipa_type)
391-
392-
for stub_file, data in stubs.items():
393-
renderer.render(data, stub_file)
394-
renderer.render(ql.Synth.Types(root.name, final_ipa_types, non_final_ipa_types), out / "Synth.qll")
395-
renderer.render(ql.ImportList(constructor_imports), out / "SynthConstructors.qll")
396-
renderer.render(ql.ImportList(ipa_constructor_imports), out / "PureSynthConstructors.qll")
397-
398-
renderer.cleanup(existing)
399-
if opts.ql_format:
400-
format(opts.codeql_binary, renderer.written)
309+
with renderer.manage(generated=generated, stubs=stubs, registry=opts.generated_registry) as renderer:
310+
311+
db_classes = [cls for cls in classes.values() if not cls.ipa]
312+
renderer.render(ql.DbClasses(db_classes), out / "Raw.qll")
313+
314+
classes_by_dir_and_name = sorted(classes.values(), key=lambda cls: (cls.dir, cls.name))
315+
for c in classes_by_dir_and_name:
316+
imports[c.name] = get_import(stub_out / c.path, opts.swift_dir)
317+
318+
for c in classes.values():
319+
qll = out / c.path.with_suffix(".qll")
320+
c.imports = [imports[t] for t in get_classes_used_by(c)]
321+
renderer.render(c, qll)
322+
stub_file = stub_out / c.path.with_suffix(".qll")
323+
if not renderer.is_customized_stub(stub_file):
324+
stub = ql.Stub(name=c.name, base_import=get_import(qll, opts.swift_dir))
325+
renderer.render(stub, stub_file)
326+
327+
# for example path/to/elements -> path/to/elements.qll
328+
renderer.render(ql.ImportList(list(imports.values())), include_file)
329+
330+
renderer.render(ql.GetParentImplementation(list(classes.values())), out / 'ParentChild.qll')
331+
332+
for c in data.classes.values():
333+
if _should_skip_qltest(c, data.classes):
334+
continue
335+
test_dir = test_out / c.group / c.name
336+
test_dir.mkdir(parents=True, exist_ok=True)
337+
if not any(test_dir.glob("*.swift")):
338+
log.warning(f"no test source in {test_dir.relative_to(test_out)}")
339+
renderer.render(ql.MissingTestInstructions(),
340+
test_dir / missing_test_source_filename)
341+
continue
342+
total_props, partial_props = _partition(_get_all_properties_to_be_tested(c, data.classes),
343+
lambda p: p.is_single or p.is_predicate)
344+
renderer.render(ql.ClassTester(class_name=c.name,
345+
properties=total_props,
346+
# in case of collapsed hierarchies we want to see the actual QL class in results
347+
show_ql_class="qltest_collapse_hierarchy" in c.pragmas),
348+
test_dir / f"{c.name}.ql")
349+
for p in partial_props:
350+
renderer.render(ql.PropertyTester(class_name=c.name,
351+
property=p), test_dir / f"{c.name}_{p.getter}.ql")
352+
353+
final_ipa_types = []
354+
non_final_ipa_types = []
355+
constructor_imports = []
356+
ipa_constructor_imports = []
357+
stubs = {}
358+
for cls in sorted(data.classes.values(), key=lambda cls: (cls.group, cls.name)):
359+
ipa_type = get_ql_ipa_class(cls)
360+
if ipa_type.is_final:
361+
final_ipa_types.append(ipa_type)
362+
if ipa_type.has_params:
363+
stub_file = stub_out / cls.group / f"{cls.name}Constructor.qll"
364+
if not renderer.is_customized_stub(stub_file):
365+
# stub rendering must be postponed as we might not have yet all subtracted ipa types in `ipa_type`
366+
stubs[stub_file] = ql.Synth.ConstructorStub(ipa_type)
367+
constructor_import = get_import(stub_file, opts.swift_dir)
368+
constructor_imports.append(constructor_import)
369+
if ipa_type.is_ipa:
370+
ipa_constructor_imports.append(constructor_import)
371+
else:
372+
non_final_ipa_types.append(ipa_type)
373+
374+
for stub_file, data in stubs.items():
375+
renderer.render(data, stub_file)
376+
renderer.render(ql.Synth.Types(root.name, final_ipa_types, non_final_ipa_types), out / "Synth.qll")
377+
renderer.render(ql.ImportList(constructor_imports), out / "SynthConstructors.qll")
378+
renderer.render(ql.ImportList(ipa_constructor_imports), out / "PureSynthConstructors.qll")
379+
if opts.ql_format:
380+
format(opts.codeql_binary, renderer.written)

0 commit comments

Comments
 (0)