Skip to content

Commit 28d2fd7

Browse files
authored
Merge pull request github#9416 from github/redsun82/swift-codegen-docs
Swift: document and partially simplify codegen
2 parents e1c7de9 + 8723493 commit 28d2fd7

File tree

13 files changed

+158
-84
lines changed

13 files changed

+158
-84
lines changed

swift/codegen/README.md

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Code generation suite
2+
3+
This directory contains the code generation suite used by the Swift extractor and the QL library. This suite will use
4+
the abstract class specification of [`schema.yml`](schema.yml) to generate:
5+
6+
* [the `dbscheme` file](../ql/lib/swift.dbscheme) (see [`dbschemegen.py`](generators/dbschemegen.py))
7+
* [the QL generated code](../ql/lib/codeql/swift/generated) and when
8+
appropriate [the corresponding stubs](../ql/lib/codeql/swift/elements) (see [`qlgen.py`](generators/qlgen.py))
9+
* C++ tags and trap entries (see [`trapgen.py`](generators/trapgen.py))
10+
* C++ structured classes (see [`cppgen.py`](generators/cppgen.py))
11+
12+
## Usage
13+
14+
By default `bazel run //swift/codegen` will update all checked-in generated files (`dbscheme` and QL sources). You can
15+
append `--` followed by other options to tweak the behaviour, which is mainly intended for debugging.
16+
See `bazel run //swift/codegen -- --help` for a list of all options. In particular, `--generate` can be used with a
17+
comma-separated list to select what to generate (choosing among `dbscheme`, `ql`, `trap` and `cpp`).
18+
19+
C++ code is generated during build (see [`swift/extractor/trap/BUILD.bazel`](../extractor/trap/BUILD.bazel)). After a
20+
build you can browse the generated code in `bazel-bin/swift/extractor/trap/generated`.
21+
22+
## Implementation notes
23+
24+
The suite uses [mustache templating](https://mustache.github.io/) for generation. Templates are
25+
in [the `templates` directory](templates), prefixed with the generation target they are used for.
26+
27+
Rather than passing dictionaries to the templating engine, python dataclasses are used as defined
28+
in [the `lib` directory](lib). For each of the four generation targets the entry point for the implementation is
29+
specified as the `generate` function in the modules within [the `generators` directory](generators).
30+
31+
Finally, [`codegen.py`](codegen.py) is the driver script gluing everything together and specifying the command line
32+
options.
33+
34+
Unit tests are in [the `test` directory](test) and can be run via `bazel test //swift/codegen/test`.
35+
36+
For more details about each specific generation target, please refer to the module docstrings
37+
in [the `generators` directory](generators).

swift/codegen/codegen.py

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14,21 +14,29 @@
1414

1515

1616
def _parse_args() -> argparse.Namespace:
17-
p = argparse.ArgumentParser()
18-
p.add_argument("--generate", type=lambda x: x.split(","), default=["dbscheme", "ql"])
19-
p.add_argument("--verbose", "-v", action="store_true")
20-
p.add_argument("--swift-dir", type=_abspath, default=paths.swift_dir)
21-
p.add_argument("--schema", type=_abspath, default=paths.swift_dir / "codegen/schema.yml")
22-
p.add_argument("--dbscheme", type=_abspath, default=paths.swift_dir / "ql/lib/swift.dbscheme")
23-
p.add_argument("--ql-output", type=_abspath, default=paths.swift_dir / "ql/lib/codeql/swift/generated")
24-
p.add_argument("--ql-stub-output", type=_abspath, default=paths.swift_dir / "ql/lib/codeql/swift/elements")
25-
p.add_argument("--ql-format", action="store_true", default=True)
26-
p.add_argument("--no-ql-format", action="store_false", dest="ql_format")
27-
p.add_argument("--codeql-binary", default="codeql")
28-
p.add_argument("--cpp-output", type=_abspath)
29-
p.add_argument("--cpp-namespace", default="codeql")
30-
p.add_argument("--trap-affix", default="Trap")
31-
p.add_argument("--cpp-include-dir", default="swift/extractor/trap")
17+
p = argparse.ArgumentParser(description="Code generation suite")
18+
p.add_argument("--generate", type=lambda x: x.split(","), default=["dbscheme", "ql"],
19+
help="specify what targets to generate as a comma separated list, choosing among dbscheme, ql, trap "
20+
"and cpp")
21+
p.add_argument("--verbose", "-v", action="store_true", help="print more information")
22+
p.add_argument("--swift-dir", type=_abspath, default=paths.swift_dir,
23+
help="the directory that should be regarded as the root of the swift codebase. Used to compute QL "
24+
"imports and in some comments (default %(default)s)")
25+
p.add_argument("--schema", type=_abspath, default=paths.swift_dir / "codegen/schema.yml",
26+
help="input schema file (default %(default)s)")
27+
p.add_argument("--dbscheme", type=_abspath, default=paths.swift_dir / "ql/lib/swift.dbscheme",
28+
help="output file for dbscheme generation, input file for trap generation (default %(default)s)")
29+
p.add_argument("--ql-output", type=_abspath, default=paths.swift_dir / "ql/lib/codeql/swift/generated",
30+
help="output directory for generated QL files (default %(default)s)")
31+
p.add_argument("--ql-stub-output", type=_abspath, default=paths.swift_dir / "ql/lib/codeql/swift/elements",
32+
help="output directory for QL stub/customization files (default %(default)s). Defines also the "
33+
"generated qll file importing every class file")
34+
p.add_argument("--ql-format", action="store_true", default=True,
35+
help="use codeql to autoformat QL files (which is the default)")
36+
p.add_argument("--no-ql-format", action="store_false", dest="ql_format", help="do not format QL files")
37+
p.add_argument("--codeql-binary", default="codeql", help="command to use for QL formatting (default %(default)s)")
38+
p.add_argument("--cpp-output", type=_abspath,
39+
help="output directory for generated C++ files, required if trap or cpp is provided to --generate")
3240
return p.parse_args()
3341

3442

swift/codegen/generators/cppgen.py

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,16 @@
1+
"""
2+
C++ trap class generation
3+
4+
`generate(opts, renderer)` will generate `TrapClasses.h` out of a `yml` schema file.
5+
6+
Each class in the schema gets a corresponding `struct` in `TrapClasses.h`, where:
7+
* inheritance is preserved
8+
* each property will be a corresponding field in the `struct` (with repeated properties mapping to `std::vector` and
9+
optional ones to `std::optional`)
10+
* final classes get a streaming operator that serializes the whole class into the corresponding trap emissions (using
11+
`TrapEntries.h` from `trapgen`).
12+
"""
13+
114
import functools
215
from typing import Dict
316

@@ -7,7 +20,7 @@
720
from swift.codegen.lib import cpp, schema
821

922

10-
def _get_type(t: str, trap_affix: str) -> str:
23+
def _get_type(t: str) -> str:
1124
if t is None:
1225
# this is a predicate
1326
return "bool"
@@ -16,19 +29,19 @@ def _get_type(t: str, trap_affix: str) -> str:
1629
if t == "boolean":
1730
return "bool"
1831
if t[0].isupper():
19-
return f"{trap_affix}Label<{t}Tag>"
32+
return f"TrapLabel<{t}Tag>"
2033
return t
2134

2235

23-
def _get_field(cls: schema.Class, p: schema.Property, trap_affix: str) -> cpp.Field:
36+
def _get_field(cls: schema.Class, p: schema.Property) -> cpp.Field:
2437
trap_name = None
2538
if not p.is_single:
2639
trap_name = inflection.camelize(f"{cls.name}_{p.name}")
2740
if not p.is_predicate:
2841
trap_name = inflection.pluralize(trap_name)
2942
args = dict(
3043
field_name=p.name + ("_" if p.name in cpp.cpp_keywords else ""),
31-
type=_get_type(p.type, trap_affix),
44+
type=_get_type(p.type),
3245
is_optional=p.is_optional,
3346
is_repeated=p.is_repeated,
3447
is_predicate=p.is_predicate,
@@ -39,9 +52,8 @@ def _get_field(cls: schema.Class, p: schema.Property, trap_affix: str) -> cpp.Fi
3952

4053

4154
class Processor:
42-
def __init__(self, data: Dict[str, schema.Class], trap_affix: str):
55+
def __init__(self, data: Dict[str, schema.Class]):
4356
self._classmap = data
44-
self._trap_affix = trap_affix
4557

4658
@functools.lru_cache(maxsize=None)
4759
def _get_class(self, name: str) -> cpp.Class:
@@ -52,7 +64,7 @@ def _get_class(self, name: str) -> cpp.Class:
5264
return cpp.Class(
5365
name=name,
5466
bases=[self._get_class(b) for b in cls.bases],
55-
fields=[_get_field(cls, p, self._trap_affix) for p in cls.properties],
67+
fields=[_get_field(cls, p) for p in cls.properties],
5668
final=not cls.derived,
5769
trap_name=trap_name,
5870
)
@@ -64,7 +76,6 @@ def get_classes(self):
6476

6577
def generate(opts, renderer):
6678
assert opts.cpp_output
67-
processor = Processor({cls.name: cls for cls in schema.load(opts.schema).classes}, opts.trap_affix)
79+
processor = Processor({cls.name: cls for cls in schema.load(opts.schema).classes})
6880
out = opts.cpp_output
69-
renderer.render(cpp.ClassList(processor.get_classes(), opts.cpp_namespace, opts.trap_affix,
70-
opts.cpp_include_dir, opts.schema), out / f"{opts.trap_affix}Classes.h")
81+
renderer.render(cpp.ClassList(processor.get_classes(), opts.schema), out / f"TrapClasses.h")

swift/codegen/generators/dbschemegen.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,19 @@
1-
#!/usr/bin/env python3
1+
"""
2+
dbscheme file generation
3+
4+
`generate(opts, renderer)` will generate a `dbscheme` file out of a `yml` schema file.
5+
6+
Each final class in the schema file will get a corresponding defining DB table with the id and single properties as
7+
columns.
8+
Moreover:
9+
* single properties in non-final classes will also trigger generation of a table with an id reference and all single
10+
properties as columns
11+
* each optional property will trigger generation of a table with an id reference and the property value as columns
12+
* each repeated property will trigger generation of a table with an id reference, an `int` index and the property value
13+
as columns
14+
The type hierarchy will be translated to corresponding `union` declarations.
15+
"""
16+
217
import pathlib
318

419
import inflection
@@ -63,11 +78,10 @@ def cls_to_dbscheme(cls: schema.Class):
6378
name=inflection.underscore(f"{cls.name}_{f.name}"),
6479
columns=[
6580
Column("id", type=dbtype(cls.name)),
66-
],
81+
],
6782
)
6883

6984

70-
7185
def get_declarations(data: schema.Schema):
7286
return [d for cls in data.classes for d in cls_to_dbscheme(cls)]
7387

swift/codegen/generators/trapgen.py

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,15 @@
1-
#!/usr/bin/env python3
1+
"""
2+
C++ trap entry generation
3+
4+
`generate(opts, renderer)` will generate `TrapTags.h` (for types of labels) and `TrapEntries.h` (for trap emission) out
5+
of a dbscheme file.
6+
7+
Each table in the `dbscheme` gets a corresponding `struct` defined in `TrapEntries.h` with a field for each column and
8+
an appropriate streaming operator for the trap emission.
9+
10+
Unions in the `dbscheme` are used to populate a hierarchy of tags (empty structs) in `TrapTags.h` that is used to
11+
enforce a type system on trap labels (see `TrapLabel.h`).
12+
"""
213

314
import logging
415

@@ -15,24 +26,24 @@ def get_tag_name(s):
1526
return inflection.camelize(s[1:])
1627

1728

18-
def get_cpp_type(schema_type: str, trap_affix: str):
29+
def get_cpp_type(schema_type: str):
1930
if schema_type.startswith("@"):
2031
tag = get_tag_name(schema_type)
21-
return f"{trap_affix}Label<{tag}Tag>"
32+
return f"TrapLabel<{tag}Tag>"
2233
if schema_type == "string":
2334
return "std::string"
2435
if schema_type == "boolean":
2536
return "bool"
2637
return schema_type
2738

2839

29-
def get_field(c: dbscheme.Column, trap_affix: str):
40+
def get_field(c: dbscheme.Column):
3041
args = {
3142
"field_name": c.schema_name,
3243
"type": c.type,
3344
}
3445
args.update(cpp.get_field_override(c.schema_name))
35-
args["type"] = get_cpp_type(args["type"], trap_affix)
46+
args["type"] = get_cpp_type(args["type"])
3647
return cpp.Field(**args)
3748

3849

@@ -43,14 +54,14 @@ def get_binding_column(t: dbscheme.Table):
4354
return None
4455

4556

46-
def get_trap(t: dbscheme.Table, trap_affix: str):
57+
def get_trap(t: dbscheme.Table):
4758
id = get_binding_column(t)
4859
if id:
49-
id = get_field(id, trap_affix)
60+
id = get_field(id)
5061
return cpp.Trap(
5162
table_name=t.name,
5263
name=inflection.camelize(t.name),
53-
fields=[get_field(c, trap_affix) for c in t.columns],
64+
fields=[get_field(c) for c in t.columns],
5465
id=id,
5566
)
5667

@@ -63,14 +74,14 @@ def generate(opts, renderer):
6374
traps = []
6475
for e in dbscheme.iterload(opts.dbscheme):
6576
if e.is_table:
66-
traps.append(get_trap(e, opts.trap_affix))
77+
traps.append(get_trap(e))
6778
elif e.is_union:
6879
tag_graph.setdefault(e.lhs, set())
6980
for d in e.rhs:
7081
tag_graph.setdefault(d.type, set()).add(e.lhs)
7182

72-
renderer.render(cpp.TrapList(traps, opts.cpp_namespace, opts.trap_affix, opts.cpp_include_dir, opts.dbscheme),
73-
out / f"{opts.trap_affix}Entries.h")
83+
renderer.render(cpp.TrapList(traps, opts.dbscheme),
84+
out / f"TrapEntries.h")
7485

7586
tags = []
7687
for index, tag in enumerate(toposort_flatten(tag_graph)):
@@ -80,4 +91,4 @@ def generate(opts, renderer):
8091
index=index,
8192
id=tag,
8293
))
83-
renderer.render(cpp.TagList(tags, opts.cpp_namespace, opts.dbscheme), out / f"{opts.trap_affix}Tags.h")
94+
renderer.render(cpp.TagList(tags, opts.dbscheme), out / f"TrapTags.h")

swift/codegen/lib/cpp.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -101,9 +101,6 @@ class TrapList:
101101
template: ClassVar = 'trap_traps'
102102

103103
traps: List[Trap]
104-
namespace: str
105-
trap_affix: str
106-
include_dir: str
107104
source: str
108105

109106

@@ -112,7 +109,6 @@ class TagList:
112109
template: ClassVar = 'trap_tags'
113110

114111
tags: List[Tag]
115-
namespace: str
116112
source: str
117113

118114

@@ -149,7 +145,4 @@ class ClassList:
149145
template: ClassVar = "cpp_classes"
150146

151147
classes: List[Class]
152-
namespace: str
153-
trap_affix: str
154-
include_dir: str
155148
source: str

swift/codegen/lib/ql.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,17 @@
1+
"""
2+
QL files generation
3+
4+
`generate(opts, renderer)` will generate QL classes and manage stub files out of a `yml` schema file.
5+
6+
Each class (for example, `Foo`) in the schema triggers:
7+
* generation of a `FooBase` class implementation translating all properties into appropriate getters
8+
* if the stub file does not exist yet or has not been customized, generation of a stub file which defines `Foo` as extending `FooBase`. This can
9+
be used to add hand-written code to `Foo`, which requires removal of the `// generated` header comment in that file.
10+
All generated base classes actually import these customizations when referencing other classes.
11+
Generated files that no longer correspond to any class in the schema are deleted. Customized stubs are however
12+
left behind and must be dealt with by hand.
13+
"""
14+
115
import pathlib
216
from dataclasses import dataclass, field
317
from typing import List, ClassVar

swift/codegen/templates/cpp_classes.mustache

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@
66
#include <optional>
77
#include <vector>
88

9-
#include "{{include_dir}}/{{trap_affix}}Label.h"
10-
#include "./{{trap_affix}}Entries.h"
9+
#include "swift/extractor/trap/TrapLabel.h"
10+
#include "./TrapEntries.h"
1111

12-
namespace {{namespace}} {
12+
namespace codeql {
1313
{{#classes}}
1414

1515
struct {{name}}{{#final}} : Binding<{{name}}Tag>{{#bases}}, {{ref.name}}{{/bases}}{{/final}}{{^final}}{{#has_bases}}: {{#bases}}{{^first}}, {{/first}}{{ref.name}}{{/bases}}{{/has_bases}}{{/final}} {
@@ -25,29 +25,29 @@ struct {{name}}{{#final}} : Binding<{{name}}Tag>{{#bases}}, {{ref.name}}{{/bases
2525
{{/final}}
2626

2727
protected:
28-
void emit({{^final}}{{trap_affix}}Label<{{name}}Tag> id, {{/final}}std::ostream& out) const {
28+
void emit({{^final}}TrapLabel<{{name}}Tag> id, {{/final}}std::ostream& out) const {
2929
{{#trap_name}}
30-
out << {{.}}{{trap_affix}}{id{{#single_fields}}, {{field_name}}{{/single_fields}}} << '\n';
30+
out << {{.}}Trap{id{{#single_fields}}, {{field_name}}{{/single_fields}}} << '\n';
3131
{{/trap_name}}
3232
{{#bases}}
3333
{{ref.name}}::emit(id, out);
3434
{{/bases}}
3535
{{#fields}}
3636
{{#is_predicate}}
37-
if ({{field_name}}) out << {{trap_name}}{{trap_affix}}{id} << '\n';
37+
if ({{field_name}}) out << {{trap_name}}Trap{id} << '\n';
3838
{{/is_predicate}}
3939
{{#is_optional}}
4040
{{^is_repeated}}
41-
if ({{field_name}}) out << {{trap_name}}{{trap_affix}}{id, *{{field_name}}} << '\n';
41+
if ({{field_name}}) out << {{trap_name}}Trap{id, *{{field_name}}} << '\n';
4242
{{/is_repeated}}
4343
{{/is_optional}}
4444
{{#is_repeated}}
4545
for (auto i = 0u; i < {{field_name}}.size(); ++i) {
4646
{{^is_optional}}
47-
out << {{trap_name}}{{trap_affix}}{id, i, {{field_name}}[i]};
47+
out << {{trap_name}}Trap{id, i, {{field_name}}[i]};
4848
{{/is_optional}}
4949
{{#is_optional}}
50-
if ({{field_name}}[i]) out << {{trap_name}}{{trap_affix}}{id, i, *{{field_name}}[i]};
50+
if ({{field_name}}[i]) out << {{trap_name}}Trap{id, i, *{{field_name}}[i]};
5151
{{/is_optional}}
5252
}
5353
{{/is_repeated}}

swift/codegen/templates/trap_tags.mustache

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// clang-format off
33
#pragma once
44

5-
namespace {{namespace}} {
5+
namespace codeql {
66
{{#tags}}
77

88
// {{id}}

0 commit comments

Comments
 (0)