Skip to content

Commit 91fd83a

Browse files
committed
Swift: dbscheme generator
This patch introduces the basic infrastructure of the code generation suite and the `dbscheme` generator. Note that the checked-in `schema.yml` is meant to reflect Swift 5.6 but might need some tweaking. Closes github/codeql-c-team#979
1 parent d094bbc commit 91fd83a

File tree

14 files changed

+3318
-5
lines changed

14 files changed

+3318
-5
lines changed

swift/README.md

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,32 @@ The Swift codeql package is an experimental and unsupported work in progress.
44

55
## Usage
66

7-
Run `bazel run //swift:create-extractor-pack`, which will install `swift/extractor-pack`.
7+
Run
8+
9+
```bash
10+
bazel run //swift:create-extractor-pack
11+
```
12+
13+
which will install `swift/extractor-pack`.
14+
815
Using `--search-path=swift/extractor-pack` will then pick up the Swift extractor. You can also use
916
`--search-path=swift`, as the extractor pack is mentioned in `swift/.codeqlmanifest.json`.
17+
18+
Note that you can run `bazel run :create-extractor-pack` if you are already in the `swift` directory.
19+
20+
## Code generation
21+
22+
Make sure to install the [pip requirements](./codegen/requirements.txt) via
23+
24+
```bash
25+
python3 -m pip install -r codegen/requirements.txt
26+
```
27+
28+
Run
29+
30+
```bash
31+
bazel run //swift/codegen
32+
```
33+
34+
to update generated files. This can be shortened to
35+
`bazel run codegen` if you are in the `swift` directory.

swift/codegen/BUILD.bazel

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
py_binary(
2+
name = "codegen",
3+
srcs = glob(["**/*.py"]),
4+
data = glob(["**/*.mustache"]) + [
5+
"schema.yml",
6+
"prefix.dbscheme",
7+
],
8+
)

swift/codegen/codegen.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#!/usr/bin/env python3
2+
3+
from lib import generator
4+
import dbschemegen
5+
6+
# Script entry point: hand the dbscheme generator to the shared runner.
if __name__ == "__main__":
    generator.run(dbschemegen.generate)

swift/codegen/dbschemegen.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
#!/usr/bin/env python3
2+
3+
import inflection
4+
5+
from lib.renderer import Renderer
6+
from lib.dbscheme import *
7+
from lib import paths, schema, generator
8+
9+
log = logging.getLogger(__name__)
10+
11+
12+
def dbtype(typename):
    """Return the dbscheme spelling of a schema type name.

    A name starting with an uppercase letter is turned into `@` followed by
    its underscored form; any other name is returned unchanged.
    """
    starts_upper = typename[0].isupper()
    return "@" + inflection.underscore(typename) if starts_upper else typename
16+
17+
18+
def cls_to_dbscheme(cls: schema.Class):
    """Yield the dbscheme declarations derived from a single schema class.

    In order, this produces:

    * a `DbUnion` over the derived classes, if `cls.derived` is non-empty;
    * the table of the class itself (an `id` column followed by one column per
      single-valued field), if the class has no derived classes or has at
      least one single-valued field;
    * one auxiliary table per optional field (keyed on `id`) and per repeated
      field (keyed on `id` and `index`).
    """
    entity = dbtype(cls.name)
    has_derived = bool(cls.derived)

    if has_derived:
        yield DbUnion(entity, (dbtype(sub) for sub in cls.derived))

    if not has_derived or any(field.is_single() for field in cls.fields):
        # the `id` column is a binding one only when the class has no derived classes
        own_columns = [DbColumn("id", type=entity, binding=not has_derived)]
        own_columns += [
            DbColumn(field.name, dbtype(field.type)) for field in cls.fields if field.is_single()
        ]
        yield DbTable(
            name=inflection.tableize(cls.name),
            columns=own_columns,
            keyset=DbKeySet(["id"]) if has_derived else None,
        )

    for field in cls.fields:
        if field.is_optional():
            key_columns = ["id"]
            columns = [
                DbColumn("id", type=entity),
                DbColumn(field.name, dbtype(field.type)),
            ]
        elif field.is_repeated():
            key_columns = ["id", "index"]
            columns = [
                DbColumn("id", type=entity),
                DbColumn("index", type="int"),
                DbColumn(inflection.singularize(field.name), dbtype(field.type)),
            ]
        else:
            # single-valued fields already live in the class table
            continue
        yield DbTable(
            keyset=DbKeySet(key_columns),
            name=inflection.tableize(f"{cls.name}_{field.name}"),
            columns=columns,
        )
53+
54+
55+
def generate(opts):
    """Render the dbscheme file from the schema file named in the options.

    Reads `opts.schema`, turns every class in it into dbscheme declarations,
    reads each file listed in the schema's includes (resolved relative to the
    schema file's directory), and renders the `dbscheme` template to
    `opts.dbscheme`.

    Returns the renderer's `written` set.
    """
    schema_path = opts.schema.resolve()
    dbscheme_path = opts.dbscheme.resolve()
    renderer = Renderer(opts.check)

    with open(schema_path) as src:
        data = schema.load(src)

    declarations = []
    for cls in data.classes.values():
        declarations.extend(cls_to_dbscheme(cls))

    includes = []
    for relative in data.includes:
        included = schema_path.parent / relative
        with open(included) as inclusion:
            entry = {"src": included.relative_to(paths.swift_dir), "data": inclusion.read()}
        includes.append(entry)

    renderer.render(
        "dbscheme",
        dbscheme_path,
        includes=includes,
        src=schema_path.relative_to(paths.swift_dir),
        declarations=declarations,
    )
    return renderer.written
73+
74+
75+
# Standalone entry point: run only this generator, asking the shared runner
# for the "schema" and "dbscheme" option groups.
if __name__ == "__main__":
    generator.run(generate, tags=["schema", "dbscheme"])

swift/codegen/lib/dbscheme.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
import logging
2+
import re
3+
from dataclasses import dataclass
4+
from typing import ClassVar, List
5+
6+
log = logging.getLogger(__name__)
7+
8+
dbscheme_keywords = {"case", "boolean", "int", "string", "type"}
9+
10+
11+
@dataclass
class DbColumn:
    """A column of a dbscheme table."""

    schema_name: str  # column name as given by the caller, before keyword escaping
    type: str  # dbscheme type; a leading "@" gets special treatment below
    binding: bool = False  # whether this column is a binding ("unique") one
    first: bool = False  # set to True by DbTable on its first column

    def name(self):
        """Return the column name, with `_` appended if it is a dbscheme keyword."""
        suffix = "_" if self.schema_name in dbscheme_keywords else ""
        return self.schema_name + suffix

    def lhstype(self):
        """Return the left-hand type: `int`/`unique int` for `@` types, else the type itself."""
        if self.type[0] != "@":
            return self.type
        if self.binding:
            return "unique int"
        return "int"

    def rhstype(self):
        """Return the right-hand type: the bare type for a binding `@` column, else `<type> ref`."""
        is_at_type = self.type[0] == "@"
        if is_at_type and self.binding:
            return self.type
        return f"{self.type} ref"
32+
33+
34+
@dataclass
class DbKeySetId:
    """A single column name inside a keyset."""

    id: str  # the column name
    first: bool = False  # True only for the leading entry of its keyset
38+
39+
40+
@dataclass
class DbKeySet:
    """A non-empty list of key column names.

    The constructor takes plain names; they are wrapped into `DbKeySetId`
    objects, and the leading one is flagged with `first=True`.
    """

    ids: List[DbKeySetId]

    def __post_init__(self):
        assert self.ids
        self.ids = [
            DbKeySetId(name, first=(position == 0))
            for position, name in enumerate(self.ids)
        ]
48+
49+
50+
class DbDecl:
    """Base class of dbscheme declarations.

    Both kind flags default to False; each subclass switches on its own.
    """

    is_table = is_union = False
53+
54+
55+
@dataclass
class DbTable(DbDecl):
    """A dbscheme table declaration: a name, its columns and an optional keyset."""

    is_table: ClassVar = True

    name: str
    columns: List[DbColumn]
    keyset: DbKeySet = None  # None means the table has no keyset

    def __post_init__(self):
        # flag the leading column, if there is one
        if not self.columns:
            return
        leading = self.columns[0]
        leading.first = True
66+
67+
68+
@dataclass
class DbUnionCase:
    """One alternative on the right-hand side of a union."""

    type: str  # the type name of this alternative
    first: bool = False  # True only for the leading alternative of its union
72+
73+
74+
@dataclass
class DbUnion(DbDecl):
    """A dbscheme union declaration: `lhs` is the union of the types in `rhs`.

    `rhs` may be given as any iterable of type names (including a generator).
    It is stored as a list of `DbUnionCase` sorted by type name, with the
    leading case flagged `first=True`.

    Raises:
        AssertionError: if `rhs` yields no type names.
    """

    is_union: ClassVar = True

    lhs: str
    rhs: List[DbUnionCase]

    def __post_init__(self):
        # Materialize before checking for emptiness: a generator is always
        # truthy, so testing `self.rhs` directly would let an empty one through
        # and fail later with an IndexError.
        cases = sorted((DbUnionCase(t) for t in self.rhs), key=lambda c: c.type)
        if not cases:
            # explicit raise (rather than `assert`) so the check survives `python -O`
            raise AssertionError(f"union {self.lhs} must have at least one case")
        cases[0].first = True
        self.rhs = cases

swift/codegen/lib/generator.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
import argparse
2+
import collections
3+
import logging
4+
import pathlib
5+
import sys
6+
7+
from . import paths
8+
9+
options = collections.defaultdict(list)
10+
11+
12+
class Option:
    """A deferred `argparse` argument, registered in the module-level `options` table.

    The positional and keyword arguments are stored as given and later
    forwarded to `ArgumentParser.add_argument`. The option is registered under
    each of its `tags`, or under `"*"` when no tags are given.
    """

    def __init__(self, *args, tags=None, **kwargs):
        self.args = args
        self.kwargs = kwargs
        for tag in tags or ["*"]:
            options[tag].append(self)

    def add_to(self, parser: argparse.ArgumentParser):
        """Add this option to `parser`."""
        parser.add_argument(*self.args, **self.kwargs)
25+
26+
27+
# Untagged options: registered under "*".
Option("--check", "-c", action="store_true")
Option("--verbose", "-v", action="store_true")
# Tagged options: registered under their tag only.
Option("--schema", tags=["schema"], type=pathlib.Path,
       default=paths.swift_dir / "codegen" / "schema.yml")
Option("--dbscheme", tags=["dbscheme"], type=pathlib.Path,
       default=paths.swift_dir / "ql" / "lib" / "swift.dbscheme")
31+
32+
33+
def _parse(*tags):
    """Parse the command line using the registered options and set up logging.

    With no `tags`, every registered option is offered. Otherwise only the
    untagged (`"*"`) options plus those registered under the given tags are.

    Returns the `argparse.Namespace` produced by `parse_args()`. As a side
    effect, logging is configured at DEBUG level if `--verbose` was passed and
    at INFO level otherwise.
    """
    parser = argparse.ArgumentParser()
    if not tags:
        opts = [opt for group in options.values() for opt in group]
    else:
        # Copy the untagged group: extending `options["*"]` itself would
        # permanently add the tagged options to the global registry.
        opts = list(options["*"])
        for t in tags:
            # `.get` avoids creating empty registry entries for unknown tags
            opts.extend(options.get(t, ()))
    for opt in opts:
        opt.add_to(parser)
    ret = parser.parse_args()
    log_level = logging.DEBUG if ret.verbose else logging.INFO
    logging.basicConfig(format="{levelname} {message}", style='{', level=log_level)
    return ret
47+
48+
49+
def run(*generate, tags=()):
    """Parse the options for `tags`, call every generator with them, then exit.

    The process exits with status 1 if `--check` was given and at least one
    generator returned a truthy value, and with status 0 otherwise.
    """
    opts = _parse(*tags)
    # a list, not a generator: every generator must run, with no short-circuiting
    outcomes = [g(opts) for g in generate]
    failed_check = bool(opts.check) and any(outcomes)
    sys.exit(1 if failed_check else 0)

swift/codegen/lib/paths.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import pathlib
2+
import sys
3+
import os
4+
5+
# Locate the `swift` directory and this library's directory. When the
# BUILD_WORKSPACE_DIRECTORY environment variable is set, both are derived from
# it; otherwise they are derived from the location of this file.
try:
    _workspace_dir = pathlib.Path(os.environ['BUILD_WORKSPACE_DIRECTORY'])
except KeyError:
    _this_file = pathlib.Path(__file__).resolve()
    lib_dir = _this_file.parent
    swift_dir = _this_file.parents[2]
else:
    swift_dir = _workspace_dir / 'swift'
    lib_dir = swift_dir.joinpath('codegen', 'lib')

# Absolute path of the script that was launched.
exe_file = pathlib.Path(sys.argv[0]).resolve()

swift/codegen/lib/renderer.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import hashlib
2+
import logging
3+
4+
import pystache
5+
6+
from . import paths
7+
8+
log = logging.getLogger(__name__)
9+
10+
11+
def md5(data):
    """Return the raw MD5 digest (as `bytes`) of the bytes-like `data`."""
    digest = hashlib.md5()
    digest.update(data)
    return digest.digest()
13+
14+
15+
class Renderer:
    """Render mustache templates to files, tracking what was (or would be) changed.

    Attributes:
        check: when true, nothing is written or removed; changes are only
            logged as errors and recorded.
        written: outputs whose contents were generated (or, in check mode,
            would have been).
        skipped: outputs that already had the rendered contents.
        erased: files removed by `cleanup` (or, in check mode, that would
            have been).
    """

    def __init__(self, check=False):
        # `escape` is the identity function, so rendered values are inserted verbatim
        self.r = pystache.Renderer(search_dirs=str(paths.lib_dir / "templates"), escape=lambda u: u)
        # passed to every template under the "generator" key
        self.generator = paths.exe_file.relative_to(paths.swift_dir)
        self.check = check
        self.written = set()
        self.skipped = set()
        self.erased = set()

    @property
    def done_something(self):
        """Whether any output was written or erased (or would have been, in check mode)."""
        return bool(self.written or self.erased)

    @property
    def rendered(self):
        """All outputs handled by `render`, whether written or skipped."""
        return self.written | self.skipped

    def render(self, name, output, **data):
        """Render template `name` with `data` into the file `output`.

        If `output` already holds exactly the rendered contents it is recorded
        in `skipped` and left untouched. Otherwise it is recorded in `written`
        and, unless in check mode, (re)written.
        """
        mnemonic, _, _ = name.lower().partition(".")
        data["generator"] = self.generator
        contents = self.r.render_name(name, data).encode()
        if output.is_file():
            with open(output, "rb") as file:
                if md5(contents) == md5(file.read()):
                    log.debug(f"skipped {output.name}")
                    self.skipped.add(output)
                    return
        if self.check:
            log.error(f"would have generated {mnemonic} {output.name}")
        else:
            # Only touch the filesystem when actually generating, so that
            # check mode stays free of side effects.
            output.parent.mkdir(parents=True, exist_ok=True)
            # Write the very bytes the up-to-date check compares against, so
            # the check is independent of locale encoding and newline translation.
            with open(output, "wb") as out:
                out.write(contents)
            log.info(f"generated {mnemonic} {output.name}")
        self.written.add(output)

    def cleanup(self, existing):
        """Remove the files in `existing` that were not rendered by this renderer.

        `existing` is a set of paths. Entries that are not regular files are
        ignored. In check mode files are only reported, not removed; either
        way they are recorded in `erased`.
        """
        for f in existing - self.rendered:
            if not f.is_file():
                continue
            if self.check:
                log.error(f"would have removed {f.name}")
            else:
                f.unlink()
                log.info(f"removed {f.name}")
            self.erased.add(f)

0 commit comments

Comments
 (0)