michaelnebel
diff --git a/‎swift/README.md
Lines changed: 27 additions & 1 deletion b/‎swift/README.md
Lines changed: 27 additions & 1 deletion
diff --git a/‎swift/codegen/BUILD.bazel
Lines changed: 8 additions & 0 deletions b/‎swift/codegen/BUILD.bazel
Lines changed: 8 additions & 0 deletions
diff --git a/‎swift/codegen/codegen.py
Lines changed: 7 additions & 0 deletions b/‎swift/codegen/codegen.py
Lines changed: 7 additions & 0 deletions
diff --git a/‎swift/codegen/dbschemegen.py
Lines changed: 76 additions & 0 deletions b/‎swift/codegen/dbschemegen.py
Lines changed: 76 additions & 0 deletions
diff --git a/‎swift/codegen/lib/dbscheme.py
Lines changed: 85 additions & 0 deletions b/‎swift/codegen/lib/dbscheme.py
Lines changed: 85 additions & 0 deletions
diff --git a/‎swift/codegen/lib/generator.py
Lines changed: 55 additions & 0 deletions b/‎swift/codegen/lib/generator.py
Lines changed: 55 additions & 0 deletions
diff --git a/‎swift/codegen/lib/paths.py
Lines changed: 15 additions & 0 deletions b/‎swift/codegen/lib/paths.py
Lines changed: 15 additions & 0 deletions
diff --git a/‎swift/codegen/lib/renderer.py
Lines changed: 59 additions & 0 deletions b/‎swift/codegen/lib/renderer.py
Lines changed: 59 additions & 0 deletions
@@ -4,6 +4,32 @@ The Swift codeql package is an experimental and unsupported work in progress.
 
 ## Usage
 
-Run `bazel run //swift:create-extractor-pack`, which will install `swift/extractor-pack`.
+Run
+
+```bash
+bazel run //swift:create-extractor-pack
+```
+
+which will install `swift/extractor-pack`.
+
 Using `--search-path=swift/extractor-pack` will then pick up the Swift extractor. You can also use
 `--search-path=swift`, as the extractor pack is mentioned in `swift/.codeqlmanifest.json`.
+
+Note that you can run `bazel run :create-extractor-pack` if you are already in the `swift` directory.
+
+## Code generation
+
+Make sure to install the [pip requirements](./codegen/requirements.txt) via
+
+```bash
+python3 -m pip install -r codegen/requirements.txt
+```
+
+Run
+
+```bash
+bazel run //swift/codegen
+```
+
+to update generated files. This can be shortened to
+`bazel run codegen` if you are in the `swift` directory.
@@ -0,0 +1,8 @@
+# Runnable target for the code generator: every Python file under this
+# package is listed as a source, while the mustache templates, `schema.yml`
+# and `prefix.dbscheme` are attached as runtime data.
+py_binary(
+    name = "codegen",
+    srcs = glob(["**/*.py"]),
+    data = glob(["**/*.mustache"]) + [
+        "schema.yml",
+        "prefix.dbscheme",
+    ],
+)
@@ -0,0 +1,7 @@
+#!/usr/bin/env python3
+
+from lib import generator
+import dbschemegen
+
+# Script entry point: hand the dbscheme generator to the shared runner, which
+# parses the command line and exits the process itself.
+if __name__ == "__main__":
+    generator.run(dbschemegen.generate)
@@ -0,0 +1,76 @@
+#!/usr/bin/env python3
+
+import inflection
+
+from lib.renderer import Renderer
+from lib.dbscheme import *
+from lib import paths, schema, generator
+
+# NOTE(review): `logging` is not imported explicitly in this file; presumably
+# it is picked up through the star import from `lib.dbscheme` — confirm, and
+# consider importing it directly.
+log = logging.getLogger(__name__)
+
+
+def dbtype(typename):
+    """Map a schema type name to its dbscheme spelling.
+
+    Names starting with an upper-case letter become `@`-prefixed,
+    underscored names; any other name is returned unchanged.
+    """
+    starts_upper = typename[0].isupper()
+    return "@" + inflection.underscore(typename) if starts_upper else typename
+
+
+def cls_to_dbscheme(cls: schema.Class):
+    """Yield the dbscheme declarations describing one schema class.
+
+    In order: a union over the derived classes (when there are any), the
+    class' own table (when it has no derived classes or has single-valued
+    fields), then one extra table per optional or repeated field.
+    """
+    entity = dbtype(cls.name)
+    has_derived = bool(cls.derived)
+
+    if has_derived:
+        yield DbUnion(entity, map(dbtype, cls.derived))
+
+    single_fields = [f for f in cls.fields if f.is_single()]
+    if single_fields or not has_derived:
+        # only classes without derived classes bind their own id; the others
+        # get an `id` keyset instead
+        id_column = DbColumn("id", type=entity, binding=not has_derived)
+        value_columns = [DbColumn(f.name, dbtype(f.type)) for f in single_fields]
+        yield DbTable(
+            name=inflection.tableize(cls.name),
+            columns=[id_column, *value_columns],
+            keyset=DbKeySet(["id"]) if has_derived else None,
+        )
+
+    for field in cls.fields:
+        if field.is_optional():
+            key = ["id"]
+            extra_columns = [DbColumn(field.name, dbtype(field.type))]
+        elif field.is_repeated():
+            key = ["id", "index"]
+            extra_columns = [
+                DbColumn("index", type="int"),
+                DbColumn(inflection.singularize(field.name), dbtype(field.type)),
+            ]
+        else:
+            # single-valued fields already live in the class table
+            continue
+        yield DbTable(
+            keyset=DbKeySet(key),
+            name=inflection.tableize(f"{cls.name}_{field.name}"),
+            columns=[DbColumn("id", type=entity), *extra_columns],
+        )
+
+
+def generate(opts):
+    """Render the dbscheme file described by the schema file.
+
+    Reads `opts.schema`, turns every schema class into dbscheme
+    declarations, reads the files listed under the schema's includes
+    (resolved relative to the schema file) and renders the "dbscheme"
+    template to `opts.dbscheme`.
+
+    Returns the renderer's set of written paths.
+    """
+    schema_path = opts.schema.resolve()
+    dbscheme_path = opts.dbscheme.resolve()
+    renderer = Renderer(opts.check)
+
+    with open(schema_path) as stream:
+        loaded = schema.load(stream)
+
+    declarations = []
+    for cls in loaded.classes.values():
+        declarations.extend(cls_to_dbscheme(cls))
+
+    def read_include(relative):
+        # pair the included file's contents with its path relative to the swift directory
+        path = schema_path.parent / relative
+        with open(path) as included:
+            return {"src": path.relative_to(paths.swift_dir), "data": included.read()}
+
+    includes = [read_include(inc) for inc in loaded.includes]
+    renderer.render(
+        "dbscheme",
+        dbscheme_path,
+        includes=includes,
+        src=schema_path.relative_to(paths.swift_dir),
+        declarations=declarations,
+    )
+    return renderer.written
+
+
+# Standalone entry point: run `generate` through the shared runner, asking it
+# for the "schema" and "dbscheme" option groups.
+if __name__ == "__main__":
+    generator.run(generate, tags=["schema", "dbscheme"])
@@ -0,0 +1,85 @@
+import logging
+import re
+from dataclasses import dataclass
+from typing import ClassVar, List
+
+log = logging.getLogger(__name__)
+
+# Column names that `DbColumn.name` must not emit verbatim; it appends "_" to them.
+dbscheme_keywords = {"case", "boolean", "int", "string", "type"}
+
+
+@dataclass
+class DbColumn:
+    """One column of a dbscheme table."""
+    # name of the column as given by the caller
+    schema_name: str
+    # dbscheme type; entity types start with "@"
+    type: str
+    # whether this column is flagged as binding
+    binding: bool = False
+    # set by `DbTable` on its leading column
+    first: bool = False
+
+    def name(self):
+        """Return the column name, suffixed with "_" if it is listed in `dbscheme_keywords`."""
+        suffix = "_" if self.schema_name in dbscheme_keywords else ""
+        return self.schema_name + suffix
+
+    def lhstype(self):
+        """Return the left-hand type: `int`/`unique int` for entity types, the type itself otherwise."""
+        if self.type[0] != "@":
+            return self.type
+        return "unique int" if self.binding else "int"
+
+    def rhstype(self):
+        """Return the right-hand type: the bare type for a binding entity column, `<type> ref` otherwise."""
+        is_binding_entity = self.type[0] == "@" and self.binding
+        return self.type if is_binding_entity else self.type + " ref"
+
+
+@dataclass
+class DbKeySetId:
+    """One column name inside a keyset; `first` marks the leading entry."""
+    id: str
+    first: bool = False
+
+
+@dataclass
+class DbKeySet:
+    """A non-empty keyset.
+
+    The values passed as `ids` are wrapped into `DbKeySetId` objects on
+    construction, the leading one flagged as `first`.
+    """
+    ids: List[DbKeySetId]
+
+    def __post_init__(self):
+        assert self.ids
+        self.ids = [
+            DbKeySetId(raw, first=(position == 0))
+            for position, raw in enumerate(self.ids)
+        ]
+
+
+class DbDecl:
+    """Base class of dbscheme declarations; subclasses override one of the kind flags."""
+    # both flags are off on the base class
+    is_table = is_union = False
+
+
+@dataclass
+class DbTable(DbDecl):
+    """A table declaration: a name, its columns and an optional keyset."""
+    is_table: ClassVar = True
+
+    name: str
+    columns: List[DbColumn]
+    keyset: DbKeySet = None
+
+    def __post_init__(self):
+        # flag the leading column, when there is one
+        if not self.columns:
+            return
+        leading = self.columns[0]
+        leading.first = True
+
+
+@dataclass
+class DbUnionCase:
+    """One alternative of a union; `first` marks the leading alternative."""
+    type: str
+    first: bool = False
+
+
+@dataclass
+class DbUnion(DbDecl):
+    """A union declaration: `lhs` is the union type, `rhs` its alternatives.
+
+    `rhs` may be any iterable of type names (including a generator). On
+    construction it is replaced by a list of `DbUnionCase` objects sorted by
+    type, the leading one flagged as `first`.
+
+    Raises AssertionError if `rhs` yields no alternatives.
+    """
+    is_union: ClassVar = True
+
+    lhs: str
+    rhs: List[DbUnionCase]
+
+    def __post_init__(self):
+        # Materialize before validating: a generator is always truthy, so
+        # checking `self.rhs` itself would let an empty one through.
+        cases = sorted((DbUnionCase(x) for x in self.rhs or ()), key=lambda c: c.type)
+        assert cases, "a union needs at least one alternative"
+        cases[0].first = True
+        self.rhs = cases
@@ -0,0 +1,55 @@
+import argparse
+import collections
+import logging
+import pathlib
+import sys
+
+from . import paths
+
+# Registry of declared options, keyed by tag; untagged options live under "*".
+options = collections.defaultdict(list)
+
+
+class Option:
+    """A deferred `add_argument` call that registers itself in `options`.
+
+    Positional and keyword arguments are stored untouched and replayed by
+    `add_to`. The option is filed under each of its `tags`, or under "*"
+    when no tags are given.
+    """
+
+    def __init__(self, *args, tags=None, **kwargs):
+        self.args = args
+        self.kwargs = kwargs
+        for tag in (tags or ["*"]):
+            options[tag].append(self)
+
+    def add_to(self, parser: argparse.ArgumentParser):
+        """Add this option to `parser`."""
+        parser.add_argument(*self.args, **self.kwargs)
+
+
+# Option declarations. Each `Option` registers itself on construction:
+# `--check` and `--verbose` carry no tags and land under "*", while
+# `--schema` and `--dbscheme` are filed under the tag of the same name.
+Option("--check", "-c", action="store_true")
+Option("--verbose", "-v", action="store_true")
+Option("--schema", tags=["schema"], type=pathlib.Path, default=paths.swift_dir / "codegen/schema.yml")
+Option("--dbscheme", tags=["dbscheme"], type=pathlib.Path, default=paths.swift_dir / "ql/lib/swift.dbscheme")
+
+
+def _parse(*tags):
+    """Parse the command line and configure logging.
+
+    With no `tags`, every registered option is accepted. Otherwise only the
+    untagged options (filed under "*") plus those filed under one of `tags`
+    are accepted.
+
+    Returns the namespace produced by `argparse`. Logging is set to DEBUG
+    when `--verbose` was given and to INFO otherwise.
+    """
+    parser = argparse.ArgumentParser()
+    if tags:
+        # Copy the shared list: extending `options["*"]` in place would leak
+        # tagged options into the global registry.
+        selected = list(options["*"])
+        for tag in tags:
+            # `.get` keeps the defaultdict from growing entries for unknown tags
+            selected.extend(options.get(tag, ()))
+    else:
+        selected = [opt for group in options.values() for opt in group]
+    added = set()
+    for opt in selected:
+        # an option filed under several tags must reach the parser only once,
+        # otherwise argparse reports a conflicting option string
+        if id(opt) in added:
+            continue
+        added.add(id(opt))
+        opt.add_to(parser)
+    ret = parser.parse_args()
+    log_level = logging.DEBUG if ret.verbose else logging.INFO
+    logging.basicConfig(format="{levelname} {message}", style='{', level=log_level)
+    return ret
+
+
+def run(*generate, tags=()):
+    """Parse options, run every generator and exit the process.
+
+    Each callable in `generate` is invoked with the parsed options. The
+    exit status is 1 when `--check` was given and at least one generator
+    returned a truthy value, and 0 otherwise.
+    """
+    opts = _parse(*tags)
+    # a list (not a bare `any(...)`) so every generator runs to completion
+    outcomes = [bool(g(opts)) for g in generate]
+    exit_code = 1 if (opts.check and any(outcomes)) else 0
+    sys.exit(exit_code)
@@ -0,0 +1,15 @@
+import pathlib
+import sys
+import os
+
+# Locate the swift directory: use BUILD_WORKSPACE_DIRECTORY when it is set,
+# otherwise derive it from the location of this file.
+if 'BUILD_WORKSPACE_DIRECTORY' in os.environ:
+    _workspace_dir = pathlib.Path(os.environ['BUILD_WORKSPACE_DIRECTORY'])
+    swift_dir = _workspace_dir.joinpath('swift')
+    lib_dir = swift_dir.joinpath('codegen', 'lib')
+else:
+    _this_file = pathlib.Path(__file__).resolve()
+    lib_dir = _this_file.parent
+    swift_dir = _this_file.parents[2]
+
+
+# absolute path of the script that was launched
+exe_file = pathlib.Path(sys.argv[0]).resolve()
@@ -0,0 +1,59 @@
+import hashlib
+import logging
+
+import pystache
+
+from . import paths
+
+log = logging.getLogger(__name__)
+
+
+def md5(data):
+    """Return the raw MD5 digest (as bytes) of the bytes-like `data`."""
+    hasher = hashlib.md5()
+    hasher.update(data)
+    return hasher.digest()
+
+
+class Renderer:
+    """Renders mustache templates to files and tracks what happened to each path.
+
+    `written`, `skipped` and `erased` collect the output paths that were
+    (re)generated, left alone because their contents already matched, or
+    removed by `cleanup`. With `check=True` nothing on disk is changed:
+    pending changes are logged as errors but still recorded in
+    `written`/`erased`.
+    """
+
+    def __init__(self, check=False):
+        # templates are looked up under `templates` in the lib directory;
+        # the escape function returns its input unchanged
+        self.r = pystache.Renderer(search_dirs=str(paths.lib_dir / "templates"), escape=lambda u: u)
+        # path of the running script relative to the swift directory, passed to every template
+        self.generator = paths.exe_file.relative_to(paths.swift_dir)
+        self.check = check
+        self.written = set()
+        self.skipped = set()
+        self.erased = set()
+
+    @property
+    def done_something(self):
+        """Whether any file was (or, in check mode, would have been) written or removed."""
+        return bool(self.written or self.erased)
+
+    @property
+    def rendered(self):
+        """All output paths handled by `render`, whether written or skipped."""
+        return self.written | self.skipped
+
+    def render(self, name, output, **data):
+        """Render template `name` with `data` to the path `output`.
+
+        The file is left untouched (and recorded in `skipped`) when it
+        already holds exactly the rendered bytes. Otherwise it is written,
+        or only reported in check mode, and recorded in `written`.
+        """
+        mnemonic, _, _ = name.lower().partition(".")
+        data["generator"] = self.generator
+        # Encode once and use the same bytes for comparing and for writing, so
+        # that neither the locale encoding nor newline translation can make a
+        # freshly written file look out of date.
+        contents = self.r.render_name(name, data).encode()
+        if output.is_file():
+            with open(output, "rb") as file:
+                if md5(contents) == md5(file.read()):
+                    log.debug(f"skipped {output.name}")
+                    self.skipped.add(output)
+                    return
+        if self.check:
+            log.error(f"would have generated {mnemonic} {output.name}")
+        else:
+            # directories are only created when actually writing, so check
+            # mode leaves the filesystem alone
+            output.parent.mkdir(parents=True, exist_ok=True)
+            with open(output, "wb") as out:
+                out.write(contents)
+            log.info(f"generated {mnemonic} {output.name}")
+        self.written.add(output)
+
+    def cleanup(self, existing):
+        """Remove the files in `existing` that this renderer did not render.
+
+        In check mode the files are kept and only reported. Either way each
+        affected path is recorded in `erased`.
+        """
+        for f in existing - self.rendered:
+            if f.is_file():
+                if self.check:
+                    log.error(f"would have removed {f.name}")
+                else:
+                    f.unlink()
+                    log.info(f"removed {f.name}")
+                self.erased.add(f)