|
| 1 | +#! /usr/bin/env python3 |
| 2 | + |
| 3 | +""" |
| 4 | +A generic Bowtie harness for the JSON Schema Utils validator using |
| 5 | +the JSON Model Compiler as a backend for a target language. |
| 6 | +
|
| 7 | +The harness invokes the "jsu-compile" command to generate a validator and |
| 8 | +then the generated validator script or executable to validate each test. |
| 9 | +""" |
| 10 | + |
| 11 | +from pathlib import Path |
| 12 | +import hashlib |
| 13 | +import json |
| 14 | +import os |
| 15 | +import platform |
| 16 | +import shutil |
| 17 | +import subprocess |
| 18 | +import sys |
| 19 | +import traceback |
| 20 | + |
| 21 | +from jsonschema_specifications import REGISTRY |
| 22 | + |
| 23 | +type JsonObject = dict[str, Json] |
| 24 | +type JsonArray = list[Json] |
| 25 | +type Json = None | bool | int | float | str | JsonArray | JsonObject |
| 26 | + |
| 27 | +# available JSON Schema specifications |
| 28 | +SPECS: dict[str, JsonObject] = { |
| 29 | + url: REGISTRY.contents(url) for url in REGISTRY |
| 30 | +} |
| 31 | + |
| 32 | +# JSON Schema version URL to internal version |
| 33 | +VERSIONS: dict[str, int] = { |
| 34 | + "https://json-schema.org/draft/2020-12/schema": 9, |
| 35 | + "https://json-schema.org/draft/2019-09/schema": 8, |
| 36 | + "http://json-schema.org/draft-07/schema#": 7, |
| 37 | + "http://json-schema.org/draft-06/schema#": 6, |
| 38 | + "http://json-schema.org/draft-04/schema#": 4, |
| 39 | + "http://json-schema.org/draft-03/schema#": 3, |
| 40 | +} |
| 41 | + |
| 42 | +# cache is used for registry and meta schemas |
| 43 | +CACHE: Path = Path(__file__).parent / "schema-cache-by-hashed-urls" |
| 44 | + |
| 45 | +# directory for temporary files |
| 46 | +TMP: Path = Path(__file__).parent / "work" |
| 47 | + |
| 48 | +# environment variables |
| 49 | +ENV: Path = Path(__file__).parent / ".env" |
| 50 | + |
def get_version(cmd: list[str]) -> str:
    """Run an external command and return its first non-blank output line.

    Standard output is searched first. When it contains no text, standard
    error is searched instead, so that a tool printing its version banner
    there does not make the harness fail with a bare IndexError.

    Args:
        cmd: the command and its arguments, executed without a shell.

    Returns:
        The first non-blank line, stripped of surrounding whitespace.

    Raises:
        subprocess.CalledProcessError: the command exited with a non-zero status.
        RuntimeError: the command printed nothing on either stream.
    """
    ps = subprocess.run(cmd, text=True, capture_output=True, check=True)  # noqa: S603
    for stream in (ps.stdout, ps.stderr):
        for line in stream.split("\n"):
            if line.strip():
                return line.strip()
    raise RuntimeError(f"no version output from command: {cmd}")
| 56 | + |
def json_file(filename: str, data: Json) -> Path:
    """Serialize *data* as JSON into the TMP directory and return the file path."""
    target: Path = TMP / filename
    with target.open("w") as stream:
        json.dump(data, stream)
    return target
| 63 | + |
| 64 | + |
class RunnerError(Exception):
    """Harness failure: unexpected language, command or validator output."""
| 67 | + |
| 68 | + |
| 69 | +class Runner: |
| 70 | + |
| 71 | + def __init__(self, language: str = "python", options: list[str] = []): |
| 72 | + |
| 73 | + # setup environment |
| 74 | + if ENV.exists(): |
| 75 | + with Path.open(ENV) as env: |
| 76 | + for line in env: |
| 77 | + if line.startswith("export ") and "=" in line: |
| 78 | + var, val = line[7:].rstrip().split("=", 1) |
| 79 | + os.environ[var] = val |
| 80 | + |
| 81 | + # setup language |
| 82 | + self.language: str = language.lower() |
| 83 | + |
| 84 | + # current dialect |
| 85 | + self.version: int | None = None |
| 86 | + |
| 87 | + # count input lines for some error messages |
| 88 | + self.line: int = 0 |
| 89 | + |
| 90 | + # compiler output file |
| 91 | + self.output: str |
| 92 | + |
| 93 | + # how to execute the generated file |
| 94 | + self.runner: list[str] |
| 95 | + |
| 96 | + # command to get the language version |
| 97 | + vers_cmd: list[str] |
| 98 | + |
| 99 | + # per-language settings |
| 100 | + match self.language: |
| 101 | + case "python": |
| 102 | + self.output = TMP / "schema.py" |
| 103 | + self.runner = ["python", str(self.output)] |
| 104 | + vers_cmd = ["python", "--version"] |
| 105 | + case "c": |
| 106 | + self.output = TMP / "schema.out" |
| 107 | + self.runner = [str(self.output)] |
| 108 | + vers_cmd = ["cc", "--version"] |
| 109 | + case "js": # requires node_modules |
| 110 | + self.output = TMP / "schema.js" |
| 111 | + self.runner = ["node", str(self.output)] |
| 112 | + vers_cmd = ["node", "--version"] |
| 113 | + case "java": # requires CLASSPATH |
| 114 | + self.output = TMP / "schema.class" |
| 115 | + self.runner = ["java", "schema", "-j", "GSON"] |
| 116 | + vers_cmd = ["java", "--version"] |
| 117 | + case "perl": # requires PERLLIB |
| 118 | + self.output = TMP / "schema.pl" |
| 119 | + self.runner = ["perl", str(self.output)] |
| 120 | + # perl --version is too verbose, use a short script |
| 121 | + vers_cmd = ["perl", "-e", 'print "Perl $^V\n"'] |
| 122 | + case "plpgsql": # requires a running Postgres |
| 123 | + self.output = TMP / "schema.sql" |
| 124 | + self.runner = ["run_plpgsql.sh", str(self.output)] |
| 125 | + vers_cmd = ["psql", "--version"] |
| 126 | + case _: |
| 127 | + raise RunnerError(f"unexpected language: {language}") |
| 128 | + |
| 129 | + self.language_version = get_version(vers_cmd) |
| 130 | + |
| 131 | + # compiler call prefix missing version, output file and input schema |
| 132 | + self.jsu_compile = [ |
| 133 | + "jsu-compile", |
| 134 | + "--cache", str(CACHE), |
| 135 | + "--no-fix", # do not try to fix the schema |
| 136 | + "--no-strict", # accept any odd looking schema |
| 137 | + "--no-reporting", # do not generate location reporting code |
| 138 | + "--loose", # ints are floats, floats may be ints |
| 139 | + # next options may override the above defaults |
| 140 | + *options, |
| 141 | + ] |
| 142 | + self.jsu_version = get_version(["jsu-compile", "--version"]) |
| 143 | + |
| 144 | + TMP.mkdir(exist_ok=True) |
| 145 | + |
| 146 | + def compile_schema(self, schema: JsonObject) -> Path: |
| 147 | + """Compile a schema for the current language.""" |
| 148 | + |
| 149 | + schema_file = json_file("schema.json", schema) |
| 150 | + output_file = TMP / self.output |
| 151 | + |
| 152 | + jsu_compile = [ |
| 153 | + *self.jsu_compile, |
| 154 | + "--schema-version", str(self.version or 7), |
| 155 | + "-o", str(output_file), |
| 156 | + str(schema_file), |
| 157 | + ] |
| 158 | + |
| 159 | + subprocess.run(jsu_compile, text=True, check=True) # noqa: S603 |
| 160 | + |
| 161 | + return output_file |
| 162 | + |
| 163 | + def run_test(self, test: Json) -> bool: |
| 164 | + """Run one test using generated validator.""" |
| 165 | + |
| 166 | + test_file = json_file("test.json", test) |
| 167 | + |
| 168 | + ps = subprocess.run( # noqa: S603 |
| 169 | + [ *self.runner, str(test_file) ], |
| 170 | + text=True, capture_output=True, check=True, |
| 171 | + ) |
| 172 | + |
| 173 | + if "FAIL" in ps.stdout: |
| 174 | + return False |
| 175 | + elif "PASS" in ps.stdout: |
| 176 | + return True |
| 177 | + else: |
| 178 | + raise RunnerError(f"unexpected validation output: {ps.output}") |
| 179 | + |
| 180 | + def cmd_start(self, req: JsonObject) -> JsonObject: |
| 181 | + """Respond to start with various meta data about the implementation.""" |
| 182 | + |
| 183 | + assert req.get("version") == 1, "expecting protocol version 1" |
| 184 | + |
| 185 | + return { |
| 186 | + "version": 1, |
| 187 | + "implementation": { |
| 188 | + "name": "jsu-compile", |
| 189 | + "version": self.jsu_version, |
| 190 | + "language": self.language, |
| 191 | + "language_version": self.language_version, |
| 192 | + "os": platform.system(), |
| 193 | + "os_version": platform.release(), |
| 194 | + "dialects": sorted(VERSIONS.keys()), |
| 195 | + "homepage": "https://github.com/zx80/json-schema-utils/", |
| 196 | + "documentation": "https://github.com/zx80/json-schema-utils/", |
| 197 | + "issues": "https://github.com/zx80/json-schema-utils/issues", |
| 198 | + "source": "https://github.com/zx80/json-schema-utils.git", |
| 199 | + }, |
| 200 | + } |
| 201 | + |
| 202 | + def cmd_dialect(self, req: JsonObject) -> JsonObject: |
| 203 | + """Set current JSON Schema dialect, needed for schema semantics.""" |
| 204 | + |
| 205 | + assert "dialect" in req, "dialect command expects a dialect" |
| 206 | + |
| 207 | + try: |
| 208 | + self.version = VERSIONS[req["dialect"]] |
| 209 | + except KeyError: # unknown version |
| 210 | + self.version = 0 |
| 211 | + |
| 212 | + return {"ok": True} |
| 213 | + |
| 214 | + def cmd_run(self, req: JsonObject) -> JsonObject: |
| 215 | + """Run one case and its tests.""" |
| 216 | + |
| 217 | + # sanity checks out of the try/except |
| 218 | + case = req["case"] |
| 219 | + assert isinstance(case, dict), "case is an object" |
| 220 | + jschema = case["schema"] |
| 221 | + assert isinstance(jschema, (bool, dict)), "boolean or object schema" |
| 222 | + tests = case["tests"] |
| 223 | + assert isinstance(tests, list), "tests is a list" |
| 224 | + assert all(isinstance(t, dict) for t in tests), "tests are objects" |
| 225 | + assert all("instance" in t for t in tests), "tests contain instance" |
| 226 | + description = case.get("description") |
| 227 | + assert description is None or isinstance(description, str) |
| 228 | + |
| 229 | + CACHE.mkdir(exist_ok=True) |
| 230 | + results: JsonArray = [] |
| 231 | + |
| 232 | + try: |
| 233 | + # put registries in cache |
| 234 | + for reg in [SPECS, case.get("registry")]: |
| 235 | + if reg is not None: |
| 236 | + for url, schema in reg.items(): |
| 237 | + # use truncated hashed url as filename |
| 238 | + uh = hashlib.sha3_256(url.encode()).hexdigest()[:16] |
| 239 | + with Path.open(CACHE / f"{uh}.json", "w") as fp: |
| 240 | + json.dump(schema, fp) |
| 241 | + |
| 242 | + # generate validator |
| 243 | + self.compile_schema(jschema) |
| 244 | + |
| 245 | + # apply to test vector |
| 246 | + results = [ |
| 247 | + {"valid": self.run_test(test["instance"])} |
| 248 | + for test in tests |
| 249 | + ] |
| 250 | + |
| 251 | + except Exception: # an internal error occurred |
| 252 | + return { |
| 253 | + "errored": True, |
| 254 | + "seq": req["seq"], |
| 255 | + "context": {"traceback": traceback.format_exc()}, |
| 256 | + } |
| 257 | + |
| 258 | + finally: # wipe out cache to avoid state leaks |
| 259 | + shutil.rmtree(CACHE) |
| 260 | + |
| 261 | + return { |
| 262 | + "seq": req["seq"], |
| 263 | + "results": results, |
| 264 | + } |
| 265 | + |
| 266 | + def cmd_stop(self, req: JsonObject) -> JsonObject: |
| 267 | + """Stop all processing.""" |
| 268 | + sys.exit(0) |
| 269 | + |
| 270 | + def process(self, req: JsonObject) -> JsonObject: |
| 271 | + """Process one request.""" |
| 272 | + |
| 273 | + cmd = req["cmd"] |
| 274 | + match cmd: |
| 275 | + case "start": |
| 276 | + return self.cmd_start(req) |
| 277 | + case "dialect": |
| 278 | + return self.cmd_dialect(req) |
| 279 | + case "run": |
| 280 | + return self.cmd_run(req) |
| 281 | + case "stop": |
| 282 | + return self.cmd_stop(req) |
| 283 | + case _: # trigger crash |
| 284 | + raise RunnerError(f"unexpected bowtie command cmd={cmd}") |
| 285 | + |
| 286 | + def run(self): |
| 287 | + """Runner purpose is to run.""" |
| 288 | + |
| 289 | + # request/response protocol is to receive and send one-line jsons |
| 290 | + for line in sys.stdin: |
| 291 | + self.line += 1 |
| 292 | + try: |
| 293 | + req = json.loads(line) |
| 294 | + assert isinstance(req, dict), "input must be a json object" |
| 295 | + res = self.process(req) |
| 296 | + except Exception as e: |
| 297 | + sys.stderr.write(f"{self.line}: invalid json input ({e})\n") |
| 298 | + sys.stderr.flush() |
| 299 | + raise # voluntary crash |
| 300 | + sys.stdout.write(json.dumps(res)) |
| 301 | + sys.stdout.write("\n") |
| 302 | + sys.stdout.flush() |
| 303 | + |
| 304 | + |
if __name__ == "__main__":
    # the optional first argument selects the target language; any further
    # arguments are handed to the Runner as extra compiler options
    language = sys.argv[1] if len(sys.argv) > 1 else "python"
    Runner(language, sys.argv[2:]).run()
0 commit comments