Skip to content

Commit 1e9ff17

Browse files
author
Fabien Coelho
committed
add working jsu-compile based bowtie with C backend
1 parent 367af5c commit 1e9ff17

File tree

3 files changed

+358
-0
lines changed

3 files changed

+358
-0
lines changed

implementations/c-jsu/Dockerfile

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
FROM alpine:3.23

# NOTE python images do not seem to have the google-re2 python wrapper available
# without recompiling from sources, whereas the base alpine image has it,
# so start from alpine.

RUN mkdir -p /usr/src/myapp
WORKDIR /usr/src/myapp

# allow installing from the package index (not set) or building from sources
# (set to a branch or commit)
ARG JMC
ARG JSU

# --no-cache: do not keep the apk package index in the image layer
RUN apk add --no-cache git py3-pip py3-re2 icu-data-full

# force install, otherwise it would require a virtual environment
# --no-cache-dir: do not keep pip download/build caches in the image layers
RUN pip install --no-cache-dir --break-system-packages jsonschema-specifications
RUN if [ "$JMC" ] ; then jmc="git+https://github.com/clairey-zx81/json-model@$JMC" ; fi ; \
    pip install --no-cache-dir --break-system-packages "${jmc:-json_model_compiler}"
RUN if [ "$JSU" ] ; then jsu="git+https://github.com/zx80/json-schema-utils@$JSU" ; fi ; \
    pip install --no-cache-dir --break-system-packages "${jsu:-json_schema_utils}"

# c-specific setup
# NOTE avoid unavailable re2 C wrapper that needs to be installed from sources
RUN apk add --no-cache gcc musl-dev jansson jansson-dev pcre2 pcre2-dev

COPY bowtie_jsu_compile.py .
CMD ["python3", "./bowtie_jsu_compile.py", "C", "--regex-engine", "pcre2"]

implementations/c-jsu/README.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# JSON Schema Utils Compiler
2+
3+
The [JSU compiler](https://github.com/zx80/json-schema-utils) converts a
4+
[schema](https://json-schema.org/) to a [model](https://json-model.org/)
5+
internally and then uses [jmc](https://json-model.org/#/JMC) as a backend
6+
to generate a validator in C, Python, JS, Java, PL/pgSQL or Perl.
7+
8+
## Manual Testing
9+
10+
```sh
11+
docker build --no-cache -t docker.io/zx80/bowtie-jsu -f Dockerfile .
12+
docker build --no-cache --build-arg JMC=dev --build-arg JSU=dev -t docker.io/zx80/bowtie-jsu -f Dockerfile .
13+
docker image ls zx80/bowtie-jsu
14+
docker run --rm --entrypoint /bin/sh -it zx80/bowtie-jsu
15+
docker run --rm -i zx80/bowtie-jsu
16+
bowtie smoke -i docker.io/zx80/bowtie-jsu
17+
18+
for version in 7 6 4 3 2019 2020 ; do
19+
echo "# version $version"
20+
bowtie suite -i docker.io/zx80/bowtie-jsu $version > suite_$version.jsonl
21+
bowtie summary -s failures suite_$version.jsonl
22+
done
23+
```
Lines changed: 307 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,307 @@
1+
#! /usr/bin/env python3
2+
3+
"""
4+
A generic Bowtie harness for the JSON Schema Utils validator using
5+
the JSON Model Compiler as a backend for a target language.
6+
7+
The harness invokes the "jsu-compile" command to generate a validator and
8+
then the generated validator script or executable to validate each test.
9+
"""
10+
11+
from pathlib import Path
12+
import hashlib
13+
import json
14+
import os
15+
import platform
16+
import shutil
17+
import subprocess
18+
import sys
19+
import traceback
20+
21+
from jsonschema_specifications import REGISTRY
22+
23+
type JsonObject = dict[str, Json]
24+
type JsonArray = list[Json]
25+
type Json = None | bool | int | float | str | JsonArray | JsonObject
26+
27+
# available JSON Schema specifications
28+
SPECS: dict[str, JsonObject] = {
29+
url: REGISTRY.contents(url) for url in REGISTRY
30+
}
31+
32+
# JSON Schema version URL to internal version
33+
VERSIONS: dict[str, int] = {
34+
"https://json-schema.org/draft/2020-12/schema": 9,
35+
"https://json-schema.org/draft/2019-09/schema": 8,
36+
"http://json-schema.org/draft-07/schema#": 7,
37+
"http://json-schema.org/draft-06/schema#": 6,
38+
"http://json-schema.org/draft-04/schema#": 4,
39+
"http://json-schema.org/draft-03/schema#": 3,
40+
}
41+
42+
# cache is used for registry and meta schemas
43+
CACHE: Path = Path(__file__).parent / "schema-cache-by-hashed-urls"
44+
45+
# directory for temporary files
46+
TMP: Path = Path(__file__).parent / "work"
47+
48+
# environment variables
49+
ENV: Path = Path(__file__).parent / ".env"
50+
51+
def get_version(cmd: list[str]) -> str:
    """Execute *cmd* and return the first non-empty line of its standard output.

    Raises ``subprocess.CalledProcessError`` if the command exits with a
    non-zero status, and ``IndexError`` if it prints no non-empty line.
    """
    completed = subprocess.run(cmd, text=True, capture_output=True, check=True)  # noqa: S603
    non_empty = [line for line in completed.stdout.split("\n") if line != ""]
    return non_empty[0]
56+
57+
def json_file(filename: str, data: Json) -> Path:
    """Serialize *data* as JSON into ``TMP / filename`` and return that path."""
    target: Path = TMP / filename
    with target.open("w") as handle:
        json.dump(data, handle)
    return target
63+
64+
65+
class RunnerError(Exception):
    """Harness error: unsupported language, unknown command or unexpected validator output."""
67+
68+
69+
class Runner:
70+
71+
def __init__(self, language: str = "python", options: list[str] = []):
72+
73+
# setup environment
74+
if ENV.exists():
75+
with Path.open(ENV) as env:
76+
for line in env:
77+
if line.startswith("export ") and "=" in line:
78+
var, val = line[7:].rstrip().split("=", 1)
79+
os.environ[var] = val
80+
81+
# setup language
82+
self.language: str = language.lower()
83+
84+
# current dialect
85+
self.version: int | None = None
86+
87+
# count input lines for some error messages
88+
self.line: int = 0
89+
90+
# compiler output file
91+
self.output: str
92+
93+
# how to execute the generated file
94+
self.runner: list[str]
95+
96+
# command to get the language version
97+
vers_cmd: list[str]
98+
99+
# per-language settings
100+
match self.language:
101+
case "python":
102+
self.output = TMP / "schema.py"
103+
self.runner = ["python", str(self.output)]
104+
vers_cmd = ["python", "--version"]
105+
case "c":
106+
self.output = TMP / "schema.out"
107+
self.runner = [str(self.output)]
108+
vers_cmd = ["cc", "--version"]
109+
case "js": # requires node_modules
110+
self.output = TMP / "schema.js"
111+
self.runner = ["node", str(self.output)]
112+
vers_cmd = ["node", "--version"]
113+
case "java": # requires CLASSPATH
114+
self.output = TMP / "schema.class"
115+
self.runner = ["java", "schema", "-j", "GSON"]
116+
vers_cmd = ["java", "--version"]
117+
case "perl": # requires PERLLIB
118+
self.output = TMP / "schema.pl"
119+
self.runner = ["perl", str(self.output)]
120+
# perl --version is too verbose, use a short script
121+
vers_cmd = ["perl", "-e", 'print "Perl $^V\n"']
122+
case "plpgsql": # requires a running Postgres
123+
self.output = TMP / "schema.sql"
124+
self.runner = ["run_plpgsql.sh", str(self.output)]
125+
vers_cmd = ["psql", "--version"]
126+
case _:
127+
raise RunnerError(f"unexpected language: {language}")
128+
129+
self.language_version = get_version(vers_cmd)
130+
131+
# compiler call prefix missing version, output file and input schema
132+
self.jsu_compile = [
133+
"jsu-compile",
134+
"--cache", str(CACHE),
135+
"--no-fix", # do not try to fix the schema
136+
"--no-strict", # accept any odd looking schema
137+
"--no-reporting", # do not generate location reporting code
138+
"--loose", # ints are floats, floats may be ints
139+
# next options may override the above defaults
140+
*options,
141+
]
142+
self.jsu_version = get_version(["jsu-compile", "--version"])
143+
144+
TMP.mkdir(exist_ok=True)
145+
146+
def compile_schema(self, schema: JsonObject) -> Path:
147+
"""Compile a schema for the current language."""
148+
149+
schema_file = json_file("schema.json", schema)
150+
output_file = TMP / self.output
151+
152+
jsu_compile = [
153+
*self.jsu_compile,
154+
"--schema-version", str(self.version or 7),
155+
"-o", str(output_file),
156+
str(schema_file),
157+
]
158+
159+
subprocess.run(jsu_compile, text=True, check=True) # noqa: S603
160+
161+
return output_file
162+
163+
def run_test(self, test: Json) -> bool:
164+
"""Run one test using generated validator."""
165+
166+
test_file = json_file("test.json", test)
167+
168+
ps = subprocess.run( # noqa: S603
169+
[ *self.runner, str(test_file) ],
170+
text=True, capture_output=True, check=True,
171+
)
172+
173+
if "FAIL" in ps.stdout:
174+
return False
175+
elif "PASS" in ps.stdout:
176+
return True
177+
else:
178+
raise RunnerError(f"unexpected validation output: {ps.output}")
179+
180+
def cmd_start(self, req: JsonObject) -> JsonObject:
181+
"""Respond to start with various meta data about the implementation."""
182+
183+
assert req.get("version") == 1, "expecting protocol version 1"
184+
185+
return {
186+
"version": 1,
187+
"implementation": {
188+
"name": "jsu-compile",
189+
"version": self.jsu_version,
190+
"language": self.language,
191+
"language_version": self.language_version,
192+
"os": platform.system(),
193+
"os_version": platform.release(),
194+
"dialects": sorted(VERSIONS.keys()),
195+
"homepage": "https://github.com/zx80/json-schema-utils/",
196+
"documentation": "https://github.com/zx80/json-schema-utils/",
197+
"issues": "https://github.com/zx80/json-schema-utils/issues",
198+
"source": "https://github.com/zx80/json-schema-utils.git",
199+
},
200+
}
201+
202+
def cmd_dialect(self, req: JsonObject) -> JsonObject:
203+
"""Set current JSON Schema dialect, needed for schema semantics."""
204+
205+
assert "dialect" in req, "dialect command expects a dialect"
206+
207+
try:
208+
self.version = VERSIONS[req["dialect"]]
209+
except KeyError: # unknown version
210+
self.version = 0
211+
212+
return {"ok": True}
213+
214+
def cmd_run(self, req: JsonObject) -> JsonObject:
215+
"""Run one case and its tests."""
216+
217+
# sanity checks out of the try/except
218+
case = req["case"]
219+
assert isinstance(case, dict), "case is an object"
220+
jschema = case["schema"]
221+
assert isinstance(jschema, (bool, dict)), "boolean or object schema"
222+
tests = case["tests"]
223+
assert isinstance(tests, list), "tests is a list"
224+
assert all(isinstance(t, dict) for t in tests), "tests are objects"
225+
assert all("instance" in t for t in tests), "tests contain instance"
226+
description = case.get("description")
227+
assert description is None or isinstance(description, str)
228+
229+
CACHE.mkdir(exist_ok=True)
230+
results: JsonArray = []
231+
232+
try:
233+
# put registries in cache
234+
for reg in [SPECS, case.get("registry")]:
235+
if reg is not None:
236+
for url, schema in reg.items():
237+
# use truncated hashed url as filename
238+
uh = hashlib.sha3_256(url.encode()).hexdigest()[:16]
239+
with Path.open(CACHE / f"{uh}.json", "w") as fp:
240+
json.dump(schema, fp)
241+
242+
# generate validator
243+
self.compile_schema(jschema)
244+
245+
# apply to test vector
246+
results = [
247+
{"valid": self.run_test(test["instance"])}
248+
for test in tests
249+
]
250+
251+
except Exception: # an internal error occurred
252+
return {
253+
"errored": True,
254+
"seq": req["seq"],
255+
"context": {"traceback": traceback.format_exc()},
256+
}
257+
258+
finally: # wipe out cache to avoid state leaks
259+
shutil.rmtree(CACHE)
260+
261+
return {
262+
"seq": req["seq"],
263+
"results": results,
264+
}
265+
266+
def cmd_stop(self, req: JsonObject) -> JsonObject:
267+
"""Stop all processing."""
268+
sys.exit(0)
269+
270+
def process(self, req: JsonObject) -> JsonObject:
271+
"""Process one request."""
272+
273+
cmd = req["cmd"]
274+
match cmd:
275+
case "start":
276+
return self.cmd_start(req)
277+
case "dialect":
278+
return self.cmd_dialect(req)
279+
case "run":
280+
return self.cmd_run(req)
281+
case "stop":
282+
return self.cmd_stop(req)
283+
case _: # trigger crash
284+
raise RunnerError(f"unexpected bowtie command cmd={cmd}")
285+
286+
def run(self):
287+
"""Runner purpose is to run."""
288+
289+
# request/response protocol is to receive and send one-line jsons
290+
for line in sys.stdin:
291+
self.line += 1
292+
try:
293+
req = json.loads(line)
294+
assert isinstance(req, dict), "input must be a json object"
295+
res = self.process(req)
296+
except Exception as e:
297+
sys.stderr.write(f"{self.line}: invalid json input ({e})\n")
298+
sys.stderr.flush()
299+
raise # voluntary crash
300+
sys.stdout.write(json.dumps(res))
301+
sys.stdout.write("\n")
302+
sys.stdout.flush()
303+
304+
305+
if __name__ == "__main__":
    # the first argument selects the target language (python when absent),
    # the remaining ones are handed to the runner as extra options
    cli_args = sys.argv[1:]
    language = cli_args[0] if cli_args else "python"
    Runner(language, cli_args[1:]).run()

0 commit comments

Comments
 (0)