Skip to content

Commit 280d97b

Browse files
aldehir and CISC authored
grammar : support array references in json schema (#16792)
* grammar : support array references in json schema
* Update json-schema-to-grammar.cpp

  Co-authored-by: Sigbjørn Skjæret <[email protected]>
* grammar : improve regex when naming ref derived rules
* grammar : replace non-conformant definitions array with anyOf test case

---------

Co-authored-by: Sigbjørn Skjæret <[email protected]>
1 parent 3479efd commit 280d97b

File tree

4 files changed

+87
-18
lines changed

4 files changed

+87
-18
lines changed

common/json-schema-to-grammar.cpp

Lines changed: 19 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -601,7 +601,10 @@ class SchemaConverter {
601601
}
602602

603603
std::string _resolve_ref(const std::string & ref) {
604-
std::string ref_name = ref.substr(ref.find_last_of('/') + 1);
604+
auto it = ref.find('#');
605+
std::string ref_fragment = it != std::string::npos ? ref.substr(it + 1) : ref;
606+
static const std::regex nonalphanumeric_regex(R"([^a-zA-Z0-9-]+)");
607+
std::string ref_name = "ref" + std::regex_replace(ref_fragment, nonalphanumeric_regex, "-");
605608
if (_rules.find(ref_name) == _rules.end() && _refs_being_resolved.find(ref) == _refs_being_resolved.end()) {
606609
_refs_being_resolved.insert(ref);
607610
json resolved = _refs[ref];
@@ -774,11 +777,24 @@ class SchemaConverter {
774777
std::vector<std::string> tokens = string_split(pointer, "/");
775778
for (size_t i = 1; i < tokens.size(); ++i) {
776779
std::string sel = tokens[i];
777-
if (target.is_null() || !target.contains(sel)) {
780+
if (target.is_object() && target.contains(sel)) {
781+
target = target[sel];
782+
} else if (target.is_array()) {
783+
size_t sel_index;
784+
try {
785+
sel_index = std::stoul(sel);
786+
} catch (const std::invalid_argument & e) {
787+
sel_index = target.size();
788+
}
789+
if (sel_index >= target.size()) {
790+
_errors.push_back("Error resolving ref " + ref + ": " + sel + " not in " + target.dump());
791+
return;
792+
}
793+
target = target[sel_index];
794+
} else {
778795
_errors.push_back("Error resolving ref " + ref + ": " + sel + " not in " + target.dump());
779796
return;
780797
}
781-
target = target[sel];
782798
}
783799
_refs[ref] = target;
784800
}

examples/json_schema_to_grammar.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -371,8 +371,17 @@ def visit(n: dict):
371371
raise ValueError(f'Unsupported ref {ref}')
372372

373373
for sel in ref.split('#')[-1].split('/')[1:]:
374-
assert target is not None and sel in target, f'Error resolving ref {ref}: {sel} not in {target}'
375-
target = target[sel]
374+
assert target is not None, f'Error resolving ref {ref}: {sel} not in {target}'
375+
if isinstance(target, list):
376+
try:
377+
sel_index = int(sel)
378+
except ValueError:
379+
raise ValueError(f'Error resolving ref {ref}: {sel} not in {target}')
380+
assert 0 <= sel_index < len(target), f'Error resolving ref {ref}: {sel} not in {target}'
381+
target = target[sel_index]
382+
else:
383+
assert sel in target, f'Error resolving ref {ref}: {sel} not in {target}'
384+
target = target[sel]
376385

377386
self._refs[ref] = target
378387
else:
@@ -547,7 +556,8 @@ def join_seq():
547556

548557

549558
def _resolve_ref(self, ref):
550-
ref_name = ref.split('/')[-1]
559+
ref_fragment = ref.split('#')[-1]
560+
ref_name = 'ref' + re.sub(r'[^a-zA-Z0-9-]+', '-', ref_fragment)
551561
if ref_name not in self._rules and ref not in self._refs_being_resolved:
552562
self._refs_being_resolved.add(ref)
553563
resolved = self._refs[ref]

tests/test-json-schema-to-grammar.cpp

Lines changed: 47 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1124,9 +1124,9 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
11241124
})""",
11251125
R"""(
11261126
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
1127-
foo ::= "{" space foo-a-kv "}" space
1128-
foo-a-kv ::= "\"a\"" space ":" space string
1129-
root ::= foo
1127+
ref-definitions-foo ::= "{" space ref-definitions-foo-a-kv "}" space
1128+
ref-definitions-foo-a-kv ::= "\"a\"" space ":" space string
1129+
root ::= ref-definitions-foo
11301130
space ::= | " " | "\n"{1,2} [ \t]{0,20}
11311131
string ::= "\"" char* "\"" space
11321132
)"""
@@ -1151,20 +1151,58 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
11511151
"type": "object"
11521152
})""",
11531153
R"""(
1154-
alternative-0 ::= foo
1155-
alternative-1 ::= bar
1156-
bar ::= "{" space (bar-b-kv )? "}" space
1157-
bar-b-kv ::= "\"b\"" space ":" space number
1154+
alternative-0 ::= ref-definitions-foo
1155+
alternative-1 ::= ref-definitions-bar
11581156
decimal-part ::= [0-9]{1,16}
1159-
foo ::= "{" space (foo-a-kv )? "}" space
1160-
foo-a-kv ::= "\"a\"" space ":" space number
11611157
integral-part ::= [0] | [1-9] [0-9]{0,15}
11621158
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
1159+
ref-definitions-bar ::= "{" space (ref-definitions-bar-b-kv )? "}" space
1160+
ref-definitions-bar-b-kv ::= "\"b\"" space ":" space number
1161+
ref-definitions-foo ::= "{" space (ref-definitions-foo-a-kv )? "}" space
1162+
ref-definitions-foo-a-kv ::= "\"a\"" space ":" space number
11631163
root ::= alternative-0 | alternative-1
11641164
space ::= | " " | "\n"{1,2} [ \t]{0,20}
11651165
)"""
11661166
});
11671167

1168+
test({
1169+
SUCCESS,
1170+
"anyOf $ref",
1171+
R"""({
1172+
"properties": {
1173+
"a": {
1174+
"anyOf": [
1175+
{"type": "string"},
1176+
{"type": "number"}
1177+
]
1178+
},
1179+
"b": {
1180+
"anyOf": [
1181+
{"$ref": "#/properties/a/anyOf/0"},
1182+
{"type": "boolean"}
1183+
]
1184+
}
1185+
},
1186+
"type": "object"
1187+
})""",
1188+
R"""(
1189+
a ::= string | number
1190+
a-kv ::= "\"a\"" space ":" space a
1191+
a-rest ::= ( "," space b-kv )?
1192+
b ::= b-0 | boolean
1193+
b-0 ::= string
1194+
b-kv ::= "\"b\"" space ":" space b
1195+
boolean ::= ("true" | "false") space
1196+
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
1197+
decimal-part ::= [0-9]{1,16}
1198+
integral-part ::= [0] | [1-9] [0-9]{0,15}
1199+
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
1200+
root ::= "{" space (a-kv a-rest | b-kv )? "}" space
1201+
space ::= | " " | "\n"{1,2} [ \t]{0,20}
1202+
string ::= "\"" char* "\"" space
1203+
)"""
1204+
});
1205+
11681206
test({
11691207
SUCCESS,
11701208
"mix of allOf, anyOf and $ref (similar to https://json.schemastore.org/tsconfig.json)",

tools/server/public_legacy/json-schema-to-grammar.mjs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -345,10 +345,14 @@ export class SchemaConverter {
345345

346346
const selectors = ref.split('#')[1].split('/').slice(1);
347347
for (const sel of selectors) {
348-
if (!target || !(sel in target)) {
348+
const selIndex = parseInt(sel, 10);
349+
if (target && sel in target) {
350+
target = target[sel];
351+
} else if (target && selIndex in target) {
352+
target = target[selIndex];
353+
} else {
349354
throw new Error(`Error resolving ref ${ref}: ${sel} not in ${JSON.stringify(target)}`);
350355
}
351-
target = target[sel];
352356
}
353357

354358
this._refs[ref] = target;
@@ -594,7 +598,8 @@ export class SchemaConverter {
594598
}
595599

596600
_resolveRef(ref) {
597-
let refName = ref.split('/').pop();
601+
let refFragment = ref.split('#').pop();
602+
let refName = 'ref' + refFragment.replace(/[^a-zA-Z0-9-]+/g, '-');
598603
if (!(refName in this._rules) && !this._refsBeingResolved.has(ref)) {
599604
this._refsBeingResolved.add(ref);
600605
const resolved = this._refs[ref];

0 commit comments

Comments
 (0)