Skip to content

Commit f493f0c

Browse files
committed
schema loading performance improvement - fixes #348
Context: to properly handle references by $id values, whenever we encounter a $ref, after resolving the root schema json, we have to check whether the fragment part of the $ref identifies a subschema by $id. (We have to do this even when the fragment part is a valid json pointer, because nothing stops a schema author from setting $id to a json pointer string - see `ReferenceLookupTest#idAsJsonPointerWorks()`, which covers this case.) To perform this $id check, until now we always deep-traversed the entire root schema json for each encountered $ref, looking for a matching $id. This repeated deep-traversal caused severe performance problems with extremely big schemas that use a lot of $refs. This change fixes the problem by deep-traversing each involved json document at most once, at least for document-local references. `LoadingState` gains a <rootSchema, SubschemaRegistry> map, where SubschemaRegistry is essentially a map of <$id, subschemaJson> pairs. The registry for a root json is initialized the first time it is necessary to look up a $ref in that json. The registry eagerly deep-traverses the whole root json and collects the $id -> subschemaJson pairs. Afterwards, the lookup-by-$id part of the $ref lookup process is just an O(1) hashmap lookup. API changes: none, all affected and newly introduced classes are package-private. Performance improvements based on local testing: ran a validation with the ECV-JsonSchema-tNoBOM.json schema and an empty json instance ({}), * with 1.12.0: loading didn't complete (killed the process after 20 minutes) * with HEAD: schema loaded & validation passed in ~30 seconds.
1 parent 2a44566 commit f493f0c

File tree

8 files changed

+168
-36
lines changed

8 files changed

+168
-36
lines changed

core/src/main/java/org/everit/json/schema/loader/JsonPointerEvaluator.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -111,9 +111,7 @@ static final JsonPointerEvaluator forDocument(JsonObject document, String fragme
111111

112112
private static JsonObject configureBasedOnState(JsonObject obj, LoadingState callingState, String id) {
113113
URI documentURI = validateURI(callingState, id).asJavaURI();
114-
obj.ls = new LoadingState(callingState.config,
115-
callingState.pointerSchemas, obj, obj,
116-
documentURI, new SchemaLocation(documentURI, emptyList()));
114+
obj.ls = callingState.createCopyForNewSchemaJson(documentURI, obj, new SchemaLocation(documentURI, emptyList()));
117115
return obj;
118116
}
119117

core/src/main/java/org/everit/json/schema/loader/LoadingState.java

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import java.util.Collection;
99
import java.util.Collections;
1010
import java.util.Comparator;
11+
import java.util.HashMap;
1112
import java.util.List;
1213
import java.util.Map;
1314

@@ -34,7 +35,7 @@ static URI extractChildId(URI parentScopeId, Object childJson, String idKeyword)
3435
return parentScopeId;
3536
}
3637

37-
static final Comparator<Class<?>> CLASS_COMPARATOR = (cl1, cl2) -> cl1.getSimpleName().compareTo(cl2.getSimpleName());
38+
static final Comparator<Class<?>> CLASS_COMPARATOR = Comparator.comparing(Class::getSimpleName);
3839

3940
final LoaderConfig config;
4041

@@ -48,16 +49,29 @@ static URI extractChildId(URI parentScopeId, Object childJson, String idKeyword)
4849

4950
final JsonValue schemaJson;
5051

52+
final Map<JsonValue, SubschemaRegistry> subschemaRegistries;
53+
54+
LoadingState(LoaderConfig config,
55+
Map<String, ReferenceKnot> pointerSchemas,
56+
Object rootSchemaJson,
57+
Object schemaJson,
58+
URI parentScopeId,
59+
SchemaLocation pointerToCurrentObj) {
60+
this(config, pointerSchemas, rootSchemaJson, schemaJson, parentScopeId, pointerToCurrentObj, new HashMap<>());
61+
}
62+
5163
LoadingState(LoaderConfig config,
5264
Map<String, ReferenceKnot> pointerSchemas,
5365
Object rootSchemaJson,
5466
Object schemaJson,
5567
URI parentScopeId,
56-
SchemaLocation pointerToCurrentObj) {
68+
SchemaLocation pointerToCurrentObj,
69+
Map<JsonValue, SubschemaRegistry> subschemaRegistries) {
5770
this.config = config;
5871
this.pointerSchemas = requireNonNull(pointerSchemas, "pointerSchemas cannot be null");
5972
this.id = extractChildId(parentScopeId, schemaJson, config.specVersion.idKeyword());
6073
this.pointerToCurrentObj = requireNonNull(pointerToCurrentObj, "pointerToCurrentObj cannot be null");
74+
this.subschemaRegistries = requireNonNull(subschemaRegistries, "subschemaRegistries cannot be null");
6175
this.rootSchemaJson = JsonValue.of(rootSchemaJson);
6276
if (this.rootSchemaJson.ls == null) {
6377
this.rootSchemaJson.ls = this;
@@ -67,7 +81,7 @@ static URI extractChildId(URI parentScopeId, Object childJson, String idKeyword)
6781
}
6882

6983
SchemaLoader.SchemaLoaderBuilder initNewDocumentLoader() {
70-
return config.initLoader().pointerSchemas(pointerSchemas);
84+
return config.initLoader().pointerSchemas(pointerSchemas).subschemaRegistries(subschemaRegistries);
7185
}
7286

7387
private Object getRawChildOfObject(JsonObject obj, String key) {
@@ -103,7 +117,8 @@ JsonValue childFor(String key) {
103117
rootSchemaJson,
104118
rawChild,
105119
id,
106-
pointerToCurrentObj.addPointerSegment(key)
120+
pointerToCurrentObj.addPointerSegment(key),
121+
subschemaRegistries
107122
);
108123
return childLs.schemaJson;
109124
}
@@ -136,6 +151,10 @@ SchemaException createSchemaException(Class<?> actualType, Class<?> expectedType
136151
return new SchemaException(locationOfCurrentObj(), actualType, expectedType, furtherExpectedTypes);
137152
}
138153

154+
LoadingState createCopyForNewSchemaJson(URI parentScopeId, JsonValue newRootJson, SchemaLocation locationOfNewRootJson) {
155+
return new LoadingState(config, pointerSchemas, newRootJson, newRootJson, parentScopeId, locationOfNewRootJson, subschemaRegistries);
156+
}
157+
139158
SchemaException createSchemaException(Class<?> actualType, Collection<Class<?>> expectedTypes) {
140159
ArrayList<Class<?>> sortedTypes = new ArrayList<>(expectedTypes);
141160
Collections.sort(sortedTypes, CLASS_COMPARATOR);
@@ -145,4 +164,8 @@ SchemaException createSchemaException(Class<?> actualType, Collection<Class<?>>
145164
SpecificationVersion specVersion() {
146165
return config.specVersion;
147166
}
167+
168+
SubschemaRegistry getSubschemaRegistry(JsonValue rootJson) {
169+
return subschemaRegistries.computeIfAbsent(rootJson, SubschemaRegistry::new);
170+
}
148171
}

core/src/main/java/org/everit/json/schema/loader/ReferenceLookup.java

Lines changed: 3 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -68,31 +68,7 @@ static Map<String, Object> extend(Map<String, Object> additional, Map<String, Ob
6868
}
6969

7070
static JsonObject lookupObjById(JsonValue val, String idAttrVal) {
71-
String idKeyword = val.ls.specVersion().idKeyword();
72-
if (val instanceof JsonObject) {
73-
JsonObject obj = (JsonObject) val;
74-
if (obj.containsKey(idKeyword)
75-
&& obj.require(idKeyword).typeOfValue() == String.class
76-
&& obj.require(idKeyword).requireString().equals(idAttrVal)) {
77-
return obj;
78-
}
79-
for (String key : obj.keySet()) {
80-
JsonObject maybeFound = lookupObjById(obj.require(key), idAttrVal);
81-
if (maybeFound != null) {
82-
return maybeFound;
83-
}
84-
}
85-
} else if (val instanceof JsonArray) {
86-
JsonArray arr = (JsonArray) val;
87-
for (int i = 0; i < arr.length(); ++i) {
88-
JsonObject maybeFound = lookupObjById(arr.at(i), idAttrVal);
89-
if (maybeFound != null) {
90-
return maybeFound;
91-
}
92-
}
93-
}
94-
95-
return null;
71+
return val.ls.getSubschemaRegistry(val).getById(idAttrVal);
9672
}
9773

9874
/**
@@ -197,7 +173,8 @@ private Schema.Builder<?> createReferenceSchema(String relPointerString, String
197173

198174
private JsonObject initJsonObjectById(URI id) {
199175
JsonObject o = JsonValue.of(ls.config.schemasByURI.get(id)).requireObject();
200-
new LoadingState(ls.config, ls.pointerSchemas, o, o, id, SchemaLocation.parseURI(id.toString()));
176+
// new LoadingState(ls.config, ls.pointerSchemas, o, o, id, SchemaLocation.parseURI(id.toString()), ls.subschemaRegistries);
177+
ls.createCopyForNewSchemaJson(id, o, SchemaLocation.parseURI(id.toString()));
201178
return o;
202179
}
203180

core/src/main/java/org/everit/json/schema/loader/SchemaLoader.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ public static class SchemaLoaderBuilder {
5555

5656
Map<String, ReferenceKnot> pointerSchemas = new HashMap<>();
5757

58+
Map<JsonValue, SubschemaRegistry> subschemaRegistries = new HashMap<>();
59+
5860
URI id;
5961

6062
SchemaLocation pointerToCurrentObj = SchemaLocation.empty();
@@ -204,6 +206,11 @@ SchemaLoaderBuilder pointerSchemas(Map<String, ReferenceKnot> pointerSchemas) {
204206
return this;
205207
}
206208

209+
SchemaLoaderBuilder subschemaRegistries(Map<JsonValue, SubschemaRegistry> subschemaRegistries) {
210+
this.subschemaRegistries = subschemaRegistries;
211+
return this;
212+
}
213+
207214
SchemaLoaderBuilder rootSchemaJson(Object rootSchemaJson) {
208215
this.rootSchemaJson = rootSchemaJson;
209216
return this;
@@ -346,7 +353,8 @@ public SchemaLoader(SchemaLoaderBuilder builder) {
346353
effectiveRootSchemaJson,
347354
builder.schemaJson,
348355
builder.id,
349-
builder.pointerToCurrentObj);
356+
builder.pointerToCurrentObj,
357+
builder.subschemaRegistries);
350358
}
351359

352360
private static Optional<String> extractSchemaKeywordValue(Object effectiveRootSchemaJson) {
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
package org.everit.json.schema.loader;
2+
3+
import java.util.HashMap;
4+
import java.util.Map;
5+
6+
class SubschemaRegistry {
7+
8+
final Map<String, JsonObject> storage = new HashMap<>();
9+
10+
11+
SubschemaRegistry(JsonValue rootJson) {
12+
collectObjectsWithId(rootJson);
13+
}
14+
15+
void collectObjectsWithId(JsonValue val) {
16+
String idKeyword = val.ls.specVersion().idKeyword();
17+
if (val instanceof JsonObject) {
18+
JsonObject obj = (JsonObject) val;
19+
if (obj.containsKey(idKeyword)
20+
&& obj.require(idKeyword).typeOfValue() == String.class) {
21+
storage.put(obj.require(idKeyword).requireString(), obj);
22+
}
23+
for (String key : obj.keySet()) {
24+
collectObjectsWithId(obj.require(key));
25+
}
26+
} else if (val instanceof JsonArray) {
27+
JsonArray arr = (JsonArray) val;
28+
for (int i = 0; i < arr.length(); ++i) {
29+
collectObjectsWithId(arr.at(i));
30+
}
31+
}
32+
}
33+
34+
JsonObject getById(String id) {
35+
return storage.get(id);
36+
}
37+
}

core/src/test/java/org/everit/json/schema/loader/LoadingStateTest.java

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,16 @@
44
import static java.util.Collections.emptyMap;
55
import static org.everit.json.schema.loader.JsonValueTest.withLs;
66
import static org.junit.Assert.assertEquals;
7+
import static org.junit.Assert.assertNotNull;
8+
import static org.junit.Assert.assertSame;
9+
import static org.junit.Assert.assertTrue;
710
import static org.junit.Assert.fail;
811

912
import java.util.HashMap;
1013
import java.util.Map;
1114

15+
import org.everit.json.schema.ResourceLoader;
16+
import org.everit.json.schema.Schema;
1217
import org.everit.json.schema.SchemaException;
1318
import org.everit.json.schema.SchemaLocation;
1419
import org.everit.json.schema.loader.internal.DefaultSchemaClient;
@@ -20,9 +25,10 @@
2025
*/
2126
public class LoadingStateTest {
2227

28+
private static final LoaderConfig CONFIG = new LoaderConfig(new DefaultSchemaClient(), emptyMap(), SpecificationVersion.DRAFT_4, false);
29+
2330
private LoadingState emptySubject() {
24-
LoaderConfig config = new LoaderConfig(new DefaultSchemaClient(), emptyMap(), SpecificationVersion.DRAFT_4, false);
25-
return new LoadingState(config, emptyMap(), new HashMap<>(),
31+
return new LoadingState(CONFIG, emptyMap(), new HashMap<>(),
2632
new HashMap<>(), null, SchemaLocation.empty());
2733
}
2834

@@ -102,4 +108,16 @@ protected LoadingState singleElemArrayState() {
102108
return withLs(JsonValue.of(asList("elem"))).ls;
103109
}
104110

111+
@Test
112+
public void testGetSubschemaRegistry() {
113+
JsonValue obj = JsonValue.of(ResourceLoader.DEFAULT.readObj("objecttestcases.json").getJSONObject("nestedIdV6"));
114+
Map<JsonValue, SubschemaRegistry> registries = new HashMap<>();
115+
LoadingState ls = new LoadingState(CONFIG, emptyMap(), obj, obj, null, SchemaLocation.empty(), registries);
116+
assertTrue(registries.isEmpty());
117+
SubschemaRegistry first = ls.getSubschemaRegistry(obj),
118+
second = ls.getSubschemaRegistry(obj);
119+
assertNotNull(first);
120+
assertSame(first, second);
121+
}
122+
105123
}
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
package org.everit.json.schema.loader;
2+
3+
import static java.util.Collections.emptyMap;
4+
import static org.junit.Assert.assertEquals;
5+
import static org.junit.Assert.assertSame;
6+
7+
import org.everit.json.schema.ResourceLoader;
8+
import org.everit.json.schema.SchemaLocation;
9+
import org.everit.json.schema.loader.internal.DefaultSchemaClient;
10+
import org.junit.Test;
11+
12+
public class SubschemaRegistryTest {
13+
14+
static final LoaderConfig CONFIG = new LoaderConfig(new DefaultSchemaClient(), emptyMap(), SpecificationVersion.DRAFT_6, false);
15+
16+
@Test
17+
public void emptySchemaContainsNoElems() {
18+
JsonValue obj = JsonValue.of(emptyMap());
19+
new LoadingState(CONFIG, emptyMap(), obj, obj, null, SchemaLocation.empty());
20+
21+
SubschemaRegistry registry = new SubschemaRegistry(obj);
22+
23+
assertEquals(0, registry.storage.size());
24+
}
25+
26+
@Test
27+
public void topLevelIdIsRecognized() {
28+
JsonValue obj = JsonValue.of(ResourceLoader.DEFAULT.readObj("testschemas.json").getJSONObject("schemaWithIdV6"));
29+
new LoadingState(CONFIG, emptyMap(), obj, obj, null, SchemaLocation.empty());
30+
31+
SubschemaRegistry registry = new SubschemaRegistry(obj);
32+
33+
JsonObject actual = registry.getById("http://example.org/schema/");
34+
assertSame(obj, actual);
35+
}
36+
37+
@Test
38+
public void childInObjById_isRecognized() {
39+
JsonValue obj = JsonValue.of(ResourceLoader.DEFAULT.readObj("ref-lookup-tests.json"));
40+
new LoadingState(CONFIG, emptyMap(), obj, obj, null, SchemaLocation.empty());
41+
42+
SubschemaRegistry registry = new SubschemaRegistry(obj);
43+
44+
JsonObject actual = registry.getById("has-id");
45+
JsonValue expected = obj.requireObject().require("definitions").requireObject().require("HasId");
46+
assertEquals(expected.unwrap(), actual.unwrap());
47+
}
48+
49+
@Test
50+
public void childInArrayById_isRecognized() {
51+
JsonValue obj = JsonValue.of(ResourceLoader.DEFAULT.readObj("ref-lookup-tests.json"));
52+
new LoadingState(CONFIG, emptyMap(), obj, obj, null, SchemaLocation.empty());
53+
54+
SubschemaRegistry registry = new SubschemaRegistry(obj);
55+
56+
JsonObject actual = registry.getById("all-of-part-0");
57+
JsonValue expected = obj.requireObject().require("definitions").requireObject().require("someAllOf")
58+
.requireObject().require("allOf").requireArray().at(0);
59+
assertEquals(expected.unwrap(), actual.unwrap());
60+
}
61+
62+
63+
}

core/src/test/resources/org/everit/jsonvalidator/ref-lookup-tests.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,14 @@
3737
"IdIsJsonPointer": {
3838
"$id": "#/definitions/pointer",
3939
"description": "the ID can be a JSON pointer"
40+
},
41+
"someAllOf": {
42+
"allOf": [
43+
{
44+
"$id": "all-of-part-0",
45+
"type": "boolean"
46+
}
47+
]
4048
}
4149
}
4250
}

0 commit comments

Comments
 (0)