Skip to content

Commit 3bbe74f

Browse files
authored
jsonschema: make schema state independent of resolve arguments (#85)
Move schema state that depends on resolution into the Resolved struct. The remaining unexported schema fields are dependent only on the schema itself and its sub-schemas. Now a schema can be resolved multiple times, as itself or as part of other schemas. Fixes #84.
1 parent dced3e4 commit 3bbe74f

File tree

4 files changed

+171
-136
lines changed

4 files changed

+171
-136
lines changed

jsonschema/resolve.go

Lines changed: 127 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,77 @@ type Resolved struct {
2525
root *Schema
2626
// map from $ids to their schemas
2727
resolvedURIs map[string]*Schema
28+
// map from schemas to additional info computed during resolution
29+
resolvedInfos map[*Schema]*resolvedInfo
30+
}
31+
32+
func newResolved(s *Schema) *Resolved {
33+
return &Resolved{
34+
root: s,
35+
resolvedURIs: map[string]*Schema{},
36+
resolvedInfos: map[*Schema]*resolvedInfo{},
37+
}
38+
}
39+
40+
// resolvedInfo holds information specific to a schema that is computed by [Schema.Resolve].
41+
type resolvedInfo struct {
42+
s *Schema
43+
// The JSON Pointer path from the root schema to here.
44+
// Used in errors.
45+
path string
46+
// The schema's base schema.
47+
// If the schema is the root or has an ID, its base is itself.
48+
// Otherwise, its base is the innermost enclosing schema whose base
49+
// is itself.
50+
// Intuitively, a base schema is one that can be referred to with a
51+
// fragmentless URI.
52+
base *Schema
53+
// The URI for the schema, if it is the root or has an ID.
54+
// Otherwise nil.
55+
// Invariants:
56+
// The resolvedInfo for base has a non-nil uri.
57+
// base == s <=> uri != nil
58+
uri *url.URL
59+
// The schema to which Ref refers.
60+
resolvedRef *Schema
61+
62+
// If the schema has a dynamic ref, exactly one of the next two fields
63+
// will be non-zero after successful resolution.
64+
// The schema to which the dynamic ref refers when it acts lexically.
65+
resolvedDynamicRef *Schema
66+
// The anchor to look up on the stack when the dynamic ref acts dynamically.
67+
dynamicRefAnchor string
68+
69+
// The following fields are independent of arguments to Schema.Resolve,
70+
// so they could live on the Schema. We put them here for simplicity.
71+
72+
// The set of required properties.
73+
isRequired map[string]bool
74+
75+
// Compiled regexps.
76+
pattern *regexp.Regexp
77+
patternProperties map[*regexp.Regexp]*Schema
78+
79+
// Map from anchors to subschemas.
80+
anchors map[string]anchorInfo
2881
}
2982

3083
// Schema returns the schema that was resolved.
3184
// It must not be modified.
3285
func (r *Resolved) Schema() *Schema {
	// The root is handed back as-is, not copied.
	return r.root
}
3386

87+
// schemaString returns a short string describing the schema.
88+
func (r *Resolved) schemaString(s *Schema) string {
89+
if s.ID != "" {
90+
return s.ID
91+
}
92+
info := r.resolvedInfos[s]
93+
if info.path != "" {
94+
return info.path
95+
}
96+
return "<anonymous schema>"
97+
}
98+
3499
// A Loader reads and unmarshals the schema at uri, if any.
35100
type Loader func(uri *url.URL) (*Schema, error)
36101

@@ -59,6 +124,8 @@ type ResolveOptions struct {
59124
// Resolve resolves all references within the schema and performs other tasks that
60125
// prepare the schema for validation.
61126
// If opts is nil, the default values are used.
127+
// The schema must not be changed after Resolve is called.
128+
// The same schema may be resolved multiple times.
62129
func (root *Schema) Resolve(opts *ResolveOptions) (*Resolved, error) {
63130
// There are up to five steps required to prepare a schema to validate.
64131
// 1. Load: read the schema from somewhere and unmarshal it.
@@ -71,9 +138,6 @@ func (root *Schema) Resolve(opts *ResolveOptions) (*Resolved, error) {
71138
// in a map from URIs to schemas within root.
72139
// 4. Resolve references: all refs in the schemas are replaced with the schema they refer to.
73140
// 5. (Optional.) If opts.ValidateDefaults is true, validate the defaults.
74-
if root.path != "" {
75-
return nil, fmt.Errorf("jsonschema: Resolve: %s already resolved", root)
76-
}
77141
r := &resolver{loaded: map[string]*Resolved{}}
78142
if opts != nil {
79143
r.opts = *opts
@@ -121,46 +185,49 @@ func (r *resolver) resolve(s *Schema, baseURI *url.URL) (*Resolved, error) {
121185
if baseURI.Fragment != "" {
122186
return nil, fmt.Errorf("base URI %s must not have a fragment", baseURI)
123187
}
124-
if err := s.check(); err != nil {
188+
rs := newResolved(s)
189+
190+
if err := s.check(rs.resolvedInfos); err != nil {
125191
return nil, err
126192
}
127193

128-
m, err := resolveURIs(s, baseURI)
129-
if err != nil {
194+
if err := resolveURIs(rs, baseURI); err != nil {
130195
return nil, err
131196
}
132-
rs := &Resolved{root: s, resolvedURIs: m}
197+
133198
// Remember the schema by both the URI we loaded it from and its canonical name,
134199
// which may differ if the schema has an $id.
135200
// We must set the map before calling resolveRefs, or ref cycles will cause unbounded recursion.
136201
r.loaded[baseURI.String()] = rs
137-
r.loaded[s.uri.String()] = rs
202+
r.loaded[rs.resolvedInfos[s].uri.String()] = rs
138203

139204
if err := r.resolveRefs(rs); err != nil {
140205
return nil, err
141206
}
142207
return rs, nil
143208
}
144209

145-
func (root *Schema) check() error {
210+
func (root *Schema) check(infos map[*Schema]*resolvedInfo) error {
146211
// Check for structural validity. Do this first and fail fast:
147212
// bad structure will cause other code to panic.
148-
if err := root.checkStructure(); err != nil {
213+
if err := root.checkStructure(infos); err != nil {
149214
return err
150215
}
151216

152217
var errs []error
153218
report := func(err error) { errs = append(errs, err) }
154219

155220
for ss := range root.all() {
156-
ss.checkLocal(report)
221+
ss.checkLocal(report, infos)
157222
}
158223
return errors.Join(errs...)
159224
}
160225

161226
// checkStructure verifies that root and its subschemas form a tree.
162227
// It also records a unique path for each schema in infos, to improve error messages.
163-
func (root *Schema) checkStructure() error {
228+
func (root *Schema) checkStructure(infos map[*Schema]*resolvedInfo) error {
229+
assert(len(infos) == 0, "non-empty infos")
230+
164231
var check func(reflect.Value, []byte) error
165232
check = func(v reflect.Value, path []byte) error {
166233
// For the purpose of error messages, the root schema has path "root"
@@ -173,16 +240,15 @@ func (root *Schema) checkStructure() error {
173240
if s == nil {
174241
return fmt.Errorf("jsonschema: schema at %s is nil", p)
175242
}
176-
if s.path != "" {
243+
if info, ok := infos[s]; ok {
177244
// We've seen s before.
178245
// The schema graph at root is not a tree, but it needs to
179-
// be because we assume a unique parent when we store a schema's base
180-
// in the Schema. A cycle would also put Schema.all into an infinite
181-
// recursion.
246+
// be because a schema's base must be unique.
247+
// A cycle would also put Schema.all into an infinite recursion.
182248
return fmt.Errorf("jsonschema: schemas at %s do not form a tree; %s appears more than once (also at %s)",
183-
root, s.path, p)
249+
root, info.path, p)
184250
}
185-
s.path = p
251+
infos[s] = &resolvedInfo{s: s, path: p}
186252

187253
for _, info := range schemaFieldInfos {
188254
fv := v.Elem().FieldByIndex(info.sf.Index)
@@ -224,7 +290,7 @@ func (root *Schema) checkStructure() error {
224290
// Since checking a regexp involves compiling it, checkLocal saves those compiled regexps
225291
// in infos[s] for later use.
226292
// It passes each error it finds to report.
227-
func (s *Schema) checkLocal(report func(error)) {
293+
func (s *Schema) checkLocal(report func(error), infos map[*Schema]*resolvedInfo) {
228294
addf := func(format string, args ...any) {
229295
msg := fmt.Sprintf(format, args...)
230296
report(fmt.Errorf("jsonschema.Schema: %s: %s", s, msg))
@@ -250,33 +316,35 @@ func (s *Schema) checkLocal(report func(error)) {
250316
addf("cannot validate a schema with $vocabulary")
251317
}
252318

319+
info := infos[s]
320+
253321
// Check and compile regexps.
254322
if s.Pattern != "" {
255323
re, err := regexp.Compile(s.Pattern)
256324
if err != nil {
257325
addf("pattern: %v", err)
258326
} else {
259-
s.pattern = re
327+
info.pattern = re
260328
}
261329
}
262330
if len(s.PatternProperties) > 0 {
263-
s.patternProperties = map[*regexp.Regexp]*Schema{}
331+
info.patternProperties = map[*regexp.Regexp]*Schema{}
264332
for reString, subschema := range s.PatternProperties {
265333
re, err := regexp.Compile(reString)
266334
if err != nil {
267335
addf("patternProperties[%q]: %v", reString, err)
268336
continue
269337
}
270-
s.patternProperties[re] = subschema
338+
info.patternProperties[re] = subschema
271339
}
272340
}
273341

274342
// Build a set of required properties, to avoid quadratic behavior when validating
275343
// a struct.
276344
if len(s.Required) > 0 {
277-
s.isRequired = map[string]bool{}
345+
info.isRequired = map[string]bool{}
278346
for _, r := range s.Required {
279-
s.isRequired[r] = true
347+
info.isRequired[r] = true
280348
}
281349
}
282350
}
@@ -285,8 +353,6 @@ func (s *Schema) checkLocal(report func(error)) {
285353
// to baseURI.
286354
// See https://json-schema.org/draft/2020-12/json-schema-core#section-8.2, section
287355
// 8.2.1.
288-
289-
// TODO(jba): dynamicAnchors (§8.2.2)
290356
//
291357
// Every schema has a base URI and a parent base URI.
292358
//
@@ -316,11 +382,12 @@ func (s *Schema) checkLocal(report func(error)) {
316382
// allOf/1 http://b.com (absolute $id; doesn't matter that it's not under the loaded URI)
317383
// allOf/2 http://a.com/root.json (inherited from parent)
318384
// allOf/2/not http://a.com/root.json (inherited from parent)
319-
func resolveURIs(root *Schema, baseURI *url.URL) (map[string]*Schema, error) {
320-
resolvedURIs := map[string]*Schema{}
321-
385+
func resolveURIs(rs *Resolved, baseURI *url.URL) error {
322386
var resolve func(s, base *Schema) error
323387
resolve = func(s, base *Schema) error {
388+
info := rs.resolvedInfos[s]
389+
baseInfo := rs.resolvedInfos[base]
390+
324391
// ids are scoped to the root.
325392
if s.ID != "" {
326393
// A non-empty ID establishes a new base.
@@ -332,26 +399,27 @@ func resolveURIs(root *Schema, baseURI *url.URL) (map[string]*Schema, error) {
332399
return fmt.Errorf("$id %s must not have a fragment", s.ID)
333400
}
334401
// The base URI for this schema is its $id resolved against the parent base.
335-
s.uri = base.uri.ResolveReference(idURI)
336-
if !s.uri.IsAbs() {
337-
return fmt.Errorf("$id %s does not resolve to an absolute URI (base is %s)", s.ID, s.base.uri)
402+
info.uri = baseInfo.uri.ResolveReference(idURI)
403+
if !info.uri.IsAbs() {
404+
return fmt.Errorf("$id %s does not resolve to an absolute URI (base is %q)", s.ID, baseInfo.uri)
338405
}
339-
resolvedURIs[s.uri.String()] = s
406+
rs.resolvedURIs[info.uri.String()] = s
340407
base = s // needed for anchors
408+
baseInfo = rs.resolvedInfos[base]
341409
}
342-
s.base = base
410+
info.base = base
343411

344412
// Anchors and dynamic anchors are URI fragments that are scoped to their base.
345413
// We treat them as keys in a map stored in the resolvedInfo of the base schema.
346414
setAnchor := func(anchor string, dynamic bool) error {
347415
if anchor != "" {
348-
if _, ok := base.anchors[anchor]; ok {
349-
return fmt.Errorf("duplicate anchor %q in %s", anchor, base.uri)
416+
if _, ok := baseInfo.anchors[anchor]; ok {
417+
return fmt.Errorf("duplicate anchor %q in %s", anchor, baseInfo.uri)
350418
}
351-
if base.anchors == nil {
352-
base.anchors = map[string]anchorInfo{}
419+
if baseInfo.anchors == nil {
420+
baseInfo.anchors = map[string]anchorInfo{}
353421
}
354-
base.anchors[anchor] = anchorInfo{s, dynamic}
422+
baseInfo.anchors[anchor] = anchorInfo{s, dynamic}
355423
}
356424
return nil
357425
}
@@ -368,28 +436,27 @@ func resolveURIs(root *Schema, baseURI *url.URL) (map[string]*Schema, error) {
368436
}
369437

370438
// Set the root URI to the base for now. If the root has an $id, this will change.
371-
root.uri = baseURI
439+
rs.resolvedInfos[rs.root].uri = baseURI
372440
// The original base, even if changed, is still a valid way to refer to the root.
373-
resolvedURIs[baseURI.String()] = root
374-
if err := resolve(root, root); err != nil {
375-
return nil, err
376-
}
377-
return resolvedURIs, nil
441+
rs.resolvedURIs[baseURI.String()] = rs.root
442+
443+
return resolve(rs.root, rs.root)
378444
}
379445

380446
// resolveRefs replaces every ref in the schemas with the schema it refers to.
381447
// A reference that doesn't resolve within the schema may refer to some other schema
382448
// that needs to be loaded.
383449
func (r *resolver) resolveRefs(rs *Resolved) error {
384450
for s := range rs.root.all() {
451+
info := rs.resolvedInfos[s]
385452
if s.Ref != "" {
386453
refSchema, _, err := r.resolveRef(rs, s, s.Ref)
387454
if err != nil {
388455
return err
389456
}
390457
// Whether or not the anchor referred to by $ref fragment is dynamic,
391458
// the ref still treats it lexically.
392-
s.resolvedRef = refSchema
459+
info.resolvedRef = refSchema
393460
}
394461
if s.DynamicRef != "" {
395462
refSchema, frag, err := r.resolveRef(rs, s, s.DynamicRef)
@@ -399,11 +466,11 @@ func (r *resolver) resolveRefs(rs *Resolved) error {
399466
if frag != "" {
400467
// The dynamic ref's fragment points to a dynamic anchor.
401468
// We must resolve the fragment at validation time.
402-
s.dynamicRefAnchor = frag
469+
info.dynamicRefAnchor = frag
403470
} else {
404471
// There is no dynamic anchor in the lexically referenced schema,
405472
// so the dynamic ref behaves like a lexical ref.
406-
s.resolvedDynamicRef = refSchema
473+
info.resolvedDynamicRef = refSchema
407474
}
408475
}
409476
}
@@ -417,7 +484,8 @@ func (r *resolver) resolveRef(rs *Resolved, s *Schema, ref string) (_ *Schema, d
417484
return nil, "", err
418485
}
419486
// URI-resolve the ref against the current base URI to get a complete URI.
420-
refURI = s.base.uri.ResolveReference(refURI)
487+
base := rs.resolvedInfos[s].base
488+
refURI = rs.resolvedInfos[base].uri.ResolveReference(refURI)
421489
// The non-fragment part of a ref URI refers to the base URI of some schema.
422490
// This part is the same for dynamic refs too: their non-fragment part resolves
423491
// lexically.
@@ -447,6 +515,13 @@ func (r *resolver) resolveRef(rs *Resolved, s *Schema, ref string) (_ *Schema, d
447515
}
448516
referencedSchema = lrs.root
449517
assert(referencedSchema != nil, "nil referenced schema")
518+
// Copy the resolvedInfos from lrs into rs, without overwriting
519+
// (hence we can't use maps.Insert).
520+
for s, i := range lrs.resolvedInfos {
521+
if rs.resolvedInfos[s] == nil {
522+
rs.resolvedInfos[s] = i
523+
}
524+
}
450525
}
451526
}
452527

@@ -456,7 +531,9 @@ func (r *resolver) resolveRef(rs *Resolved, s *Schema, ref string) (_ *Schema, d
456531
// A JSON Pointer is either the empty string or begins with a '/',
457532
// whereas anchors are always non-empty strings that don't contain slashes.
458533
if frag != "" && !strings.HasPrefix(frag, "/") {
459-
info, found := referencedSchema.anchors[frag]
534+
resInfo := rs.resolvedInfos[referencedSchema]
535+
info, found := resInfo.anchors[frag]
536+
460537
if !found {
461538
return nil, "", fmt.Errorf("no anchor %q in %s", frag, s)
462539
}

0 commit comments

Comments
 (0)