diff --git a/CHANGELOG.md b/CHANGELOG.md index f03a3ccc..93f79491 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,14 @@ ## [Unreleased] +### Breaking Changes + +- Registry construction now uses an explicit prepare step, and `with_registry` now borrows the prepared registry. `ValidationOptions::with_resource` and `ValidationOptions::with_resources` were removed in favor of building a `Registry` first. See the [Migration Guide](MIGRATION.md) for the details. + +### Performance + +- Avoid registry clones and document clones during validator construction. This improves real-world schema compilation by roughly 10-20% in internal benchmarks. + ## [0.45.1] - 2026-04-06 ### Fixed diff --git a/MIGRATION.md b/MIGRATION.md index c22fab9a..1dc27005 100644 --- a/MIGRATION.md +++ b/MIGRATION.md @@ -1,5 +1,74 @@ # Migration Guide +## Upgrading from 0.45.x to 0.46.0 + +Registry construction is now explicit: add shared schemas first, then call +`prepare()` to build a reusable registry. Validators no longer take ownership of +that registry; pass it by reference with `with_registry(&registry)`. +`ValidationOptions::with_resource` and `ValidationOptions::with_resources` were +removed in favor of building a `Registry` first. For cases with multiple shared +schemas, `extend([...])` is the batch form of `add(...)`. 
+ +```rust +// Old (0.45.x) +use jsonschema::{Registry, Resource}; + +// Inline shared schema +let validator = jsonschema::options() + .with_resource( + "https://example.com/schema", + Resource::from_contents(shared_schema), + ) + .build(&schema)?; + +// Multiple shared schemas +let validator = jsonschema::options() + .with_resources([ + ( + "https://example.com/schema-1", + Resource::from_contents(schema_1), + ), + ( + "https://example.com/schema-2", + Resource::from_contents(schema_2), + ), + ].into_iter()) + .build(&schema)?; + +// Prebuilt registry +let registry = Registry::try_from_resources([ + ( + "https://example.com/schema", + Resource::from_contents(shared_schema), + ), +])?; +let validator = jsonschema::options() + .with_registry(registry) + .build(&schema)?; + +// New (0.46.0) +use jsonschema::Registry; + +// Shared registry + borrowed validator build +let registry = Registry::new() + .add("https://example.com/schema", shared_schema)? + .prepare()?; +let validator = jsonschema::options() + .with_registry(&registry) + .build(&schema)?; + +// Multiple shared schemas +let registry = Registry::new() + .extend([ + ("https://example.com/schema-1", schema_1), + ("https://example.com/schema-2", schema_2), + ])? 
+ .prepare()?; +let validator = jsonschema::options() + .with_registry(&registry) + .build(&schema)?; +``` + ## Upgrading from 0.38.x to 0.39.0 ### Custom keyword API simplified diff --git a/crates/jsonschema-cli/src/main.rs b/crates/jsonschema-cli/src/main.rs index 6628abdd..4959c63f 100644 --- a/crates/jsonschema-cli/src/main.rs +++ b/crates/jsonschema-cli/src/main.rs @@ -444,10 +444,10 @@ fn path_to_uri(path: &std::path::Path) -> String { result } -fn options_for_schema( +fn options_for_schema<'a>( schema_path: &Path, http_options: Option<&jsonschema::HttpOptions>, -) -> Result> { +) -> Result, Box> { let base_uri = path_to_uri(schema_path); let base_uri = referencing::uri::from_str(&base_uri)?; let mut options = jsonschema::options().with_base_uri(base_uri); @@ -695,16 +695,30 @@ fn run_bundle(args: BundleArgs) -> ExitCode { Err(error) => return fail_with_error(error), }; + let mut registry = if let Some(http_opts) = http_options.as_ref() { + let retriever = match jsonschema::HttpRetriever::new(http_opts) { + Ok(retriever) => retriever, + Err(error) => return fail_with_error(error), + }; + jsonschema::Registry::new().retriever(retriever) + } else { + jsonschema::Registry::new() + }; for (uri, path) in &resources { let resource_json = match read_json(path) { Ok(value) => value, Err(error) => return fail_with_error(error), }; - opts = opts.with_resource( - uri.as_str(), - referencing::Resource::from_contents(resource_json), - ); + registry = match registry.add(uri, resource_json) { + Ok(registry) => registry, + Err(error) => return fail_with_error(error), + }; } + let registry = match registry.prepare() { + Ok(registry) => registry, + Err(error) => return fail_with_error(error), + }; + opts = opts.with_registry(&registry); match opts.bundle(&schema_json) { Ok(bundled) => { diff --git a/crates/jsonschema-py/src/lib.rs b/crates/jsonschema-py/src/lib.rs index 9c946282..86e6d3c2 100644 --- a/crates/jsonschema-py/src/lib.rs +++ b/crates/jsonschema-py/src/lib.rs @@ -832,19 
+832,19 @@ impl jsonschema::Keyword for CustomKeyword { } } -fn make_options( +fn make_options<'a>( draft: Option, - formats: Option<&Bound<'_, PyDict>>, + formats: Option<&Bound<'a, PyDict>>, validate_formats: Option, ignore_unknown_formats: Option, - retriever: Option<&Bound<'_, PyAny>>, - registry: Option<®istry::Registry>, + retriever: Option<&Bound<'a, PyAny>>, + registry: Option<&'a registry::Registry>, base_uri: Option, - pattern_options: Option<&Bound<'_, PyAny>>, - email_options: Option<&Bound<'_, PyAny>>, - http_options: Option<&Bound<'_, PyAny>>, - keywords: Option<&Bound<'_, PyDict>>, -) -> PyResult { + pattern_options: Option<&Bound<'a, PyAny>>, + email_options: Option<&Bound<'a, PyAny>>, + http_options: Option<&Bound<'a, PyAny>>, + keywords: Option<&Bound<'a, PyDict>>, +) -> PyResult> { let mut options = jsonschema::options(); if let Some(raw_draft_version) = draft { options = options.with_draft(get_draft(raw_draft_version)?); @@ -890,7 +890,7 @@ fn make_options( options = options.with_retriever(Retriever { func }); } if let Some(registry) = registry { - options = options.with_registry(registry.inner.clone()); + options = options.with_registry(®istry.inner); } if let Some(base_uri) = base_uri { options = options.with_base_uri(base_uri); @@ -2021,7 +2021,7 @@ mod meta { let schema = crate::ser::to_value(schema)?; let result = if let Some(registry) = registry { jsonschema::meta::options() - .with_registry(registry.inner.clone()) + .with_registry(®istry.inner) .validate(&schema) } else { jsonschema::meta::validate(&schema) @@ -2070,7 +2070,7 @@ mod meta { let schema = crate::ser::to_value(schema)?; let result = if let Some(registry) = registry { jsonschema::meta::options() - .with_registry(registry.inner.clone()) + .with_registry(®istry.inner) .validate(&schema) } else { jsonschema::meta::validate(&schema) diff --git a/crates/jsonschema-py/src/registry.rs b/crates/jsonschema-py/src/registry.rs index d10fde99..da9458a2 100644 --- 
a/crates/jsonschema-py/src/registry.rs +++ b/crates/jsonschema-py/src/registry.rs @@ -1,4 +1,3 @@ -use jsonschema::Resource; use pyo3::{exceptions::PyValueError, prelude::*}; use crate::{get_draft, retriever::into_retriever, to_value, Retriever}; @@ -6,7 +5,7 @@ use crate::{get_draft, retriever::into_retriever, to_value, Retriever}; /// A registry of JSON Schema resources, each identified by their canonical URIs. #[pyclass] pub(crate) struct Registry { - pub(crate) inner: jsonschema::Registry, + pub(crate) inner: jsonschema::Registry<'static>, } #[pymethods] @@ -19,30 +18,29 @@ impl Registry { draft: Option, retriever: Option<&Bound<'_, PyAny>>, ) -> PyResult { - let mut options = jsonschema::Registry::options(); + let mut builder = jsonschema::Registry::new(); if let Some(draft) = draft { - options = options.draft(get_draft(draft)?); + builder = builder.draft(get_draft(draft)?); } if let Some(retriever) = retriever { let func = into_retriever(retriever)?; - options = options.retriever(Retriever { func }); + builder = builder.retriever(Retriever { func }); } - let pairs = resources.try_iter()?.map(|item| { + for item in resources.try_iter()? { let pair = item?.unbind(); let (key, value) = pair.extract::<(Bound, Bound)>(py)?; let uri = key.extract::()?; let schema = to_value(&value)?; - let resource = Resource::from_contents(schema); - Ok((uri, resource)) - }); - - let pairs: Result, PyErr> = pairs.collect(); + builder = builder + .add(uri, schema) + .map_err(|e| PyValueError::new_err(e.to_string()))?; + } - let registry = options - .build(pairs?) 
+ let registry = builder + .prepare() .map_err(|e| PyValueError::new_err(e.to_string()))?; Ok(Registry { inner: registry }) diff --git a/crates/jsonschema-py/tests-py/test_bundle.py b/crates/jsonschema-py/tests-py/test_bundle.py index 86579ee2..675e841e 100644 --- a/crates/jsonschema-py/tests-py/test_bundle.py +++ b/crates/jsonschema-py/tests-py/test_bundle.py @@ -43,6 +43,53 @@ def test_bundle_validates_identically(): assert not validator.is_valid({"age": 30}) +def test_bundle_with_registry_and_explicit_draft4_legacy_id_root(): + root = { + "id": "urn:root", + "type": "object", + "properties": {"value": {"$ref": "urn:string"}}, + "required": ["value"], + } + registry = jsonschema_rs.Registry( + resources=[("urn:string", {"type": "string"})], + draft=jsonschema_rs.Draft4, + ) + + bundled = jsonschema_rs.bundle(root, registry=registry, draft=jsonschema_rs.Draft4) + + assert bundled["properties"]["value"]["$ref"] == "urn:string" + assert "urn:string" in bundled["definitions"] + + +def test_bundle_uses_call_retriever_when_inline_root_adds_external_ref(): + def retrieve(uri: str): + if uri == "urn:external": + return {"type": "string"} + raise KeyError(f"Schema not found: {uri}") + + root = { + "type": "object", + "properties": {"value": {"$ref": "urn:external"}}, + "required": ["value"], + } + registry = jsonschema_rs.Registry(resources=[("urn:seed", {"type": "integer"})]) + + bundled = jsonschema_rs.bundle(root, registry=registry, retriever=retrieve) + + assert bundled["properties"]["value"]["$ref"] == "urn:external" + assert "urn:external" in bundled["$defs"] + + +def test_bundle_with_registry_accepts_equivalent_base_uri_with_empty_fragment(): + root = {"$id": "urn:root", "$ref": "urn:shared"} + registry = jsonschema_rs.Registry(resources=[("urn:shared", {"type": "integer"})]) + + bundled = jsonschema_rs.bundle(root, registry=registry, base_uri="urn:root#") + + assert bundled["$ref"] == "urn:shared" + assert bundled["$defs"]["urn:shared"]["type"] == "integer" + + 
def test_bundle_unresolvable_raises(): with pytest.raises(jsonschema_rs.ReferencingError): jsonschema_rs.bundle({"$ref": "https://example.com/missing.json"}) diff --git a/crates/jsonschema-py/tests-py/test_registry.py b/crates/jsonschema-py/tests-py/test_registry.py index d7984046..d240812d 100644 --- a/crates/jsonschema-py/tests-py/test_registry.py +++ b/crates/jsonschema-py/tests-py/test_registry.py @@ -85,7 +85,6 @@ def test_top_level_functions_with_registry(function): assert list(function(schema, VALID_PERSON, registry=registry)) == [] assert list(function(schema, INVALID_PERSON, registry=registry)) != [] - def test_validator_for_with_registry(): registry = Registry(NESTED_RESOURCES) schema = {"$ref": "https://example.com/person.json"} @@ -96,6 +95,21 @@ def test_validator_for_with_registry(): assert not validator.is_valid(INVALID_PERSON) +def test_validator_for_with_registry_and_explicit_draft4_legacy_id_root(): + registry = Registry([("urn:string", {"type": "string"})], draft=Draft4) + schema = { + "id": "urn:root", + "type": "object", + "properties": {"value": {"$ref": "urn:string"}}, + "required": ["value"], + } + + validator = Draft4Validator(schema, registry=registry) + + assert validator.is_valid({"value": "ok"}) + assert not validator.is_valid({"value": 42}) + + def test_registry_with_retriever_and_validation(): def retrieve(uri: str): if uri == "https://example.com/dynamic.json": @@ -118,6 +132,34 @@ def retrieve(uri: str): assert not dynamic_validator.is_valid("test") +def test_validator_for_uses_call_retriever_when_inline_root_adds_external_ref(): + def retrieve(uri: str): + if uri == "urn:external": + return {"type": "string"} + raise KeyError(f"Schema not found: {uri}") + + registry = Registry([("urn:seed", {"type": "integer"})]) + schema = { + "type": "object", + "properties": {"value": {"$ref": "urn:external"}}, + "required": ["value"], + } + + validator = validator_for(schema, registry=registry, retriever=retrieve) + assert 
validator.is_valid({"value": "ok"}) + assert not validator.is_valid({"value": 42}) + + +def test_validator_for_with_registry_accepts_equivalent_base_uri_with_empty_fragment(): + registry = Registry([("urn:shared", {"type": "integer"})]) + schema = {"$id": "urn:root", "$ref": "urn:shared"} + + validator = validator_for(schema, registry=registry, base_uri="urn:root#") + + assert validator.is_valid(1) + assert not validator.is_valid("x") + + def test_registry_error_propagation(): registry = Registry(NESTED_RESOURCES) diff --git a/crates/jsonschema-rb/spec/bundle_spec.rb b/crates/jsonschema-rb/spec/bundle_spec.rb index 924e1e47..2d2557c3 100644 --- a/crates/jsonschema-rb/spec/bundle_spec.rb +++ b/crates/jsonschema-rb/spec/bundle_spec.rb @@ -43,6 +43,23 @@ expect(validator.valid?({ "age" => 30 })).to be false end + it "bundles inline legacy-id root with registry and explicit draft4" do + root = { + "id" => "urn:root", + "type" => "object", + "properties" => { "value" => { "$ref" => "urn:string" } }, + "required" => ["value"] + } + registry = JSONSchema::Registry.new( + [["urn:string", { "type" => "string" }]], + draft: :draft4 + ) + + bundled = JSONSchema.bundle(root, registry: registry, draft: :draft4) + expect(bundled.dig("properties", "value", "$ref")).to eq("urn:string") + expect(bundled.dig("definitions", "urn:string")).not_to be_nil + end + it "raises when a $ref cannot be resolved" do expect do JSONSchema.bundle({ "$ref" => "https://example.com/missing.json" }) diff --git a/crates/jsonschema-rb/src/lib.rs b/crates/jsonschema-rb/src/lib.rs index 27186f9e..494fca71 100644 --- a/crates/jsonschema-rb/src/lib.rs +++ b/crates/jsonschema-rb/src/lib.rs @@ -84,16 +84,21 @@ struct BuiltValidator { fn build_validator( ruby: &Ruby, options: ValidationOptions, + registry: Option<&jsonschema::Registry<'_>>, retriever: Option, callback_roots: CallbackRoots, compilation_roots: Arc, schema: &serde_json::Value, ) -> Result { - let validator = match retriever { - Some(ret) => 
options.with_retriever(ret).build(schema), - None => options.build(schema), + let mut options = match retriever { + Some(ret) => options.with_retriever(ret), + None => options, + }; + if let Some(registry) = registry { + options = options.with_registry(registry); } - .map_err(|error| { + + let validator = options.build(schema).map_err(|error| { if let jsonschema::error::ValidationErrorKind::Referencing(err) = error.kind() { if let Some(message) = retriever_error_message(err) { Error::new(ruby.exception_arg_error(), message) @@ -157,7 +162,7 @@ fn build_parsed_options( ruby: &Ruby, kw: ExtractedKwargs, draft_override: Option, -) -> Result { +) -> Result, Error> { let ( draft_val, validate_formats, @@ -832,6 +837,7 @@ fn validator_for(ruby: &Ruby, args: &[Value]) -> Result { } = build_validator( ruby, parsed.options, + parsed.registry, parsed.retriever, parsed.callback_roots, parsed.compilation_roots, @@ -853,7 +859,11 @@ fn bundle(ruby: &Ruby, args: &[Value]) -> Result { let json_schema = to_schema_value(ruby, schema)?; let parsed = build_parsed_options(ruby, kw, None)?; - match parsed.options.bundle(&json_schema) { + let mut options = parsed.options; + if let Some(registry) = parsed.registry { + options = options.with_registry(registry); + } + match options.bundle(&json_schema) { Ok(bundled) => ser::value_to_ruby(ruby, &bundled), Err(e @ jsonschema::ReferencingError::Unretrievable { .. 
}) => { Err(referencing_error(ruby, e.to_string())) @@ -888,6 +898,7 @@ fn is_valid(ruby: &Ruby, args: &[Value]) -> Result { } = build_validator( ruby, parsed.options, + parsed.registry, parsed.retriever, parsed.callback_roots, parsed.compilation_roots, @@ -927,6 +938,7 @@ fn validate(ruby: &Ruby, args: &[Value]) -> Result<(), Error> { } = build_validator( ruby, parsed.options, + parsed.registry, parsed.retriever, parsed.callback_roots, parsed.compilation_roots, @@ -978,6 +990,7 @@ fn each_error(ruby: &Ruby, args: &[Value]) -> Result { } = build_validator( ruby, parsed.options, + parsed.registry, parsed.retriever, parsed.callback_roots, parsed.compilation_roots, @@ -1073,6 +1086,7 @@ fn evaluate(ruby: &Ruby, args: &[Value]) -> Result { } = build_validator( ruby, parsed.options, + parsed.registry, parsed.retriever, parsed.callback_roots, parsed.compilation_roots, @@ -1126,6 +1140,7 @@ macro_rules! define_draft_validator { } = build_validator( ruby, parsed.options, + parsed.registry, parsed.retriever, parsed.callback_roots, parsed.compilation_roots, @@ -1201,9 +1216,9 @@ fn meta_is_valid(ruby: &Ruby, args: &[Value]) -> Result { let json_schema = to_schema_value(ruby, schema)?; - let result = if let Some(reg) = registry { + let result = if let Some(registry) = registry { jsonschema::meta::options() - .with_registry(reg.inner.clone()) + .with_registry(®istry.inner) .validate(&json_schema) } else { jsonschema::meta::validate(&json_schema) @@ -1230,9 +1245,9 @@ fn meta_validate(ruby: &Ruby, args: &[Value]) -> Result<(), Error> { let json_schema = to_schema_value(ruby, schema)?; - let result = if let Some(reg) = registry { + let result = if let Some(registry) = registry { jsonschema::meta::options() - .with_registry(reg.inner.clone()) + .with_registry(®istry.inner) .validate(&json_schema) } else { jsonschema::meta::validate(&json_schema) diff --git a/crates/jsonschema-rb/src/options.rs b/crates/jsonschema-rb/src/options.rs index 8bde03dc..e8d722f2 100644 --- 
a/crates/jsonschema-rb/src/options.rs +++ b/crates/jsonschema-rb/src/options.rs @@ -56,9 +56,10 @@ define_rb_intern!(static SYM_CALL: "call"); define_rb_intern!(static SYM_NEW: "new"); define_rb_intern!(static SYM_VALIDATE: "validate"); -pub struct ParsedOptions { +pub struct ParsedOptions<'i> { pub mask: Option, - pub options: jsonschema::ValidationOptions, + pub options: jsonschema::ValidationOptions<'i>, + pub registry: Option<&'i jsonschema::Registry<'static>>, pub retriever: Option, // Runtime callbacks invoked during `validator.*` calls (formats / custom keywords). // Retriever callbacks are used at build time and do not affect GVL behavior at runtime. @@ -420,8 +421,9 @@ pub fn make_options_from_kwargs( pattern_options_val: Option, email_options_val: Option, http_options_val: Option, -) -> Result { +) -> Result, Error> { let mut opts = jsonschema::options(); + let mut registry = None; let mut retriever = None; let retriever_was_provided = retriever_val.is_some(); let mut has_ruby_callbacks = false; @@ -473,7 +475,7 @@ pub fn make_options_from_kwargs( "registry must be a JSONSchema::Registry instance", ) })?; - opts = opts.with_registry(reg.inner.clone()); + registry = Some(®.inner); if !retriever_was_provided && retriever.is_none() { if let Some(registry_retriever_value) = reg.retriever_value(ruby) { @@ -763,6 +765,7 @@ pub fn make_options_from_kwargs( Ok(ParsedOptions { mask, options: opts, + registry, retriever, has_ruby_callbacks, callback_roots, diff --git a/crates/jsonschema-rb/src/registry.rs b/crates/jsonschema-rb/src/registry.rs index a6d9159d..2f4288ab 100644 --- a/crates/jsonschema-rb/src/registry.rs +++ b/crates/jsonschema-rb/src/registry.rs @@ -40,7 +40,7 @@ impl Drop for RetrieverBuildRootGuard { #[derive(magnus::TypedData)] #[magnus(class = "JSONSchema::Registry", free_immediately, size, mark)] pub struct Registry { - pub inner: jsonschema::Registry, + pub inner: jsonschema::Registry<'static>, retriever_root: Option>, } @@ -72,7 +72,7 @@ impl 
Registry { let draft_val = kw.optional.0.flatten(); let retriever_val = kw.optional.1; - let mut builder = jsonschema::Registry::options(); + let mut builder = jsonschema::Registry::new(); let mut retriever_root = None; let mut retriever_build_root = None; @@ -89,29 +89,26 @@ impl Registry { } } - let pairs: Vec<(String, jsonschema::Resource)> = resources - .into_iter() - .map(|item| { - let pair: RArray = TryConvert::try_convert(item)?; - if pair.len() != 2 { - return Err(Error::new( - ruby.exception_arg_error(), - "Each resource must be a [uri, schema] pair", - )); - } - let uri: String = pair.entry(0)?; - let schema_val: Value = pair.entry(1)?; - let schema = to_value(ruby, schema_val)?; - let resource = jsonschema::Resource::from_contents(schema); - Ok((uri, resource)) - }) - .collect::, Error>>()?; - // Keep the retriever proc GC-rooted for the entire build, because `build` // may call into retriever callbacks while traversing referenced resources. let _retriever_build_guard = RetrieverBuildRootGuard::new(retriever_build_root); + for item in resources { + let pair: RArray = TryConvert::try_convert(item)?; + if pair.len() != 2 { + return Err(Error::new( + ruby.exception_arg_error(), + "Each resource must be a [uri, schema] pair", + )); + } + let uri: String = pair.entry(0)?; + let schema_val: Value = pair.entry(1)?; + let schema = to_value(ruby, schema_val)?; + builder = builder + .add(uri, schema) + .map_err(|e| Error::new(ruby.exception_arg_error(), e.to_string()))?; + } let registry = builder - .build(pairs) + .prepare() .map_err(|e| Error::new(ruby.exception_arg_error(), e.to_string()))?; Ok(Registry { diff --git a/crates/jsonschema-referencing/Cargo.toml b/crates/jsonschema-referencing/Cargo.toml index 3747795c..e1769f5e 100644 --- a/crates/jsonschema-referencing/Cargo.toml +++ b/crates/jsonschema-referencing/Cargo.toml @@ -22,10 +22,12 @@ ahash.workspace = true async-trait = { version = "0.1.86", optional = true } fluent-uri = { version = "0.4.1", 
features = ["serde"] } futures = { version = "0.3.31", optional = true } +itoa = "1" parking_lot = "0.12.3" percent-encoding = "2.3.1" serde_json.workspace = true hashbrown = "0.16" +micromap = "0.3.0" [dev-dependencies] benchmark = { path = "../benchmark/" } diff --git a/crates/jsonschema-referencing/benches/anchor.rs b/crates/jsonschema-referencing/benches/anchor.rs index fce92538..3b516213 100644 --- a/crates/jsonschema-referencing/benches/anchor.rs +++ b/crates/jsonschema-referencing/benches/anchor.rs @@ -14,9 +14,11 @@ fn bench_anchor_lookup(c: &mut Criterion) { } }); let resource = Draft::Draft4.create_resource(data); - let registry = - Registry::try_new("http://example.com/", resource).expect("Invalid registry input"); - + let registry = Registry::new() + .add("http://example.com/", resource) + .expect("Invalid registry input") + .prepare() + .expect("Invalid registry input"); let mut group = c.benchmark_group("Anchor Lookup"); // Benchmark lookup of existing anchor @@ -24,9 +26,9 @@ fn bench_anchor_lookup(c: &mut Criterion) { BenchmarkId::new("resolve", "small"), ®istry, |b, registry| { - let resolver = registry - .try_resolver("http://example.com/") - .expect("Invalid base URI"); + let resolver = registry.resolver( + referencing::uri::from_str("http://example.com/").expect("Invalid base URI"), + ); b.iter_with_large_drop(|| resolver.lookup(black_box("#foo"))); }, ); diff --git a/crates/jsonschema-referencing/benches/pointer.rs b/crates/jsonschema-referencing/benches/pointer.rs index abce3047..f46aa116 100644 --- a/crates/jsonschema-referencing/benches/pointer.rs +++ b/crates/jsonschema-referencing/benches/pointer.rs @@ -29,9 +29,11 @@ fn create_deep_nested_json(depth: usize) -> Value { fn bench_pointers(c: &mut Criterion) { let data = create_deep_nested_json(15); let resource = Draft::Draft202012.create_resource(data); - let registry = Registry::try_new("http://example.com/schema.json", resource) + let registry = Registry::new() + 
.add("http://example.com/schema.json", resource) + .expect("Invalid registry input") + .prepare() .expect("Invalid registry input"); - let cases = [ ("single", "#/properties"), ("double", "#/properties/level_0"), @@ -45,9 +47,10 @@ fn bench_pointers(c: &mut Criterion) { BenchmarkId::new("pointer", name), ®istry, |b, registry| { - let resolver = registry - .try_resolver("http://example.com/schema.json") - .expect("Invalid base URI"); + let resolver = registry.resolver( + referencing::uri::from_str("http://example.com/schema.json") + .expect("Invalid base URI"), + ); b.iter_with_large_drop(|| resolver.lookup(black_box(pointer))); }, ); diff --git a/crates/jsonschema-referencing/benches/registry.rs b/crates/jsonschema-referencing/benches/registry.rs index b8229755..2d114af0 100644 --- a/crates/jsonschema-referencing/benches/registry.rs +++ b/crates/jsonschema-referencing/benches/registry.rs @@ -25,14 +25,34 @@ fn bench_subresources(c: &mut Criterion) { group.bench_with_input(BenchmarkId::new("try_new", name), &schema, |b, schema| { b.iter_batched( - || draft.create_resource(schema.clone()), + || draft.create_resource_ref(schema), |resource| { - Registry::try_new("http://example.com/schema.json", resource) + Registry::new() + .add("http://example.com/schema.json", resource) .expect("Invalid registry input") + .prepare() }, BatchSize::SmallInput, ); }); + + // Owned cases force the registry to store persistent pointers for discovered entries. 
+ group.bench_with_input( + BenchmarkId::new("prepare_owned", name), + &schema, + |b, schema| { + b.iter_batched( + || draft.create_resource(schema.clone()), + |resource| { + Registry::new() + .add("http://example.com/schema.json", resource) + .expect("Invalid registry input") + .prepare() + }, + BatchSize::SmallInput, + ); + }, + ); } let drafts = [ (Draft::Draft4, benchmark::GEOJSON, "GeoJSON"), @@ -50,14 +70,29 @@ fn bench_subresources(c: &mut Criterion) { &schema, |b, schema| { b.iter_batched( - || { - ( - draft.create_resource(schema.clone()), - SPECIFICATIONS.clone(), - ) + || (draft.create_resource_ref(schema), &*SPECIFICATIONS), + |(resource, registry)| { + registry + .add("http://example.com/schema.json", resource) + .expect("Invalid registry input") + .prepare() }, + BatchSize::SmallInput, + ); + }, + ); + + group.bench_with_input( + BenchmarkId::new("prepare_owned_with_specifications", name), + &schema, + |b, schema| { + b.iter_batched( + || (draft.create_resource(schema.clone()), &*SPECIFICATIONS), |(resource, registry)| { - registry.try_with_resource("http://example.com/schema.json", resource) + registry + .add("http://example.com/schema.json", resource) + .expect("Invalid registry input") + .prepare() }, BatchSize::SmallInput, ); diff --git a/crates/jsonschema-referencing/src/anchors/mod.rs b/crates/jsonschema-referencing/src/anchor.rs similarity index 65% rename from crates/jsonschema-referencing/src/anchors/mod.rs rename to crates/jsonschema-referencing/src/anchor.rs index ae595306..5883f7a2 100644 --- a/crates/jsonschema-referencing/src/anchors/mod.rs +++ b/crates/jsonschema-referencing/src/anchor.rs @@ -1,86 +1,42 @@ -use std::{ - hash::Hash, - sync::atomic::{AtomicPtr, Ordering}, -}; +//! Anchors identify sub-schemas within a document by name. +//! +//! JSON Schema defines two anchor flavors: +//! - [`Anchor::Default`]: a plain anchor (`$anchor`), resolved against the current base URI. +//! 
- [`Anchor::Dynamic`]: a dynamic anchor (`$dynamicAnchor`), which re-anchors to the +//! outermost matching dynamic anchor found in the dynamic scope during resolution. +//! +//! [`AnchorIter`] avoids a heap allocation for the common case of 0–2 anchors per schema object. use serde_json::Value; -mod keys; - -use crate::{resource::InnerResourcePtr, Draft, Error, Resolved, Resolver}; -pub(crate) use keys::{AnchorKey, AnchorKeyRef}; - -#[derive(Debug)] -pub(crate) struct AnchorName { - ptr: AtomicPtr, - len: usize, -} - -impl AnchorName { - fn new(s: &str) -> Self { - Self { - ptr: AtomicPtr::new(s.as_ptr().cast_mut()), - len: s.len(), - } - } - - #[allow(unsafe_code)] - fn as_str(&self) -> &str { - // SAFETY: The pointer is valid as long as the registry exists - unsafe { - std::str::from_utf8_unchecked(std::slice::from_raw_parts( - self.ptr.load(Ordering::Relaxed), - self.len, - )) - } - } -} - -impl Clone for AnchorName { - fn clone(&self) -> Self { - Self { - ptr: AtomicPtr::new(self.ptr.load(Ordering::Relaxed)), - len: self.len, - } - } -} - -impl Hash for AnchorName { - fn hash(&self, state: &mut H) { - self.as_str().hash(state); - } -} - -impl PartialEq for AnchorName { - fn eq(&self, other: &Self) -> bool { - self.as_str() == other.as_str() - } -} - -impl Eq for AnchorName {} +use crate::{Draft, Error, Resolved, Resolver, ResourceRef}; /// An anchor within a resource. -#[derive(Debug, Clone)] -pub(crate) enum Anchor { +#[derive(Debug, Clone, Copy)] +pub(crate) enum Anchor<'a> { Default { - name: AnchorName, - resource: InnerResourcePtr, + name: &'a str, + resource: ResourceRef<'a>, }, Dynamic { - name: AnchorName, - resource: InnerResourcePtr, + name: &'a str, + resource: ResourceRef<'a>, }, } -impl Anchor { +impl<'a> Anchor<'a> { /// Anchor's name. - pub(crate) fn name(&self) -> AnchorName { + #[inline] + pub(crate) fn name(&self) -> &'a str { match self { - Anchor::Default { name, .. } | Anchor::Dynamic { name, .. 
} => name.clone(), + Anchor::Default { name, .. } | Anchor::Dynamic { name, .. } => name, } } +} + +impl<'r> Anchor<'r> { /// Get the resource for this anchor. - pub(crate) fn resolve<'r>(&'r self, resolver: Resolver<'r>) -> Result, Error> { + pub(crate) fn resolve(&self, resolver: Resolver<'r>) -> Result, Error> { match self { Anchor::Default { resource, .. } => Ok(Resolved::new( resource.contents(), @@ -88,9 +44,9 @@ impl Anchor { resource.draft(), )), Anchor::Dynamic { name, resource } => { - let mut last = resource; + let mut last = *resource; for uri in &resolver.dynamic_scope() { - match resolver.registry.anchor(uri, name.as_str()) { + match resolver.lookup_anchor(uri, name) { Ok(anchor) => { if let Anchor::Dynamic { resource, .. } = anchor { last = resource; @@ -102,7 +58,7 @@ impl Anchor { } Ok(Resolved::new( last.contents(), - resolver.in_subresource_inner(last)?, + resolver.in_subresource(last)?, last.draft(), )) } @@ -110,14 +66,15 @@ impl Anchor { } } -pub(crate) enum AnchorIter { +/// An iterator over 0, 1, or 2 anchors — avoids a [`Vec`] allocation for the common case. 
+pub(crate) enum AnchorIter<'a> { Empty, - One(Anchor), - Two(Anchor, Anchor), + One(Anchor<'a>), + Two(Anchor<'a>, Anchor<'a>), } -impl Iterator for AnchorIter { - type Item = Anchor; +impl<'a> Iterator for AnchorIter<'a> { + type Item = Anchor<'a>; fn next(&mut self) -> Option { match std::mem::replace(self, AnchorIter::Empty) { @@ -131,7 +88,7 @@ impl Iterator for AnchorIter { } } -pub(crate) fn anchor(draft: Draft, contents: &Value) -> AnchorIter { +pub(crate) fn anchor(draft: Draft, contents: &Value) -> AnchorIter<'_> { let Some(schema) = contents.as_object() else { return AnchorIter::Empty; }; @@ -142,16 +99,16 @@ pub(crate) fn anchor(draft: Draft, contents: &Value) -> AnchorIter { .get("$anchor") .and_then(Value::as_str) .map(|name| Anchor::Default { - name: AnchorName::new(name), - resource: InnerResourcePtr::new(contents, draft), + name, + resource: ResourceRef::new(contents, draft), }); let dynamic_anchor = schema .get("$dynamicAnchor") .and_then(Value::as_str) .map(|name| Anchor::Dynamic { - name: AnchorName::new(name), - resource: InnerResourcePtr::new(contents, draft), + name, + resource: ResourceRef::new(contents, draft), }); match (default_anchor, dynamic_anchor) { @@ -162,21 +119,21 @@ pub(crate) fn anchor(draft: Draft, contents: &Value) -> AnchorIter { } } -pub(crate) fn anchor_2019(draft: Draft, contents: &Value) -> AnchorIter { +pub(crate) fn anchor_2019(draft: Draft, contents: &Value) -> AnchorIter<'_> { match contents .as_object() .and_then(|schema| schema.get("$anchor")) .and_then(Value::as_str) { Some(name) => AnchorIter::One(Anchor::Default { - name: AnchorName::new(name), - resource: InnerResourcePtr::new(contents, draft), + name, + resource: ResourceRef::new(contents, draft), }), None => AnchorIter::Empty, } } -pub(crate) fn legacy_anchor_in_dollar_id(draft: Draft, contents: &Value) -> AnchorIter { +pub(crate) fn legacy_anchor_in_dollar_id(draft: Draft, contents: &Value) -> AnchorIter<'_> { match contents .as_object() .and_then(|schema| 
schema.get("$id")) @@ -184,14 +141,14 @@ pub(crate) fn legacy_anchor_in_dollar_id(draft: Draft, contents: &Value) -> Anch .and_then(|id| id.strip_prefix('#')) { Some(id) => AnchorIter::One(Anchor::Default { - name: AnchorName::new(id), - resource: InnerResourcePtr::new(contents, draft), + name: id, + resource: ResourceRef::new(contents, draft), }), None => AnchorIter::Empty, } } -pub(crate) fn legacy_anchor_in_id(draft: Draft, contents: &Value) -> AnchorIter { +pub(crate) fn legacy_anchor_in_id(draft: Draft, contents: &Value) -> AnchorIter<'_> { match contents .as_object() .and_then(|schema| schema.get("id")) @@ -199,8 +156,8 @@ pub(crate) fn legacy_anchor_in_id(draft: Draft, contents: &Value) -> AnchorIter .and_then(|id| id.strip_prefix('#')) { Some(id) => AnchorIter::One(Anchor::Default { - name: AnchorName::new(id), - resource: InnerResourcePtr::new(contents, draft), + name: id, + resource: ResourceRef::new(contents, draft), }), None => AnchorIter::Empty, } @@ -214,11 +171,13 @@ mod tests { #[test] fn test_lookup_trivial_dynamic_ref() { let one = Draft::Draft202012.create_resource(json!({"$dynamicAnchor": "foo"})); - let registry = - Registry::try_new("http://example.com", one.clone()).expect("Invalid resources"); + let registry = Registry::new() + .add("http://example.com", &one) + .expect("Invalid resources") + .prepare() + .expect("Invalid resources"); let resolver = registry - .try_resolver("http://example.com") - .expect("Invalid base URI"); + .resolver(crate::uri::from_str("http://example.com").expect("Invalid base URI")); let resolved = resolver.lookup("#foo").expect("Lookup failed"); assert_eq!(resolved.contents(), one.contents()); } @@ -243,15 +202,17 @@ mod tests { }, })); - let registry = Registry::try_from_resources([ - ("http://example.com".to_string(), root.clone()), - ("http://example.com/foo/".to_string(), true_resource), - ("http://example.com/foo/bar".to_string(), root.clone()), - ]) - .expect("Invalid resources"); + let registry = 
Registry::new() + .extend([ + ("http://example.com", &root), + ("http://example.com/foo/", &true_resource), + ("http://example.com/foo/bar", &root), + ]) + .expect("Invalid resources") + .prepare() + .expect("Invalid resources"); let resolver = registry - .try_resolver("http://example.com") - .expect("Invalid base URI"); + .resolver(crate::uri::from_str("http://example.com").expect("Invalid base URI")); let first = resolver.lookup("").expect("Lookup failed"); let second = first.resolver().lookup("foo/").expect("Lookup failed"); @@ -284,15 +245,17 @@ mod tests { }, })); - let registry = Registry::try_from_resources([ - ("http://example.com".to_string(), two.clone()), - ("http://example.com/foo/".to_string(), one), - ("http://example.com/foo/bar".to_string(), two.clone()), - ]) - .expect("Invalid resources"); + let registry = Registry::new() + .extend([ + ("http://example.com", &two), + ("http://example.com/foo/", &one), + ("http://example.com/foo/bar", &two), + ]) + .expect("Invalid resources") + .prepare() + .expect("Invalid resources"); let resolver = registry - .try_resolver("http://example.com") - .expect("Invalid base URI"); + .resolver(crate::uri::from_str("http://example.com").expect("Invalid base URI")); let first = resolver.lookup("").expect("Lookup failed"); let second = first.resolver().lookup("foo/").expect("Lookup failed"); @@ -311,14 +274,17 @@ mod tests { "foo": { "$anchor": "knownAnchor" } } })); - let registry = Registry::try_new("http://example.com", schema).expect("Invalid resources"); + let registry = Registry::new() + .add("http://example.com", schema) + .expect("Invalid resources") + .prepare() + .expect("Invalid resources"); let resolver = registry - .try_resolver("http://example.com") - .expect("Invalid base URI"); + .resolver(crate::uri::from_str("http://example.com").expect("Invalid base URI")); let result = resolver.lookup("#unknownAnchor"); assert_eq!( - result.unwrap_err().to_string(), + result.expect_err("Should fail").to_string(), 
"Anchor 'unknownAnchor' does not exist" ); } @@ -330,42 +296,49 @@ mod tests { "foo": { "$anchor": "knownAnchor" } } })); - let registry = Registry::try_new("http://example.com", schema).expect("Invalid resources"); + let registry = Registry::new() + .add("http://example.com", schema) + .expect("Invalid resources") + .prepare() + .expect("Invalid resources"); let resolver = registry - .try_resolver("http://example.com") - .expect("Invalid base URI"); + .resolver(crate::uri::from_str("http://example.com").expect("Invalid base URI")); let result = resolver.lookup("#invalid/anchor"); assert_eq!( - result.unwrap_err().to_string(), + result.expect_err("Should fail").to_string(), "Anchor 'invalid/anchor' is invalid" ); } #[test] fn test_lookup_trivial_recursive_ref() { - let one = Draft::Draft201909.create_resource(json!({"$recursiveAnchor": true})); - let registry = - Registry::try_new("http://example.com", one.clone()).expect("Invalid resources"); + let resource = Draft::Draft201909.create_resource(json!({"$recursiveAnchor": true})); + let registry = Registry::new() + .add("http://example.com", &resource) + .expect("Invalid resources") + .prepare() + .expect("Invalid resources"); let resolver = registry - .try_resolver("http://example.com") - .expect("Invalid base URI"); + .resolver(crate::uri::from_str("http://example.com").expect("Invalid base URI")); let first = resolver.lookup("").expect("Lookup failed"); let resolved = first .resolver() .lookup_recursive_ref() .expect("Lookup failed"); - assert_eq!(resolved.contents(), one.contents()); + assert_eq!(resolved.contents(), resource.contents()); } #[test] fn test_lookup_recursive_ref_to_bool() { let true_resource = Draft::Draft201909.create_resource(json!(true)); - let registry = Registry::try_new("http://example.com", true_resource.clone()) + let registry = Registry::new() + .add("http://example.com", &true_resource) + .expect("Invalid resources") + .prepare() .expect("Invalid resources"); let resolver = registry - 
.try_resolver("http://example.com") - .expect("Invalid base URI"); + .resolver(crate::uri::from_str("http://example.com").expect("Invalid base URI")); let resolved = resolver.lookup_recursive_ref().expect("Lookup failed"); assert_eq!(resolved.contents(), true_resource.contents()); } @@ -391,16 +364,17 @@ mod tests { }, })); - let registry = Registry::try_from_resources(vec![ - ("http://example.com".to_string(), root.clone()), - ("http://example.com/foo/".to_string(), true_resource), - ("http://example.com/foo/bar".to_string(), root.clone()), - ]) - .expect("Invalid resources"); - + let registry = Registry::new() + .extend([ + ("http://example.com", &root), + ("http://example.com/foo/", &true_resource), + ("http://example.com/foo/bar", &root), + ]) + .expect("Invalid resources") + .prepare() + .expect("Invalid resources"); let resolver = registry - .try_resolver("http://example.com") - .expect("Invalid base URI"); + .resolver(crate::uri::from_str("http://example.com").expect("Invalid base URI")); let first = resolver.lookup("").expect("Lookup failed"); let second = first.resolver().lookup("foo/").expect("Lookup failed"); let third = second.resolver().lookup("bar").expect("Lookup failed"); @@ -433,16 +407,17 @@ mod tests { })); let three = Draft::Draft201909.create_resource(json!({"$recursiveAnchor": false})); - let registry = Registry::try_from_resources(vec![ - ("http://example.com".to_string(), three), - ("http://example.com/foo/".to_string(), two.clone()), - ("http://example.com/foo/bar".to_string(), one), - ]) - .expect("Invalid resources"); - + let registry = Registry::new() + .extend([ + ("http://example.com", &three), + ("http://example.com/foo/", &two), + ("http://example.com/foo/bar", &one), + ]) + .expect("Invalid resources") + .prepare() + .expect("Invalid resources"); let resolver = registry - .try_resolver("http://example.com") - .expect("Invalid base URI"); + .resolver(crate::uri::from_str("http://example.com").expect("Invalid base URI")); let first = 
resolver.lookup("").expect("Lookup failed"); let second = first.resolver().lookup("foo/").expect("Lookup failed"); let third = second.resolver().lookup("bar").expect("Lookup failed"); diff --git a/crates/jsonschema-referencing/src/anchors/keys.rs b/crates/jsonschema-referencing/src/anchors/keys.rs deleted file mode 100644 index 0588b639..00000000 --- a/crates/jsonschema-referencing/src/anchors/keys.rs +++ /dev/null @@ -1,84 +0,0 @@ -//! This module provides a mechanism for creating and managing composite keys -//! used in anchor lookups. It allows for efficient lookups without the need -//! to construct data structures with owned values. -//! -//! The key components are: -//! - `AnchorKey`: An owned version of the composite key. -//! - `AnchorKeyRef`: A borrowed version of the composite key. -//! - `BorrowDyn`: A trait that allows for dynamic borrowing of key components. -//! -//! This design enables the use of borrowed data in hash map lookups while -//! still storing owned data. -use std::{ - borrow::Borrow, - hash::{Hash, Hasher}, - sync::Arc, -}; - -use fluent_uri::Uri; - -use super::AnchorName; - -#[derive(Debug, Clone, Hash, PartialEq, Eq)] -pub(crate) struct AnchorKey { - uri: Arc>, - name: AnchorName, -} - -impl AnchorKey { - pub(crate) fn new(uri: Arc>, name: AnchorName) -> Self { - Self { uri, name } - } -} - -#[derive(Copy, Clone, Hash, PartialEq, Eq)] -pub(crate) struct AnchorKeyRef<'a> { - uri: &'a Uri, - name: &'a str, -} - -impl<'a> AnchorKeyRef<'a> { - pub(crate) fn new(uri: &'a Uri, name: &'a str) -> Self { - AnchorKeyRef { uri, name } - } - - pub(crate) fn borrow_dyn(&self) -> &dyn BorrowDyn { - self as &dyn BorrowDyn - } -} - -pub(crate) trait BorrowDyn { - fn borrowed_key(&self) -> AnchorKeyRef<'_>; -} - -impl BorrowDyn for AnchorKey { - fn borrowed_key(&self) -> AnchorKeyRef<'_> { - AnchorKeyRef::new(&self.uri, self.name.as_str()) - } -} - -impl BorrowDyn for AnchorKeyRef<'_> { - fn borrowed_key(&self) -> AnchorKeyRef<'_> { - *self - } -} - 
-impl<'a> Borrow for AnchorKey { - fn borrow(&self) -> &(dyn BorrowDyn + 'a) { - self - } -} - -impl Eq for dyn BorrowDyn + '_ {} - -impl PartialEq for dyn BorrowDyn + '_ { - fn eq(&self, other: &dyn BorrowDyn) -> bool { - self.borrowed_key().eq(&other.borrowed_key()) - } -} - -impl Hash for dyn BorrowDyn + '_ { - fn hash(&self, state: &mut H) { - self.borrowed_key().hash(state); - } -} diff --git a/crates/jsonschema-referencing/src/cache.rs b/crates/jsonschema-referencing/src/cache.rs index a7b3e1f6..d268d18d 100644 --- a/crates/jsonschema-referencing/src/cache.rs +++ b/crates/jsonschema-referencing/src/cache.rs @@ -140,10 +140,4 @@ impl SharedUriCache { Ok(inserted) } - - pub(crate) fn into_local(self) -> UriCache { - UriCache { - cache: self.cache.into_inner(), - } - } } diff --git a/crates/jsonschema-referencing/src/specification/mod.rs b/crates/jsonschema-referencing/src/draft.rs similarity index 73% rename from crates/jsonschema-referencing/src/specification/mod.rs rename to crates/jsonschema-referencing/src/draft.rs index 97e307c0..a364ea20 100644 --- a/crates/jsonschema-referencing/src/specification/mod.rs +++ b/crates/jsonschema-referencing/src/draft.rs @@ -1,16 +1,8 @@ -use serde_json::Value; -use subresources::SubresourceIterator; - -mod draft201909; -mod draft4; -mod draft6; -mod draft7; -mod ids; -mod subresources; +use serde_json::{Map, Value}; use crate::{ - anchors, - resource::InnerResourcePtr, + anchor, + spec::{self, draft201909, draft202012, draft4, draft6, draft7, ChildNode, ObjectAnalysis}, vocabularies::{VocabularySet, DRAFT_2019_09_VOCABULARIES, DRAFT_2020_12_VOCABULARIES}, Anchor, Error, Resolver, Resource, ResourceRef, Segments, }; @@ -38,10 +30,12 @@ pub enum Draft { } impl Draft { + /// Wraps `contents` in a [`Resource`] tagged with this draft version. 
#[must_use] pub fn create_resource(self, contents: Value) -> Resource { Resource::new(contents, self) } + /// Wraps a reference to `contents` in a [`ResourceRef`] tagged with this draft version. #[must_use] pub fn create_resource_ref(self, contents: &Value) -> ResourceRef<'_> { ResourceRef::new(contents, self) @@ -71,6 +65,9 @@ impl Draft { } /// Detect what specification could be applied to the given contents. /// + /// Inspects the `$schema` field and returns the matching draft. If no `$schema` + /// field is present, returns `self` unchanged — the caller's current draft is preserved. + /// /// Returns `Draft::Unknown` for custom/unknown `$schema` values. /// Validation of custom meta-schemas happens during registry building. #[must_use] @@ -87,11 +84,14 @@ impl Draft { } pub(crate) fn id_of(self, contents: &Value) -> Option<&str> { match self { - Draft::Draft4 => ids::legacy_id(contents), - Draft::Draft6 | Draft::Draft7 => ids::legacy_dollar_id(contents), - Draft::Draft201909 | Draft::Draft202012 | Draft::Unknown => ids::dollar_id(contents), + Draft::Draft4 => spec::ids::legacy_id(contents), + Draft::Draft6 | Draft::Draft7 => spec::ids::legacy_dollar_id(contents), + Draft::Draft201909 | Draft::Draft202012 | Draft::Unknown => { + spec::ids::dollar_id(contents) + } } } + pub fn subresources_of(self, contents: &Value) -> impl Iterator { match contents.as_object() { Some(schema) => { @@ -100,26 +100,63 @@ impl Draft { Draft::Draft6 => draft6::object_iter, Draft::Draft7 => draft7::object_iter, Draft::Draft201909 => draft201909::object_iter, - Draft::Draft202012 | Draft::Unknown => subresources::object_iter, + Draft::Draft202012 | Draft::Unknown => draft202012::object_iter, }; - SubresourceIterator::Object(schema.iter().flat_map(object_iter)) + draft202012::SubresourceIterator::Object(schema.iter().flat_map(object_iter)) + } + None => draft202012::SubresourceIterator::Empty, + } + } + pub(crate) fn analyze_object(self, contents: &Map) -> ObjectAnalysis<'_> { + match 
self { + Draft::Draft4 => draft4::analyze_object(contents, self), + Draft::Draft6 => draft6::analyze_object(contents, self), + Draft::Draft7 => draft7::analyze_object(contents, self), + Draft::Draft201909 => draft201909::analyze_object(contents, self), + Draft::Draft202012 | Draft::Unknown => draft202012::analyze_object(contents, self), + } + } + pub(crate) fn for_each_child<'a>( + self, + contents: &'a Map, + f: &mut impl FnMut(&'a Value, Draft) -> Result<(), Error>, + ) -> Result<(), Error> { + match self { + Draft::Draft4 => draft4::for_each_child(contents, self, f), + Draft::Draft6 => draft6::for_each_child(contents, self, f), + Draft::Draft7 => draft7::for_each_child(contents, self, f), + Draft::Draft201909 => draft201909::for_each_child(contents, self, f), + Draft::Draft202012 | Draft::Unknown => draft202012::for_each_child(contents, self, f), + } + } + pub(crate) fn for_each_owned_child<'a>( + self, + contents: &'a Map, + f: &mut impl FnMut(ChildNode<'a>) -> Result<(), Error>, + ) -> Result<(), Error> { + match self { + Draft::Draft4 => draft4::for_each_owned_child(contents, self, f), + Draft::Draft6 => draft6::for_each_owned_child(contents, self, f), + Draft::Draft7 => draft7::for_each_owned_child(contents, self, f), + Draft::Draft201909 => draft201909::for_each_owned_child(contents, self, f), + Draft::Draft202012 | Draft::Unknown => { + draft202012::for_each_owned_child(contents, self, f) } - None => SubresourceIterator::Empty, } } - pub(crate) fn anchors(self, contents: &Value) -> impl Iterator { + pub(crate) fn anchors(self, contents: &Value) -> impl Iterator> { match self { - Draft::Draft4 => anchors::legacy_anchor_in_id(self, contents), - Draft::Draft6 | Draft::Draft7 => anchors::legacy_anchor_in_dollar_id(self, contents), - Draft::Draft201909 => anchors::anchor_2019(self, contents), - Draft::Draft202012 | Draft::Unknown => anchors::anchor(self, contents), + Draft::Draft4 => anchor::legacy_anchor_in_id(self, contents), + Draft::Draft6 | Draft::Draft7 
=> anchor::legacy_anchor_in_dollar_id(self, contents), + Draft::Draft201909 => anchor::anchor_2019(self, contents), + Draft::Draft202012 | Draft::Unknown => anchor::anchor(self, contents), } } pub(crate) fn maybe_in_subresource<'r>( self, segments: &Segments, resolver: &Resolver<'r>, - subresource: &InnerResourcePtr, + subresource: ResourceRef<'_>, ) -> Result, Error> { match self { Draft::Draft4 => draft4::maybe_in_subresource(segments, resolver, subresource), @@ -129,7 +166,7 @@ impl Draft { draft201909::maybe_in_subresource(segments, resolver, subresource) } Draft::Draft202012 | Draft::Unknown => { - subresources::maybe_in_subresource(segments, resolver, subresource) + draft202012::maybe_in_subresource(segments, resolver, subresource) } } } diff --git a/crates/jsonschema-referencing/src/lib.rs b/crates/jsonschema-referencing/src/lib.rs index 3e9f2ed8..ee58a7fd 100644 --- a/crates/jsonschema-referencing/src/lib.rs +++ b/crates/jsonschema-referencing/src/lib.rs @@ -1,31 +1,55 @@ //! # referencing //! //! An implementation-agnostic JSON reference resolution library for Rust. 
-mod anchors; + +mod anchor; mod cache; +mod draft; mod error; mod list; pub mod meta; +mod path; +mod pointer; mod registry; mod resolver; mod resource; mod retriever; mod segments; -mod specification; +mod small_map; +mod spec; pub mod uri; mod vocabularies; -pub(crate) use anchors::Anchor; +pub(crate) use anchor::Anchor; +pub use draft::Draft; pub use error::{Error, UriError}; pub use fluent_uri::{Iri, IriRef, Uri, UriRef}; pub use list::List; -pub use registry::{parse_index, pointer, Registry, RegistryOptions, SPECIFICATIONS}; +#[doc(hidden)] +pub use path::{write_escaped_str, write_index}; +pub use path::{JsonPointerNode, JsonPointerSegment, OwnedJsonPointer}; +pub use pointer::{parse_index, pointer}; +pub use registry::{IntoRegistryResource, Registry, RegistryBuilder, SPECIFICATIONS}; pub use resolver::{Resolved, Resolver}; pub use resource::{unescape_segment, Resource, ResourceRef}; pub use retriever::{DefaultRetriever, Retrieve}; pub(crate) use segments::Segments; -pub use specification::Draft; pub use vocabularies::{Vocabulary, VocabularySet}; #[cfg(feature = "retrieve-async")] pub use retriever::AsyncRetrieve; + +#[cfg(test)] +mod tests { + use crate::{JsonPointerNode, OwnedJsonPointer}; + + #[test] + fn test_json_pointer_types_are_exported_from_crate_root() { + let root = JsonPointerNode::new(); + let child = root.push(1usize); + + let pointer = OwnedJsonPointer::from(&child); + + assert_eq!(pointer.as_str(), "/1"); + } +} diff --git a/crates/jsonschema-referencing/src/path.rs b/crates/jsonschema-referencing/src/path.rs new file mode 100644 index 00000000..6d9c7c81 --- /dev/null +++ b/crates/jsonschema-referencing/src/path.rs @@ -0,0 +1,178 @@ +use std::{borrow::Cow, sync::Arc}; + +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum JsonPointerSegment<'a> { + Key(Cow<'a, str>), + Index(usize), +} + +impl From for JsonPointerSegment<'_> { + fn from(value: usize) -> Self { + Self::Index(value) + } +} + +impl<'a> From<&'a str> for JsonPointerSegment<'a> { + fn 
from(value: &'a str) -> Self { + Self::Key(Cow::Borrowed(value)) + } +} + +impl<'a> From<&'a String> for JsonPointerSegment<'a> { + fn from(value: &'a String) -> Self { + Self::Key(Cow::Borrowed(value)) + } +} + +impl<'a> From> for JsonPointerSegment<'a> { + fn from(value: Cow<'a, str>) -> Self { + Self::Key(value) + } +} + +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct JsonPointerNode<'a, 'b> { + segment: JsonPointerSegment<'a>, + parent: Option<&'b JsonPointerNode<'b, 'a>>, +} + +impl Default for JsonPointerNode<'_, '_> { + fn default() -> Self { + Self::new() + } +} + +impl JsonPointerNode<'_, '_> { + #[must_use] + pub const fn new() -> Self { + Self { + segment: JsonPointerSegment::Index(0), + parent: None, + } + } +} + +impl<'a, 'b> JsonPointerNode<'a, 'b> { + #[must_use] + pub fn push<'next>( + &'next self, + segment: impl Into>, + ) -> JsonPointerNode<'a, 'next> { + JsonPointerNode { + segment: segment.into(), + parent: Some(self), + } + } + #[must_use] + pub const fn segment(&self) -> &JsonPointerSegment<'a> { + &self.segment + } + + #[must_use] + pub const fn parent(&self) -> Option<&'b JsonPointerNode<'b, 'a>> { + self.parent + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct OwnedJsonPointer(Arc); + +impl OwnedJsonPointer { + #[must_use] + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl<'a> From<&'a JsonPointerNode<'_, 'a>> for OwnedJsonPointer { + fn from(value: &'a JsonPointerNode<'_, 'a>) -> Self { + let mut segments = Vec::new(); + let mut head = value; + while let Some(parent) = head.parent { + segments.push(&head.segment); + head = parent; + } + + let mut buffer = String::new(); + for segment in segments.iter().rev() { + buffer.push('/'); + match segment { + JsonPointerSegment::Key(key) => write_escaped_str(&mut buffer, key), + JsonPointerSegment::Index(idx) => write_index(&mut buffer, *idx), + } + } + Self(Arc::from(buffer)) + } +} + +/// Escape a key into a JSON Pointer segment: `~` → `~0`, `/` → `~1`. 
+/// +/// Appends the escaped form of `value` directly to `buffer`. +pub fn write_escaped_str(buffer: &mut String, value: &str) { + match value.find(['~', '/']) { + Some(mut escape_idx) => { + let mut remaining = value; + + // Loop through the string to replace `~` and `/` + loop { + let (before, after) = remaining.split_at(escape_idx); + // Copy everything before the escape char + buffer.push_str(before); + + // Append the appropriate escape sequence + match after.as_bytes()[0] { + b'~' => buffer.push_str("~0"), + b'/' => buffer.push_str("~1"), + _ => unreachable!(), + } + + // Move past the escaped character + remaining = &after[1..]; + + // Find the next `~` or `/` to continue escaping + if let Some(next_escape_idx) = remaining.find(['~', '/']) { + escape_idx = next_escape_idx; + } else { + // Append any remaining part of the string + buffer.push_str(remaining); + break; + } + } + } + None => { + // If no escape characters are found, append the segment as is + buffer.push_str(value); + } + } +} + +#[inline] +pub fn write_index(buffer: &mut String, idx: usize) { + let mut itoa_buffer = itoa::Buffer::new(); + buffer.push_str(itoa_buffer.format(idx)); +} + +#[cfg(test)] +mod tests { + use super::{JsonPointerNode, OwnedJsonPointer}; + + #[test] + fn test_json_pointer_node_single_index_fast_path() { + let root = JsonPointerNode::new(); + let child = root.push(3usize); + + let pointer = OwnedJsonPointer::from(&child); + + assert_eq!(pointer.as_str(), "/3"); + } + + #[test] + fn test_json_pointer_node_escapes_property_names() { + let root = JsonPointerNode::new(); + let child = root.push("foo/bar~baz"); + + let pointer = OwnedJsonPointer::from(&child); + + assert_eq!(pointer.as_str(), "/foo~1bar~0baz"); + } +} diff --git a/crates/jsonschema-referencing/src/pointer.rs b/crates/jsonschema-referencing/src/pointer.rs new file mode 100644 index 00000000..2bcc7682 --- /dev/null +++ b/crates/jsonschema-referencing/src/pointer.rs @@ -0,0 +1,136 @@ +use serde_json::Value; + 
+use crate::{ + path::{JsonPointerNode, JsonPointerSegment}, + resource::unescape_segment, +}; + +#[derive(Debug, Clone, Default)] +pub(crate) struct ParsedPointer { + pub(crate) segments: Vec, +} + +impl ParsedPointer { + pub(crate) fn from_json_pointer(pointer: &str) -> Option { + if pointer.is_empty() { + return Some(Self::default()); + } + if !pointer.starts_with('/') { + return None; + } + + let mut segments = Vec::new(); + for token in pointer.split('/').skip(1).map(unescape_segment) { + if let Some(index) = parse_index(&token) { + segments.push(ParsedPointerSegment::Index(index)); + } else { + segments.push(ParsedPointerSegment::Key( + token.into_owned().into_boxed_str(), + )); + } + } + Some(Self { segments }) + } + + pub(crate) fn from_pointer_node(path: &JsonPointerNode<'_, '_>) -> Self { + let mut segments = Vec::new(); + let mut head = path; + + while let Some(parent) = head.parent() { + segments.push(match head.segment() { + JsonPointerSegment::Key(key) => ParsedPointerSegment::Key(key.as_ref().into()), + JsonPointerSegment::Index(idx) => ParsedPointerSegment::Index(*idx), + }); + head = parent; + } + + segments.reverse(); + Self { segments } + } + + pub(crate) fn lookup<'a>(&self, document: &'a Value) -> Option<&'a Value> { + self.segments + .iter() + .try_fold(document, |target, token| match token { + ParsedPointerSegment::Key(key) => match target { + Value::Object(map) => map.get(&**key), + _ => None, + }, + ParsedPointerSegment::Index(index) => match target { + Value::Array(list) => list.get(*index), + _ => None, + }, + }) + } +} + +#[derive(Debug, Clone)] +pub(crate) enum ParsedPointerSegment { + Key(Box), + Index(usize), +} + +/// Look up a value by a JSON Pointer. +/// +/// **NOTE**: A slightly faster version of pointer resolution based on `Value::pointer` from `serde_json`. 
+pub fn pointer<'a>(document: &'a Value, pointer: &str) -> Option<&'a Value> { + if pointer.is_empty() { + return Some(document); + } + if !pointer.starts_with('/') { + return None; + } + pointer.split('/').skip(1).map(unescape_segment).try_fold( + document, + |target, token| match target { + Value::Object(map) => map.get(&*token), + Value::Array(list) => parse_index(&token).and_then(|x| list.get(x)), + _ => None, + }, + ) +} + +// Taken from `serde_json`. +#[must_use] +pub fn parse_index(s: &str) -> Option { + if s.starts_with('+') || (s.starts_with('0') && s.len() != 1) { + return None; + } + s.parse().ok() +} + +#[cfg(test)] +mod tests { + use serde_json::json; + + use crate::JsonPointerNode; + + use super::{pointer, ParsedPointer}; + + #[test] + fn test_empty_pointer() { + let document = json!({}); + assert_eq!(pointer(&document, ""), Some(&document)); + } + + #[test] + fn test_parsed_pointer_from_json_pointer_node_matches_pointer_lookup() { + let document = json!({ + "$defs": { + "foo/bar": [ + {"value": true} + ] + } + }); + let root = JsonPointerNode::new(); + let defs = root.push("$defs"); + let entry = defs.push("foo/bar"); + let node = entry.push(0); + + let parsed = ParsedPointer::from_pointer_node(&node); + assert_eq!( + parsed.lookup(&document), + pointer(&document, "/$defs/foo~1bar/0") + ); + } +} diff --git a/crates/jsonschema-referencing/src/registry.rs b/crates/jsonschema-referencing/src/registry.rs deleted file mode 100644 index 3e1e5b8b..00000000 --- a/crates/jsonschema-referencing/src/registry.rs +++ /dev/null @@ -1,2176 +0,0 @@ -use std::{ - collections::{hash_map::Entry, VecDeque}, - num::NonZeroUsize, - pin::Pin, - sync::{Arc, LazyLock}, -}; - -use ahash::{AHashMap, AHashSet}; -use fluent_uri::{pct_enc::EStr, Uri}; -use serde_json::Value; - -use crate::{ - anchors::{AnchorKey, AnchorKeyRef}, - cache::{SharedUriCache, UriCache}, - meta::{self, metas_for_draft}, - resource::{unescape_segment, InnerResourcePtr, JsonSchemaResource}, - uri, - 
vocabularies::{self, VocabularySet}, - Anchor, DefaultRetriever, Draft, Error, Resolver, Resource, ResourceRef, Retrieve, -}; - -/// An owned-or-refstatic wrapper for JSON `Value`. -#[derive(Debug)] -pub(crate) enum ValueWrapper { - Owned(Value), - StaticRef(&'static Value), -} - -impl AsRef for ValueWrapper { - fn as_ref(&self) -> &Value { - match self { - ValueWrapper::Owned(value) => value, - ValueWrapper::StaticRef(value) => value, - } - } -} - -// SAFETY: `Pin` guarantees stable memory locations for resource pointers, -// while `Arc` enables cheap sharing between multiple registries -type DocumentStore = AHashMap>, Pin>>; -type ResourceMap = AHashMap>, InnerResourcePtr>; - -/// Pre-loaded registry containing all JSON Schema meta-schemas and their vocabularies -pub static SPECIFICATIONS: LazyLock = - LazyLock::new(|| Registry::build_from_meta_schemas(meta::META_SCHEMAS_ALL.as_slice())); - -/// A registry of JSON Schema resources, each identified by their canonical URIs. -/// -/// Registries store a collection of in-memory resources and their anchors. -/// They eagerly process all added resources, including their subresources and anchors. -/// This means that subresources contained within any added resources are immediately -/// discoverable and retrievable via their own IDs. -/// -/// # Resource Retrieval -/// -/// Registry supports both blocking and non-blocking retrieval of external resources. 
-/// -/// ## Blocking Retrieval -/// -/// ```rust -/// use referencing::{Registry, Resource, Retrieve, Uri}; -/// use serde_json::{json, Value}; -/// -/// struct ExampleRetriever; -/// -/// impl Retrieve for ExampleRetriever { -/// fn retrieve( -/// &self, -/// uri: &Uri -/// ) -> Result> { -/// // Always return the same value for brevity -/// Ok(json!({"type": "string"})) -/// } -/// } -/// -/// # fn example() -> Result<(), Box> { -/// let registry = Registry::options() -/// .retriever(ExampleRetriever) -/// .build([ -/// // Initial schema that might reference external schemas -/// ( -/// "https://example.com/user.json", -/// Resource::from_contents(json!({ -/// "type": "object", -/// "properties": { -/// // Should be retrieved by `ExampleRetriever` -/// "role": {"$ref": "https://example.com/role.json"} -/// } -/// })) -/// ) -/// ])?; -/// # Ok(()) -/// # } -/// ``` -/// -/// ## Non-blocking Retrieval -/// -/// ```rust -/// # #[cfg(feature = "retrieve-async")] -/// # mod example { -/// use referencing::{Registry, Resource, AsyncRetrieve, Uri}; -/// use serde_json::{json, Value}; -/// -/// struct ExampleRetriever; -/// -/// #[cfg_attr(target_family = "wasm", async_trait::async_trait(?Send))] -/// #[cfg_attr(not(target_family = "wasm"), async_trait::async_trait)] -/// impl AsyncRetrieve for ExampleRetriever { -/// async fn retrieve( -/// &self, -/// uri: &Uri -/// ) -> Result> { -/// // Always return the same value for brevity -/// Ok(json!({"type": "string"})) -/// } -/// } -/// -/// # async fn example() -> Result<(), Box> { -/// let registry = Registry::options() -/// .async_retriever(ExampleRetriever) -/// .build([ -/// ( -/// "https://example.com/user.json", -/// Resource::from_contents(json!({ -/// // Should be retrieved by `ExampleRetriever` -/// "$ref": "https://example.com/common/user.json" -/// })) -/// ) -/// ]) -/// .await?; -/// # Ok(()) -/// # } -/// # } -/// ``` -/// -/// The registry will automatically: -/// -/// - Resolve external references -/// - 
Cache retrieved schemas -/// - Handle nested references -/// - Process JSON Schema anchors -/// -#[derive(Debug)] -pub struct Registry { - documents: DocumentStore, - pub(crate) resources: ResourceMap, - anchors: AHashMap, - resolution_cache: SharedUriCache, -} - -impl Clone for Registry { - fn clone(&self) -> Self { - Self { - documents: self.documents.clone(), - resources: self.resources.clone(), - anchors: self.anchors.clone(), - resolution_cache: self.resolution_cache.clone(), - } - } -} - -/// Configuration options for creating a [`Registry`]. -pub struct RegistryOptions { - retriever: R, - draft: Draft, -} - -impl RegistryOptions { - /// Set specification version under which the resources should be interpreted under. - #[must_use] - pub fn draft(mut self, draft: Draft) -> Self { - self.draft = draft; - self - } -} - -impl RegistryOptions> { - /// Create a new [`RegistryOptions`] with default settings. - #[must_use] - pub fn new() -> Self { - Self { - retriever: Arc::new(DefaultRetriever), - draft: Draft::default(), - } - } - /// Set a custom retriever for the [`Registry`]. - #[must_use] - pub fn retriever(mut self, retriever: impl IntoRetriever) -> Self { - self.retriever = retriever.into_retriever(); - self - } - /// Set a custom async retriever for the [`Registry`]. - #[cfg(feature = "retrieve-async")] - #[must_use] - pub fn async_retriever( - self, - retriever: impl IntoAsyncRetriever, - ) -> RegistryOptions> { - RegistryOptions { - retriever: retriever.into_retriever(), - draft: self.draft, - } - } - /// Create a [`Registry`] from multiple resources using these options. 
- /// - /// # Errors - /// - /// Returns an error if: - /// - Any URI is invalid - /// - Any referenced resources cannot be retrieved - pub fn build( - self, - pairs: impl IntoIterator, Resource)>, - ) -> Result { - Registry::try_from_resources_impl(pairs, &*self.retriever, self.draft) - } -} - -#[cfg(feature = "retrieve-async")] -impl RegistryOptions> { - /// Create a [`Registry`] from multiple resources using these options with async retrieval. - /// - /// # Errors - /// - /// Returns an error if: - /// - Any URI is invalid - /// - Any referenced resources cannot be retrieved - pub async fn build( - self, - pairs: impl IntoIterator, Resource)>, - ) -> Result { - Registry::try_from_resources_async_impl(pairs, &*self.retriever, self.draft).await - } -} - -pub trait IntoRetriever { - fn into_retriever(self) -> Arc; -} - -impl IntoRetriever for T { - fn into_retriever(self) -> Arc { - Arc::new(self) - } -} - -impl IntoRetriever for Arc { - fn into_retriever(self) -> Arc { - self - } -} - -#[cfg(feature = "retrieve-async")] -pub trait IntoAsyncRetriever { - fn into_retriever(self) -> Arc; -} - -#[cfg(feature = "retrieve-async")] -impl IntoAsyncRetriever for T { - fn into_retriever(self) -> Arc { - Arc::new(self) - } -} - -#[cfg(feature = "retrieve-async")] -impl IntoAsyncRetriever for Arc { - fn into_retriever(self) -> Arc { - self - } -} - -impl Default for RegistryOptions> { - fn default() -> Self { - Self::new() - } -} - -impl Registry { - /// Get [`RegistryOptions`] for configuring a new [`Registry`]. - #[must_use] - pub fn options() -> RegistryOptions> { - RegistryOptions::new() - } - /// Create a new [`Registry`] with a single resource. - /// - /// # Arguments - /// - /// * `uri` - The URI of the resource. - /// * `resource` - The resource to add. - /// - /// # Errors - /// - /// Returns an error if the URI is invalid or if there's an issue processing the resource. 
- pub fn try_new(uri: impl AsRef, resource: Resource) -> Result { - Self::try_new_impl(uri, resource, &DefaultRetriever, Draft::default()) - } - /// Create a new [`Registry`] from an iterator of (URI, Resource) pairs. - /// - /// # Arguments - /// - /// * `pairs` - An iterator of (URI, Resource) pairs. - /// - /// # Errors - /// - /// Returns an error if any URI is invalid or if there's an issue processing the resources. - pub fn try_from_resources( - pairs: impl IntoIterator, Resource)>, - ) -> Result { - Self::try_from_resources_impl(pairs, &DefaultRetriever, Draft::default()) - } - fn try_new_impl( - uri: impl AsRef, - resource: Resource, - retriever: &dyn Retrieve, - draft: Draft, - ) -> Result { - Self::try_from_resources_impl([(uri, resource)], retriever, draft) - } - fn try_from_resources_impl( - pairs: impl IntoIterator, Resource)>, - retriever: &dyn Retrieve, - draft: Draft, - ) -> Result { - let mut documents = AHashMap::new(); - let mut resources = ResourceMap::new(); - let mut anchors = AHashMap::new(); - let mut resolution_cache = UriCache::new(); - let custom_metaschemas = process_resources( - pairs, - retriever, - &mut documents, - &mut resources, - &mut anchors, - &mut resolution_cache, - draft, - )?; - - // Validate that all custom $schema references are registered - validate_custom_metaschemas(&custom_metaschemas, &resources)?; - - Ok(Registry { - documents, - resources, - anchors, - resolution_cache: resolution_cache.into_shared(), - }) - } - /// Create a new [`Registry`] from an iterator of (URI, Resource) pairs using an async retriever. - /// - /// # Arguments - /// - /// * `pairs` - An iterator of (URI, Resource) pairs. - /// - /// # Errors - /// - /// Returns an error if any URI is invalid or if there's an issue processing the resources. 
- #[cfg(feature = "retrieve-async")] - async fn try_from_resources_async_impl( - pairs: impl IntoIterator, Resource)>, - retriever: &dyn crate::AsyncRetrieve, - draft: Draft, - ) -> Result { - let mut documents = AHashMap::new(); - let mut resources = ResourceMap::new(); - let mut anchors = AHashMap::new(); - let mut resolution_cache = UriCache::new(); - - let custom_metaschemas = process_resources_async( - pairs, - retriever, - &mut documents, - &mut resources, - &mut anchors, - &mut resolution_cache, - draft, - ) - .await?; - - // Validate that all custom $schema references are registered - validate_custom_metaschemas(&custom_metaschemas, &resources)?; - - Ok(Registry { - documents, - resources, - anchors, - resolution_cache: resolution_cache.into_shared(), - }) - } - /// Create a new registry with a new resource. - /// - /// # Errors - /// - /// Returns an error if the URI is invalid or if there's an issue processing the resource. - pub fn try_with_resource( - self, - uri: impl AsRef, - resource: Resource, - ) -> Result { - let draft = resource.draft(); - self.try_with_resources([(uri, resource)], draft) - } - /// Create a new registry with new resources. - /// - /// # Errors - /// - /// Returns an error if any URI is invalid or if there's an issue processing the resources. - pub fn try_with_resources( - self, - pairs: impl IntoIterator, Resource)>, - draft: Draft, - ) -> Result { - self.try_with_resources_and_retriever(pairs, &DefaultRetriever, draft) - } - /// Create a new registry with new resources and using the given retriever. - /// - /// # Errors - /// - /// Returns an error if any URI is invalid or if there's an issue processing the resources. 
- pub fn try_with_resources_and_retriever( - self, - pairs: impl IntoIterator, Resource)>, - retriever: &dyn Retrieve, - draft: Draft, - ) -> Result { - let mut documents = self.documents; - let mut resources = self.resources; - let mut anchors = self.anchors; - let mut resolution_cache = self.resolution_cache.into_local(); - let custom_metaschemas = process_resources( - pairs, - retriever, - &mut documents, - &mut resources, - &mut anchors, - &mut resolution_cache, - draft, - )?; - validate_custom_metaschemas(&custom_metaschemas, &resources)?; - Ok(Registry { - documents, - resources, - anchors, - resolution_cache: resolution_cache.into_shared(), - }) - } - /// Create a new registry with new resources and using the given non-blocking retriever. - /// - /// # Errors - /// - /// Returns an error if any URI is invalid or if there's an issue processing the resources. - #[cfg(feature = "retrieve-async")] - pub async fn try_with_resources_and_retriever_async( - self, - pairs: impl IntoIterator, Resource)>, - retriever: &dyn crate::AsyncRetrieve, - draft: Draft, - ) -> Result { - let mut documents = self.documents; - let mut resources = self.resources; - let mut anchors = self.anchors; - let mut resolution_cache = self.resolution_cache.into_local(); - let custom_metaschemas = process_resources_async( - pairs, - retriever, - &mut documents, - &mut resources, - &mut anchors, - &mut resolution_cache, - draft, - ) - .await?; - validate_custom_metaschemas(&custom_metaschemas, &resources)?; - Ok(Registry { - documents, - resources, - anchors, - resolution_cache: resolution_cache.into_shared(), - }) - } - /// Create a new [`Resolver`] for this registry with the given base URI. - /// - /// # Errors - /// - /// Returns an error if the base URI is invalid. - pub fn try_resolver(&self, base_uri: &str) -> Result, Error> { - let base = uri::from_str(base_uri)?; - Ok(self.resolver(base)) - } - /// Create a new [`Resolver`] for this registry with a known valid base URI. 
- #[must_use] - pub fn resolver(&self, base_uri: Uri) -> Resolver<'_> { - Resolver::new(self, Arc::new(base_uri)) - } - pub(crate) fn anchor<'a>(&self, uri: &'a Uri, name: &'a str) -> Result<&Anchor, Error> { - let key = AnchorKeyRef::new(uri, name); - if let Some(value) = self.anchors.get(key.borrow_dyn()) { - return Ok(value); - } - let resource = &self.resources[uri]; - if let Some(id) = resource.id() { - let uri = uri::from_str(id)?; - let key = AnchorKeyRef::new(&uri, name); - if let Some(value) = self.anchors.get(key.borrow_dyn()) { - return Ok(value); - } - } - if name.contains('/') { - Err(Error::invalid_anchor(name.to_string())) - } else { - Err(Error::no_such_anchor(name.to_string())) - } - } - /// Resolves a reference URI against a base URI using registry's cache. - /// - /// # Errors - /// - /// Returns an error if base has not schema or there is a fragment. - pub fn resolve_against(&self, base: &Uri<&str>, uri: &str) -> Result>, Error> { - self.resolution_cache.resolve_against(base, uri) - } - /// Returns vocabulary set configured for given draft and contents. - /// - /// For custom meta-schemas (`Draft::Unknown`), looks up the meta-schema in the registry - /// and extracts its `$vocabulary` declaration. If the meta-schema is not registered, - /// returns the default Draft 2020-12 vocabularies. 
- #[must_use] - pub fn find_vocabularies(&self, draft: Draft, contents: &Value) -> VocabularySet { - match draft.detect(contents) { - Draft::Unknown => { - // Custom/unknown meta-schema - try to look it up in the registry - if let Some(specification) = contents - .as_object() - .and_then(|obj| obj.get("$schema")) - .and_then(|s| s.as_str()) - { - if let Ok(mut uri) = uri::from_str(specification) { - // Remove fragment for lookup (e.g., "http://example.com/schema#" -> "http://example.com/schema") - // Resources are stored without fragments, so we must strip it to find the meta-schema - uri.set_fragment(None); - if let Some(resource) = self.resources.get(&uri) { - // Found the custom meta-schema - extract vocabularies - if let Ok(Some(vocabularies)) = vocabularies::find(resource.contents()) - { - return vocabularies; - } - } - // Meta-schema not registered - this will be caught during compilation - // For now, return default vocabularies to allow resource creation - } - } - // Default to Draft 2020-12 vocabularies for unknown meta-schemas - Draft::Unknown.default_vocabularies() - } - draft => draft.default_vocabularies(), - } - } - - /// Build a registry with all the given meta-schemas from specs. - pub(crate) fn build_from_meta_schemas(schemas: &[(&'static str, &'static Value)]) -> Self { - let schemas_count = schemas.len(); - let pairs = schemas - .iter() - .map(|(uri, schema)| (uri, ResourceRef::from_contents(schema))); - - let mut documents = DocumentStore::with_capacity(schemas_count); - let mut resources = ResourceMap::with_capacity(schemas_count); - - // The actual number of anchors and cache-entries varies across - // drafts. We overshoot here to avoid reallocations, using the sum - // over all specifications. 
- let mut anchors = AHashMap::with_capacity(8); - let mut resolution_cache = UriCache::with_capacity(35); - - process_meta_schemas( - pairs, - &mut documents, - &mut resources, - &mut anchors, - &mut resolution_cache, - ) - .expect("Failed to process meta schemas"); - - Self { - documents, - resources, - anchors, - resolution_cache: resolution_cache.into_shared(), - } - } -} - -fn process_meta_schemas( - pairs: impl IntoIterator, ResourceRef<'static>)>, - documents: &mut DocumentStore, - resources: &mut ResourceMap, - anchors: &mut AHashMap, - resolution_cache: &mut UriCache, -) -> Result<(), Error> { - let mut queue = VecDeque::with_capacity(32); - - for (uri, resource) in pairs { - let uri = uri::from_str(uri.as_ref().trim_end_matches('#'))?; - let key = Arc::new(uri); - let contents: &'static Value = resource.contents(); - let wrapped_value = Arc::pin(ValueWrapper::StaticRef(contents)); - let resource = InnerResourcePtr::new((*wrapped_value).as_ref(), resource.draft()); - documents.insert(Arc::clone(&key), wrapped_value); - resources.insert(Arc::clone(&key), resource.clone()); - queue.push_back((key, resource)); - } - - // Process current queue and collect references to external resources - while let Some((mut base, resource)) = queue.pop_front() { - if let Some(id) = resource.id() { - base = resolution_cache.resolve_against(&base.borrow(), id)?; - resources.insert(base.clone(), resource.clone()); - } - - // Look for anchors - for anchor in resource.anchors() { - anchors.insert(AnchorKey::new(base.clone(), anchor.name()), anchor); - } - - // Process subresources - for contents in resource.draft().subresources_of(resource.contents()) { - let subresource_draft = resource.draft().detect(contents); - let subresource = InnerResourcePtr::new(contents, subresource_draft); - queue.push_back((base.clone(), subresource)); - } - } - Ok(()) -} - -#[derive(Hash, Eq, PartialEq)] -struct ReferenceKey { - base_ptr: NonZeroUsize, - reference: String, -} - -impl ReferenceKey { - 
fn new(base: &Arc>, reference: &str) -> Self { - Self { - base_ptr: NonZeroUsize::new(Arc::as_ptr(base) as usize) - .expect("Arc pointer should never be null"), - reference: reference.to_owned(), - } - } -} - -type ReferenceTracker = AHashSet; - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -enum ReferenceKind { - Ref, - Schema, -} - -/// An entry in the processing queue. -/// The optional third element is the document root URI, used when the resource -/// was extracted from a fragment of a larger document. Local `$ref`s need to be -/// resolved against the document root, not just the fragment content. -type QueueEntry = (Arc>, InnerResourcePtr, Option>>); - -struct ProcessingState { - queue: VecDeque, - seen: ReferenceTracker, - external: AHashSet<(String, Uri, ReferenceKind)>, - scratch: String, - refers_metaschemas: bool, - custom_metaschemas: Vec>>, - /// Tracks schema pointers we've visited during recursive external resource collection. - /// This prevents infinite recursion when schemas reference each other. 
- visited_schemas: AHashSet, -} - -impl ProcessingState { - fn new() -> Self { - Self { - queue: VecDeque::with_capacity(32), - seen: ReferenceTracker::new(), - external: AHashSet::new(), - scratch: String::new(), - refers_metaschemas: false, - custom_metaschemas: Vec::new(), - visited_schemas: AHashSet::new(), - } - } -} - -fn process_input_resources( - pairs: impl IntoIterator, Resource)>, - documents: &mut DocumentStore, - resources: &mut ResourceMap, - state: &mut ProcessingState, -) -> Result<(), Error> { - for (uri, resource) in pairs { - let uri = uri::from_str(uri.as_ref().trim_end_matches('#'))?; - let key = Arc::new(uri); - match documents.entry(Arc::clone(&key)) { - Entry::Occupied(_) => {} - Entry::Vacant(entry) => { - let (draft, contents) = resource.into_inner(); - let wrapped_value = Arc::pin(ValueWrapper::Owned(contents)); - let resource = InnerResourcePtr::new((*wrapped_value).as_ref(), draft); - resources.insert(Arc::clone(&key), resource.clone()); - - // Track resources with custom meta-schemas for later validation - if draft == Draft::Unknown { - state.custom_metaschemas.push(Arc::clone(&key)); - } - - state.queue.push_back((key, resource, None)); - entry.insert(wrapped_value); - } - } - } - Ok(()) -} - -fn process_queue( - state: &mut ProcessingState, - resources: &mut ResourceMap, - anchors: &mut AHashMap, - resolution_cache: &mut UriCache, -) -> Result<(), Error> { - while let Some((mut base, resource, document_root_uri)) = state.queue.pop_front() { - if let Some(id) = resource.id() { - base = resolve_id(&base, id, resolution_cache)?; - resources.insert(base.clone(), resource.clone()); - } - - for anchor in resource.anchors() { - anchors.insert(AnchorKey::new(base.clone(), anchor.name()), anchor); - } - - // Determine the document root for resolving local $refs. - // If document_root_uri is set (e.g., for fragment-extracted resources), - // look up the full document. Otherwise, this resource IS the document root. 
- let root = document_root_uri - .as_ref() - .and_then(|uri| resources.get(uri)) - .map_or_else(|| resource.contents(), InnerResourcePtr::contents); - - // Skip if already visited during local $ref resolution - let contents_ptr = std::ptr::from_ref::(resource.contents()) as usize; - if state.visited_schemas.insert(contents_ptr) { - collect_external_resources( - &base, - root, - resource.contents(), - &mut state.external, - &mut state.seen, - resolution_cache, - &mut state.scratch, - &mut state.refers_metaschemas, - resource.draft(), - &mut state.visited_schemas, - )?; - } - - // Subresources inherit the document root URI, or use the current base if none set - let subresource_root_uri = document_root_uri.or_else(|| Some(base.clone())); - for contents in resource.draft().subresources_of(resource.contents()) { - let subresource_draft = resource.draft().detect(contents); - let subresource = InnerResourcePtr::new(contents, subresource_draft); - state - .queue - .push_back((base.clone(), subresource, subresource_root_uri.clone())); - } - } - Ok(()) -} - -fn handle_fragment( - uri: &Uri, - resource: &InnerResourcePtr, - key: &Arc>, - default_draft: Draft, - queue: &mut VecDeque, - document_root_uri: Arc>, -) { - if let Some(fragment) = uri.fragment() { - if let Some(resolved) = pointer(resource.contents(), fragment.as_str()) { - let draft = default_draft.detect(resolved); - let contents = std::ptr::addr_of!(*resolved); - let resource = InnerResourcePtr::new(contents, draft); - queue.push_back((Arc::clone(key), resource, Some(document_root_uri))); - } - } -} - -fn handle_metaschemas( - refers_metaschemas: bool, - resources: &mut ResourceMap, - anchors: &mut AHashMap, - draft_version: Draft, -) { - if refers_metaschemas { - let schemas = metas_for_draft(draft_version); - let draft_registry = Registry::build_from_meta_schemas(schemas); - resources.reserve(draft_registry.resources.len()); - for (key, resource) in draft_registry.resources { - resources.insert(key, 
resource.clone()); - } - anchors.reserve(draft_registry.anchors.len()); - for (key, anchor) in draft_registry.anchors { - anchors.insert(key, anchor); - } - } -} - -fn create_resource( - retrieved: Value, - fragmentless: Uri, - default_draft: Draft, - documents: &mut DocumentStore, - resources: &mut ResourceMap, - custom_metaschemas: &mut Vec>>, -) -> (Arc>, InnerResourcePtr) { - let draft = default_draft.detect(&retrieved); - let wrapped_value = Arc::pin(ValueWrapper::Owned(retrieved)); - let resource = InnerResourcePtr::new((*wrapped_value).as_ref(), draft); - let key = Arc::new(fragmentless); - documents.insert(Arc::clone(&key), wrapped_value); - resources.insert(Arc::clone(&key), resource.clone()); - - // Track resources with custom meta-schemas for later validation - if draft == Draft::Unknown { - custom_metaschemas.push(Arc::clone(&key)); - } - - (key, resource) -} - -fn process_resources( - pairs: impl IntoIterator, Resource)>, - retriever: &dyn Retrieve, - documents: &mut DocumentStore, - resources: &mut ResourceMap, - anchors: &mut AHashMap, - resolution_cache: &mut UriCache, - default_draft: Draft, -) -> Result>>, Error> { - let mut state = ProcessingState::new(); - process_input_resources(pairs, documents, resources, &mut state)?; - - loop { - if state.queue.is_empty() && state.external.is_empty() { - break; - } - - process_queue(&mut state, resources, anchors, resolution_cache)?; - - // Retrieve external resources - for (original, uri, kind) in state.external.drain() { - let mut fragmentless = uri.clone(); - fragmentless.set_fragment(None); - if !resources.contains_key(&fragmentless) { - let retrieved = match retriever.retrieve(&fragmentless) { - Ok(retrieved) => retrieved, - Err(error) => { - handle_retrieve_error(&uri, &original, &fragmentless, error, kind)?; - continue; - } - }; - - let (key, resource) = create_resource( - retrieved, - fragmentless, - default_draft, - documents, - resources, - &mut state.custom_metaschemas, - ); - handle_fragment( - 
&uri, - &resource, - &key, - default_draft, - &mut state.queue, - Arc::clone(&key), - ); - state.queue.push_back((key, resource, None)); - } - } - } - - handle_metaschemas(state.refers_metaschemas, resources, anchors, default_draft); - - Ok(state.custom_metaschemas) -} - -#[cfg(feature = "retrieve-async")] -async fn process_resources_async( - pairs: impl IntoIterator, Resource)>, - retriever: &dyn crate::AsyncRetrieve, - documents: &mut DocumentStore, - resources: &mut ResourceMap, - anchors: &mut AHashMap, - resolution_cache: &mut UriCache, - default_draft: Draft, -) -> Result>>, Error> { - type ExternalRefsByBase = AHashMap, Vec<(String, Uri, ReferenceKind)>>; - - let mut state = ProcessingState::new(); - process_input_resources(pairs, documents, resources, &mut state)?; - - loop { - if state.queue.is_empty() && state.external.is_empty() { - break; - } - - process_queue(&mut state, resources, anchors, resolution_cache)?; - - if !state.external.is_empty() { - // Group external refs by fragmentless URI to avoid fetching the same resource multiple times. - // Multiple refs may point to the same base URL with different fragments (e.g., #/$defs/foo and #/$defs/bar). - // We need to fetch each unique base URL only once, then handle all fragment refs against it. 
- let mut grouped = ExternalRefsByBase::new(); - for (original, uri, kind) in state.external.drain() { - let mut fragmentless = uri.clone(); - fragmentless.set_fragment(None); - if !resources.contains_key(&fragmentless) { - grouped - .entry(fragmentless) - .or_default() - .push((original, uri, kind)); - } - } - - // Fetch each unique fragmentless URI once - let entries: Vec<_> = grouped.into_iter().collect(); - let results = { - let futures = entries - .iter() - .map(|(fragmentless, _)| retriever.retrieve(fragmentless)); - futures::future::join_all(futures).await - }; - - for ((fragmentless, refs), result) in entries.into_iter().zip(results) { - let retrieved = match result { - Ok(retrieved) => retrieved, - Err(error) => { - // Report error for the first ref that caused this fetch - if let Some((original, uri, kind)) = refs.into_iter().next() { - handle_retrieve_error(&uri, &original, &fragmentless, error, kind)?; - } - continue; - } - }; - - let (key, resource) = create_resource( - retrieved, - fragmentless, - default_draft, - documents, - resources, - &mut state.custom_metaschemas, - ); - - // Handle all fragment refs that pointed to this base URL - for (_, uri, _) in &refs { - handle_fragment( - uri, - &resource, - &key, - default_draft, - &mut state.queue, - Arc::clone(&key), - ); - } - - state.queue.push_back((key, resource, None)); - } - } - } - - handle_metaschemas(state.refers_metaschemas, resources, anchors, default_draft); - - Ok(state.custom_metaschemas) -} - -fn handle_retrieve_error( - uri: &Uri, - original: &str, - fragmentless: &Uri, - error: Box, - kind: ReferenceKind, -) -> Result<(), Error> { - match kind { - ReferenceKind::Schema => { - // $schema fetch failures are non-fatal during resource processing - // Unregistered custom meta-schemas will be caught in validate_custom_metaschemas() - Ok(()) - } - ReferenceKind::Ref => { - // $ref fetch failures are fatal - they're required for validation - if uri.scheme().as_str() == "json-schema" { - 
Err(Error::unretrievable( - original, - "No base URI is available".into(), - )) - } else { - Err(Error::unretrievable(fragmentless.as_str(), error)) - } - } - } -} - -fn validate_custom_metaschemas( - custom_metaschemas: &[Arc>], - resources: &ResourceMap, -) -> Result<(), Error> { - // Only validate resources with Draft::Unknown - for uri in custom_metaschemas { - if let Some(resource) = resources.get(uri) { - // Extract the $schema value from this resource - if let Some(schema_uri) = resource - .contents() - .as_object() - .and_then(|obj| obj.get("$schema")) - .and_then(|s| s.as_str()) - { - // Check if this meta-schema is registered - match uri::from_str(schema_uri) { - Ok(mut meta_uri) => { - // Remove fragment for lookup (e.g., "http://example.com/schema#" -> "http://example.com/schema") - meta_uri.set_fragment(None); - if !resources.contains_key(&meta_uri) { - return Err(Error::unknown_specification(schema_uri)); - } - } - Err(_) => { - return Err(Error::unknown_specification(schema_uri)); - } - } - } - } - } - Ok(()) -} - -fn collect_external_resources( - base: &Arc>, - root: &Value, - contents: &Value, - collected: &mut AHashSet<(String, Uri, ReferenceKind)>, - seen: &mut ReferenceTracker, - resolution_cache: &mut UriCache, - scratch: &mut String, - refers_metaschemas: &mut bool, - draft: Draft, - visited: &mut AHashSet, -) -> Result<(), Error> { - // URN schemes are not supported for external resolution - if base.scheme().as_str() == "urn" { - return Ok(()); - } - - macro_rules! 
on_reference { - ($reference:expr, $key:literal) => { - // Skip well-known schema references - if $reference.starts_with("https://json-schema.org/draft/") - || $reference.starts_with("http://json-schema.org/draft-") - || base.as_str().starts_with("https://json-schema.org/draft/") - { - if $key == "$ref" { - *refers_metaschemas = true; - } - } else if $reference != "#" { - if mark_reference(seen, base, $reference) { - // Handle local references separately as they may have nested references to external resources - if $reference.starts_with('#') { - // Use the root document for pointer resolution since local refs are always - // relative to the document root, not the current subschema. - // Also track $id changes along the path to get the correct base URI. - if let Some((referenced, resolved_base)) = pointer_with_base( - root, - $reference.trim_start_matches('#'), - base, - resolution_cache, - draft, - )? { - // Recursively collect from the referenced schema and all its subresources - collect_external_resources_recursive( - &resolved_base, - root, - referenced, - collected, - seen, - resolution_cache, - scratch, - refers_metaschemas, - draft, - visited, - )?; - } - } else { - let resolved = if base.has_fragment() { - let mut base_without_fragment = base.as_ref().clone(); - base_without_fragment.set_fragment(None); - - let (path, fragment) = match $reference.split_once('#') { - Some((path, fragment)) => (path, Some(fragment)), - None => ($reference, None), - }; - - let mut resolved = (*resolution_cache - .resolve_against(&base_without_fragment.borrow(), path)?) 
- .clone(); - // Add the fragment back if present - if let Some(fragment) = fragment { - // It is cheaper to check if it is properly encoded than allocate given that - // the majority of inputs do not need to be additionally encoded - if let Some(encoded) = uri::EncodedString::new(fragment) { - resolved = resolved.with_fragment(Some(encoded)); - } else { - uri::encode_to(fragment, scratch); - resolved = resolved.with_fragment(Some( - uri::EncodedString::new_or_panic(scratch), - )); - scratch.clear(); - } - } - resolved - } else { - (*resolution_cache - .resolve_against(&base.borrow(), $reference)?) - .clone() - }; - - let kind = if $key == "$schema" { - ReferenceKind::Schema - } else { - ReferenceKind::Ref - }; - collected.insert(($reference.to_string(), resolved, kind)); - } - } - } - }; - } - - if let Some(object) = contents.as_object() { - if object.len() < 3 { - for (key, value) in object { - if key == "$ref" { - if let Some(reference) = value.as_str() { - on_reference!(reference, "$ref"); - } - } else if key == "$schema" { - if let Some(reference) = value.as_str() { - on_reference!(reference, "$schema"); - } - } - } - } else { - if let Some(reference) = object.get("$ref").and_then(Value::as_str) { - on_reference!(reference, "$ref"); - } - if let Some(reference) = object.get("$schema").and_then(Value::as_str) { - on_reference!(reference, "$schema"); - } - } - } - Ok(()) -} - -/// Recursively collect external resources from a schema and all its subresources. -/// -/// The `visited` set tracks schema pointers we've already processed to avoid infinite -/// recursion when schemas reference each other (directly or through subresources). 
-fn collect_external_resources_recursive( - base: &Arc>, - root: &Value, - contents: &Value, - collected: &mut AHashSet<(String, Uri, ReferenceKind)>, - seen: &mut ReferenceTracker, - resolution_cache: &mut UriCache, - scratch: &mut String, - refers_metaschemas: &mut bool, - draft: Draft, - visited: &mut AHashSet, -) -> Result<(), Error> { - // Track by pointer address to avoid processing the same schema twice - let ptr = std::ptr::from_ref::(contents) as usize; - if !visited.insert(ptr) { - return Ok(()); - } - - let current_base = match draft.id_of(contents) { - Some(id) => resolve_id(base, id, resolution_cache)?, - None => Arc::clone(base), - }; - - // First, collect from the current schema - collect_external_resources( - ¤t_base, - root, - contents, - collected, - seen, - resolution_cache, - scratch, - refers_metaschemas, - draft, - visited, - )?; - - // Then recursively process all subresources - for subresource in draft.subresources_of(contents) { - let subresource_draft = draft.detect(subresource); - collect_external_resources_recursive( - ¤t_base, - root, - subresource, - collected, - seen, - resolution_cache, - scratch, - refers_metaschemas, - subresource_draft, - visited, - )?; - } - Ok(()) -} - -fn mark_reference(seen: &mut ReferenceTracker, base: &Arc>, reference: &str) -> bool { - seen.insert(ReferenceKey::new(base, reference)) -} - -/// Resolve an `$id` against a base URI, handling anchor-style IDs and empty fragments. -/// -/// Anchor-style `$id` values (starting with `#`) don't change the base URI. -/// Empty fragments are stripped from the resolved URI. 
-fn resolve_id( - base: &Arc>, - id: &str, - resolution_cache: &mut UriCache, -) -> Result>, Error> { - if id.starts_with('#') { - return Ok(Arc::clone(base)); - } - let mut resolved = (*resolution_cache.resolve_against(&base.borrow(), id)?).clone(); - if resolved.fragment().is_some_and(EStr::is_empty) { - resolved.set_fragment(None); - } - Ok(Arc::new(resolved)) -} - -/// Look up a value by a JSON Pointer. -/// -/// **NOTE**: A slightly faster version of pointer resolution based on `Value::pointer` from `serde_json`. -pub fn pointer<'a>(document: &'a Value, pointer: &str) -> Option<&'a Value> { - if pointer.is_empty() { - return Some(document); - } - if !pointer.starts_with('/') { - return None; - } - pointer.split('/').skip(1).map(unescape_segment).try_fold( - document, - |target, token| match target { - Value::Object(map) => map.get(&*token), - Value::Array(list) => parse_index(&token).and_then(|x| list.get(x)), - _ => None, - }, - ) -} - -/// Look up a value by a JSON Pointer, tracking `$id` changes along the path. -/// -/// Returns both the resolved value and the accumulated base URI after processing -/// any `$id` declarations encountered along the path. Note that anchor-style `$id` -/// values (starting with `#`) don't change the base URI. 
-#[allow(clippy::type_complexity)] -fn pointer_with_base<'a>( - document: &'a Value, - pointer: &str, - base: &Arc>, - resolution_cache: &mut UriCache, - draft: Draft, -) -> Result>)>, Error> { - if pointer.is_empty() { - return Ok(Some((document, Arc::clone(base)))); - } - if !pointer.starts_with('/') { - return Ok(None); - } - - let mut current = document; - let mut current_base = Arc::clone(base); - let mut current_draft = draft; - - for token in pointer.split('/').skip(1).map(unescape_segment) { - // Check for $id in the current value before traversing deeper - current_draft = current_draft.detect(current); - if let Some(id) = current_draft.id_of(current) { - current_base = resolve_id(¤t_base, id, resolution_cache)?; - } - - current = match current { - Value::Object(map) => match map.get(&*token) { - Some(v) => v, - None => return Ok(None), - }, - Value::Array(list) => match parse_index(&token).and_then(|x| list.get(x)) { - Some(v) => v, - None => return Ok(None), - }, - _ => return Ok(None), - }; - } - - // Note: We don't check $id in the final value here because - // `collect_external_resources_recursive` will handle it - Ok(Some((current, current_base))) -} - -// Taken from `serde_json`. 
-#[must_use] -pub fn parse_index(s: &str) -> Option { - if s.starts_with('+') || (s.starts_with('0') && s.len() != 1) { - return None; - } - s.parse().ok() -} - -#[cfg(test)] -mod tests { - use std::error::Error as _; - - use ahash::AHashMap; - use fluent_uri::Uri; - use serde_json::{json, Value}; - use test_case::test_case; - - use crate::{uri::from_str, Draft, Registry, Resource, Retrieve}; - - use super::{pointer, RegistryOptions, SPECIFICATIONS}; - - #[test] - fn test_empty_pointer() { - let document = json!({}); - assert_eq!(pointer(&document, ""), Some(&document)); - } - - #[test] - fn test_invalid_uri_on_registry_creation() { - let schema = Draft::Draft202012.create_resource(json!({})); - let result = Registry::try_new(":/example.com", schema); - let error = result.expect_err("Should fail"); - - assert_eq!( - error.to_string(), - "Invalid URI reference ':/example.com': unexpected character at index 0" - ); - let source_error = error.source().expect("Should have a source"); - let inner_source = source_error.source().expect("Should have a source"); - assert_eq!(inner_source.to_string(), "unexpected character at index 0"); - } - - #[test] - fn test_lookup_unresolvable_url() { - // Create a registry with a single resource - let schema = Draft::Draft202012.create_resource(json!({ - "type": "object", - "properties": { - "foo": { "type": "string" } - } - })); - let registry = - Registry::try_new("http://example.com/schema1", schema).expect("Invalid resources"); - - // Attempt to create a resolver for a URL not in the registry - let resolver = registry - .try_resolver("http://example.com/non_existent_schema") - .expect("Invalid base URI"); - - let result = resolver.lookup(""); - - assert_eq!( - result.unwrap_err().to_string(), - "Resource 'http://example.com/non_existent_schema' is not present in a registry and retrieving it failed: Retrieving external resources is not supported once the registry is populated" - ); - } - - #[test] - fn 
test_relative_uri_without_base() { - let schema = Draft::Draft202012.create_resource(json!({"$ref": "./virtualNetwork.json"})); - let error = Registry::try_new("json-schema:///", schema).expect_err("Should fail"); - assert_eq!(error.to_string(), "Resource './virtualNetwork.json' is not present in a registry and retrieving it failed: No base URI is available"); - } - - #[test] - fn test_try_with_resources_requires_registered_custom_meta_schema() { - let base_registry = Registry::try_new( - "http://example.com/root", - Resource::from_contents(json!({"type": "object"})), - ) - .expect("Base registry should be created"); - - let custom_schema = Resource::from_contents(json!({ - "$id": "http://example.com/custom", - "$schema": "http://example.com/meta/custom", - "type": "string" - })); - - let error = base_registry - .try_with_resources( - [("http://example.com/custom", custom_schema)], - Draft::default(), - ) - .expect_err("Extending registry must fail when the custom $schema is not registered"); - - let error_msg = error.to_string(); - assert_eq!( - error_msg, - "Unknown meta-schema: 'http://example.com/meta/custom'. 
Custom meta-schemas must be registered in the registry before use" - ); - } - - #[test] - fn test_try_with_resources_accepts_registered_custom_meta_schema_fragment() { - let meta_schema = Resource::from_contents(json!({ - "$id": "http://example.com/meta/custom#", - "$schema": "https://json-schema.org/draft/2020-12/schema", - "type": "object" - })); - - let registry = Registry::try_new("http://example.com/meta/custom#", meta_schema) - .expect("Meta-schema should be registered successfully"); - - let schema = Resource::from_contents(json!({ - "$id": "http://example.com/schemas/my-schema", - "$schema": "http://example.com/meta/custom#", - "type": "string" - })); - - registry - .clone() - .try_with_resources( - [("http://example.com/schemas/my-schema", schema)], - Draft::default(), - ) - .expect("Schema should accept registered meta-schema URI with trailing '#'"); - } - - #[test] - fn test_chained_custom_meta_schemas() { - // Meta-schema B (uses standard Draft 2020-12) - let meta_schema_b = json!({ - "$id": "json-schema:///meta/level-b", - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$vocabulary": { - "https://json-schema.org/draft/2020-12/vocab/core": true, - "https://json-schema.org/draft/2020-12/vocab/validation": true, - }, - "type": "object", - "properties": { - "customProperty": {"type": "string"} - } - }); - - // Meta-schema A (uses Meta-schema B) - let meta_schema_a = json!({ - "$id": "json-schema:///meta/level-a", - "$schema": "json-schema:///meta/level-b", - "customProperty": "level-a-meta", - "type": "object" - }); - - // Schema (uses Meta-schema A) - let schema = json!({ - "$id": "json-schema:///schemas/my-schema", - "$schema": "json-schema:///meta/level-a", - "customProperty": "my-schema", - "type": "string" - }); - - // Register all meta-schemas and schema in a chained manner - // All resources are provided upfront, so no external retrieval should occur - Registry::try_from_resources([ - ( - "json-schema:///meta/level-b", - 
Resource::from_contents(meta_schema_b), - ), - ( - "json-schema:///meta/level-a", - Resource::from_contents(meta_schema_a), - ), - ( - "json-schema:///schemas/my-schema", - Resource::from_contents(schema), - ), - ]) - .expect("Chained custom meta-schemas should be accepted when all are registered"); - } - - struct TestRetriever { - schemas: AHashMap, - } - - impl TestRetriever { - fn new(schemas: AHashMap) -> Self { - TestRetriever { schemas } - } - } - - impl Retrieve for TestRetriever { - fn retrieve( - &self, - uri: &Uri, - ) -> Result> { - if let Some(value) = self.schemas.get(uri.as_str()) { - Ok(value.clone()) - } else { - Err(format!("Failed to find {uri}").into()) - } - } - } - - fn create_test_retriever(schemas: &[(&str, Value)]) -> TestRetriever { - TestRetriever::new( - schemas - .iter() - .map(|&(k, ref v)| (k.to_string(), v.clone())) - .collect(), - ) - } - - struct TestCase { - input_resources: Vec<(&'static str, Value)>, - remote_resources: Vec<(&'static str, Value)>, - expected_resolved_uris: Vec<&'static str>, - } - - #[test_case( - TestCase { - input_resources: vec![ - ("http://example.com/schema1", json!({"$ref": "http://example.com/schema2"})), - ], - remote_resources: vec![ - ("http://example.com/schema2", json!({"type": "object"})), - ], - expected_resolved_uris: vec!["http://example.com/schema1", "http://example.com/schema2"], - } - ;"External ref at top")] - #[test_case( - TestCase { - input_resources: vec![ - ("http://example.com/schema1", json!({ - "$defs": { - "subschema": {"type": "string"} - }, - "$ref": "#/$defs/subschema" - })), - ], - remote_resources: vec![], - expected_resolved_uris: vec!["http://example.com/schema1"], - } - ;"Internal ref at top")] - #[test_case( - TestCase { - input_resources: vec![ - ("http://example.com/schema1", json!({"$ref": "http://example.com/schema2"})), - ("http://example.com/schema2", json!({"type": "object"})), - ], - remote_resources: vec![], - expected_resolved_uris: 
vec!["http://example.com/schema1", "http://example.com/schema2"], - } - ;"Ref to later resource")] - #[test_case( - TestCase { - input_resources: vec![ - ("http://example.com/schema1", json!({ - "type": "object", - "properties": { - "prop1": {"$ref": "http://example.com/schema2"} - } - })), - ], - remote_resources: vec![ - ("http://example.com/schema2", json!({"type": "string"})), - ], - expected_resolved_uris: vec!["http://example.com/schema1", "http://example.com/schema2"], - } - ;"External ref in subresource")] - #[test_case( - TestCase { - input_resources: vec![ - ("http://example.com/schema1", json!({ - "type": "object", - "properties": { - "prop1": {"$ref": "#/$defs/subschema"} - }, - "$defs": { - "subschema": {"type": "string"} - } - })), - ], - remote_resources: vec![], - expected_resolved_uris: vec!["http://example.com/schema1"], - } - ;"Internal ref in subresource")] - #[test_case( - TestCase { - input_resources: vec![ - ("file:///schemas/main.json", json!({"$ref": "file:///schemas/external.json"})), - ], - remote_resources: vec![ - ("file:///schemas/external.json", json!({"type": "object"})), - ], - expected_resolved_uris: vec!["file:///schemas/main.json", "file:///schemas/external.json"], - } - ;"File scheme: external ref at top")] - #[test_case( - TestCase { - input_resources: vec![ - ("file:///schemas/main.json", json!({"$ref": "subfolder/schema.json"})), - ], - remote_resources: vec![ - ("file:///schemas/subfolder/schema.json", json!({"type": "string"})), - ], - expected_resolved_uris: vec!["file:///schemas/main.json", "file:///schemas/subfolder/schema.json"], - } - ;"File scheme: relative path ref")] - #[test_case( - TestCase { - input_resources: vec![ - ("file:///schemas/main.json", json!({ - "type": "object", - "properties": { - "local": {"$ref": "local.json"}, - "remote": {"$ref": "http://example.com/schema"} - } - })), - ], - remote_resources: vec![ - ("file:///schemas/local.json", json!({"type": "string"})), - ("http://example.com/schema", 
json!({"type": "number"})), - ], - expected_resolved_uris: vec![ - "file:///schemas/main.json", - "file:///schemas/local.json", - "http://example.com/schema" - ], - } - ;"File scheme: mixing with http scheme")] - #[test_case( - TestCase { - input_resources: vec![ - ("file:///C:/schemas/main.json", json!({"$ref": "/D:/other_schemas/schema.json"})), - ], - remote_resources: vec![ - ("file:///D:/other_schemas/schema.json", json!({"type": "boolean"})), - ], - expected_resolved_uris: vec![ - "file:///C:/schemas/main.json", - "file:///D:/other_schemas/schema.json" - ], - } - ;"File scheme: absolute path in Windows style")] - #[test_case( - TestCase { - input_resources: vec![ - ("http://example.com/schema1", json!({"$ref": "http://example.com/schema2"})), - ], - remote_resources: vec![ - ("http://example.com/schema2", json!({"$ref": "http://example.com/schema3"})), - ("http://example.com/schema3", json!({"$ref": "http://example.com/schema4"})), - ("http://example.com/schema4", json!({"$ref": "http://example.com/schema5"})), - ("http://example.com/schema5", json!({"type": "object"})), - ], - expected_resolved_uris: vec![ - "http://example.com/schema1", - "http://example.com/schema2", - "http://example.com/schema3", - "http://example.com/schema4", - "http://example.com/schema5", - ], - } - ;"Four levels of external references")] - #[test_case( - TestCase { - input_resources: vec![ - ("http://example.com/schema1", json!({"$ref": "http://example.com/schema2"})), - ], - remote_resources: vec![ - ("http://example.com/schema2", json!({"$ref": "http://example.com/schema3"})), - ("http://example.com/schema3", json!({"$ref": "http://example.com/schema4"})), - ("http://example.com/schema4", json!({"$ref": "http://example.com/schema5"})), - ("http://example.com/schema5", json!({"$ref": "http://example.com/schema6"})), - ("http://example.com/schema6", json!({"$ref": "http://example.com/schema1"})), - ], - expected_resolved_uris: vec![ - "http://example.com/schema1", - 
"http://example.com/schema2", - "http://example.com/schema3", - "http://example.com/schema4", - "http://example.com/schema5", - "http://example.com/schema6", - ], - } - ;"Five levels of external references with circular reference")] - fn test_references_processing(test_case: TestCase) { - let retriever = create_test_retriever(&test_case.remote_resources); - - let input_pairs = test_case - .input_resources - .clone() - .into_iter() - .map(|(uri, value)| (uri, Resource::from_contents(value))); - - let registry = Registry::options() - .retriever(retriever) - .build(input_pairs) - .expect("Invalid resources"); - // Verify that all expected URIs are resolved and present in resources - for uri in test_case.expected_resolved_uris { - let resolver = registry.try_resolver("").expect("Invalid base URI"); - assert!(resolver.lookup(uri).is_ok()); - } - } - - #[test] - fn test_default_retriever_with_remote_refs() { - let result = Registry::try_from_resources([( - "http://example.com/schema1", - Resource::from_contents(json!({"$ref": "http://example.com/schema2"})), - )]); - let error = result.expect_err("Should fail"); - assert_eq!(error.to_string(), "Resource 'http://example.com/schema2' is not present in a registry and retrieving it failed: Default retriever does not fetch resources"); - assert!(error.source().is_some()); - } - - #[test] - fn test_options() { - let _registry = RegistryOptions::default() - .build([("", Resource::from_contents(json!({})))]) - .expect("Invalid resources"); - } - - #[test] - fn test_registry_with_duplicate_input_uris() { - let input_resources = vec![ - ( - "http://example.com/schema", - json!({ - "type": "object", - "properties": { - "foo": { "type": "string" } - } - }), - ), - ( - "http://example.com/schema", - json!({ - "type": "object", - "properties": { - "bar": { "type": "number" } - } - }), - ), - ]; - - let result = Registry::try_from_resources( - input_resources - .into_iter() - .map(|(uri, value)| (uri, 
Draft::Draft202012.create_resource(value))), - ); - - assert!( - result.is_ok(), - "Failed to create registry with duplicate input URIs" - ); - let registry = result.unwrap(); - - let resource = registry - .resources - .get(&from_str("http://example.com/schema").expect("Invalid URI")) - .unwrap(); - let properties = resource - .contents() - .get("properties") - .and_then(|v| v.as_object()) - .unwrap(); - - assert!( - !properties.contains_key("bar"), - "Registry should contain the earliest added schema" - ); - assert!( - properties.contains_key("foo"), - "Registry should contain the overwritten schema" - ); - } - - #[test] - fn test_resolver_debug() { - let registry = SPECIFICATIONS - .clone() - .try_with_resource("http://example.com", Resource::from_contents(json!({}))) - .expect("Invalid resource"); - let resolver = registry - .try_resolver("http://127.0.0.1/schema") - .expect("Invalid base URI"); - assert_eq!( - format!("{resolver:?}"), - "Resolver { base_uri: \"http://127.0.0.1/schema\", scopes: \"[]\" }" - ); - } - - #[test] - fn test_try_with_resource() { - let registry = SPECIFICATIONS - .clone() - .try_with_resource("http://example.com", Resource::from_contents(json!({}))) - .expect("Invalid resource"); - let resolver = registry.try_resolver("").expect("Invalid base URI"); - let resolved = resolver - .lookup("http://json-schema.org/draft-06/schema#/definitions/schemaArray") - .expect("Lookup failed"); - assert_eq!( - resolved.contents(), - &json!({ - "type": "array", - "minItems": 1, - "items": { "$ref": "#" } - }) - ); - } - - #[test] - fn test_invalid_reference() { - let resource = Draft::Draft202012.create_resource(json!({"$schema": "$##"})); - let _ = Registry::try_new("http://#/", resource); - } -} - -#[cfg(all(test, feature = "retrieve-async"))] -mod async_tests { - use crate::{uri, DefaultRetriever, Draft, Registry, Resource, Uri}; - use ahash::AHashMap; - use serde_json::{json, Value}; - use std::{ - error::Error, - sync::atomic::{AtomicUsize, 
Ordering}, - }; - - struct TestAsyncRetriever { - schemas: AHashMap, - } - - impl TestAsyncRetriever { - fn with_schema(uri: impl Into, schema: Value) -> Self { - TestAsyncRetriever { - schemas: { AHashMap::from_iter([(uri.into(), schema)]) }, - } - } - } - - #[cfg_attr(target_family = "wasm", async_trait::async_trait(?Send))] - #[cfg_attr(not(target_family = "wasm"), async_trait::async_trait)] - impl crate::AsyncRetrieve for TestAsyncRetriever { - async fn retrieve( - &self, - uri: &Uri, - ) -> Result> { - self.schemas - .get(uri.as_str()) - .cloned() - .ok_or_else(|| "Schema not found".into()) - } - } - - #[tokio::test] - async fn test_default_async_retriever_with_remote_refs() { - let result = Registry::options() - .async_retriever(DefaultRetriever) - .build([( - "http://example.com/schema1", - Resource::from_contents(json!({"$ref": "http://example.com/schema2"})), - )]) - .await; - - let error = result.expect_err("Should fail"); - assert_eq!(error.to_string(), "Resource 'http://example.com/schema2' is not present in a registry and retrieving it failed: Default retriever does not fetch resources"); - assert!(error.source().is_some()); - } - - #[tokio::test] - async fn test_async_options() { - let _registry = Registry::options() - .async_retriever(DefaultRetriever) - .build([("", Draft::default().create_resource(json!({})))]) - .await - .expect("Invalid resources"); - } - - #[tokio::test] - async fn test_async_registry_with_duplicate_input_uris() { - let input_resources = vec![ - ( - "http://example.com/schema", - json!({ - "type": "object", - "properties": { - "foo": { "type": "string" } - } - }), - ), - ( - "http://example.com/schema", - json!({ - "type": "object", - "properties": { - "bar": { "type": "number" } - } - }), - ), - ]; - - let result = Registry::options() - .async_retriever(DefaultRetriever) - .build( - input_resources - .into_iter() - .map(|(uri, value)| (uri, Draft::Draft202012.create_resource(value))), - ) - .await; - - assert!( - 
result.is_ok(), - "Failed to create registry with duplicate input URIs" - ); - let registry = result.unwrap(); - - let resource = registry - .resources - .get(&uri::from_str("http://example.com/schema").expect("Invalid URI")) - .unwrap(); - let properties = resource - .contents() - .get("properties") - .and_then(|v| v.as_object()) - .unwrap(); - - assert!( - !properties.contains_key("bar"), - "Registry should contain the earliest added schema" - ); - assert!( - properties.contains_key("foo"), - "Registry should contain the overwritten schema" - ); - } - - #[tokio::test] - async fn test_async_try_with_resource() { - let retriever = TestAsyncRetriever::with_schema( - "http://example.com/schema2", - json!({"type": "object"}), - ); - - let registry = Registry::options() - .async_retriever(retriever) - .build([( - "http://example.com", - Resource::from_contents(json!({"$ref": "http://example.com/schema2"})), - )]) - .await - .expect("Invalid resource"); - - let resolver = registry.try_resolver("").expect("Invalid base URI"); - let resolved = resolver - .lookup("http://example.com/schema2") - .expect("Lookup failed"); - assert_eq!(resolved.contents(), &json!({"type": "object"})); - } - - #[tokio::test] - async fn test_async_registry_with_multiple_refs() { - let retriever = TestAsyncRetriever { - schemas: AHashMap::from_iter([ - ( - "http://example.com/schema2".to_string(), - json!({"type": "object"}), - ), - ( - "http://example.com/schema3".to_string(), - json!({"type": "string"}), - ), - ]), - }; - - let registry = Registry::options() - .async_retriever(retriever) - .build([( - "http://example.com/schema1", - Resource::from_contents(json!({ - "type": "object", - "properties": { - "obj": {"$ref": "http://example.com/schema2"}, - "str": {"$ref": "http://example.com/schema3"} - } - })), - )]) - .await - .expect("Invalid resource"); - - let resolver = registry.try_resolver("").expect("Invalid base URI"); - - // Check both references are resolved correctly - let resolved2 = 
resolver - .lookup("http://example.com/schema2") - .expect("Lookup failed"); - assert_eq!(resolved2.contents(), &json!({"type": "object"})); - - let resolved3 = resolver - .lookup("http://example.com/schema3") - .expect("Lookup failed"); - assert_eq!(resolved3.contents(), &json!({"type": "string"})); - } - - #[tokio::test] - async fn test_async_registry_with_nested_refs() { - let retriever = TestAsyncRetriever { - schemas: AHashMap::from_iter([ - ( - "http://example.com/address".to_string(), - json!({ - "type": "object", - "properties": { - "street": {"type": "string"}, - "city": {"$ref": "http://example.com/city"} - } - }), - ), - ( - "http://example.com/city".to_string(), - json!({ - "type": "string", - "minLength": 1 - }), - ), - ]), - }; - - let registry = Registry::options() - .async_retriever(retriever) - .build([( - "http://example.com/person", - Resource::from_contents(json!({ - "type": "object", - "properties": { - "name": {"type": "string"}, - "address": {"$ref": "http://example.com/address"} - } - })), - )]) - .await - .expect("Invalid resource"); - - let resolver = registry.try_resolver("").expect("Invalid base URI"); - - // Verify nested reference resolution - let resolved = resolver - .lookup("http://example.com/city") - .expect("Lookup failed"); - assert_eq!( - resolved.contents(), - &json!({"type": "string", "minLength": 1}) - ); - } - - // Multiple refs to the same external schema with different fragments were fetched multiple times in async mode. 
- #[tokio::test] - async fn test_async_registry_with_duplicate_fragment_refs() { - static FETCH_COUNT: AtomicUsize = AtomicUsize::new(0); - - struct CountingRetriever { - inner: TestAsyncRetriever, - } - - #[cfg_attr(target_family = "wasm", async_trait::async_trait(?Send))] - #[cfg_attr(not(target_family = "wasm"), async_trait::async_trait)] - impl crate::AsyncRetrieve for CountingRetriever { - async fn retrieve( - &self, - uri: &Uri, - ) -> Result> { - FETCH_COUNT.fetch_add(1, Ordering::SeqCst); - self.inner.retrieve(uri).await - } - } - - FETCH_COUNT.store(0, Ordering::SeqCst); - - let retriever = CountingRetriever { - inner: TestAsyncRetriever::with_schema( - "http://example.com/external", - json!({ - "$defs": { - "foo": { - "type": "object", - "properties": { - "nested": { "type": "string" } - } - }, - "bar": { - "type": "object", - "properties": { - "value": { "type": "integer" } - } - } - } - }), - ), - }; - - // Schema references the same external URL with different fragments - let registry = Registry::options() - .async_retriever(retriever) - .build([( - "http://example.com/main", - Resource::from_contents(json!({ - "type": "object", - "properties": { - "name": { "$ref": "http://example.com/external#/$defs/foo" }, - "age": { "$ref": "http://example.com/external#/$defs/bar" } - } - })), - )]) - .await - .expect("Invalid resource"); - - // Should only fetch the external schema once - let fetches = FETCH_COUNT.load(Ordering::SeqCst); - assert_eq!( - fetches, 1, - "External schema should be fetched only once, but was fetched {fetches} times" - ); - - let resolver = registry - .try_resolver("http://example.com/main") - .expect("Invalid base URI"); - - // Verify both fragment references resolve correctly - let foo = resolver - .lookup("http://example.com/external#/$defs/foo") - .expect("Lookup failed"); - assert_eq!( - foo.contents(), - &json!({ - "type": "object", - "properties": { - "nested": { "type": "string" } - } - }) - ); - - let bar = resolver - 
.lookup("http://example.com/external#/$defs/bar") - .expect("Lookup failed"); - assert_eq!( - bar.contents(), - &json!({ - "type": "object", - "properties": { - "value": { "type": "integer" } - } - }) - ); - } -} diff --git a/crates/jsonschema-referencing/src/registry/build.rs b/crates/jsonschema-referencing/src/registry/build.rs new file mode 100644 index 00000000..2d68146c --- /dev/null +++ b/crates/jsonschema-referencing/src/registry/build.rs @@ -0,0 +1,2822 @@ +//! BFS pipeline that processes pending resources into the prepared index. +//! +//! Entry points: +//! - [`index_resources`]: processes pending resources and returns a prepared index. +//! - [`build_prepared_index_for_documents`]: builds an index from pre-stored documents +//! (used by the static [`super::SPECIFICATIONS`] registry). +//! +//! [`StoredDocument`] wraps a [`Cow`](std::borrow::Cow) so the registry holds +//! both borrowed (externally-owned, zero-copy) and owned (retrieved) documents uniformly. + +use std::{borrow::Cow, collections::VecDeque, num::NonZeroUsize, sync::Arc}; + +use ahash::{AHashMap, AHashSet}; +use fluent_uri::{pct_enc::EStr, Uri}; +use serde_json::Value; + +use crate::{ + cache::UriCache, + meta::metas_for_draft, + pointer::{pointer, ParsedPointer, ParsedPointerSegment}, + spec::{ChildNode, PathSegment}, + uri, Draft, Error, JsonPointerNode, Retrieve, +}; + +use super::{index::Index, input::PendingResource}; + +/// A schema document stored in the registry, either borrowed from the caller or owned. 
+#[derive(Debug)]
+pub(super) struct StoredDocument<'a> {
+    /// Document contents: `Cow::Borrowed` for caller-provided values, `Cow::Owned` otherwise.
+    value: Cow<'a, Value>,
+    /// Draft associated with this document.
+    draft: Draft,
+}
+
+impl<'a> StoredDocument<'a> {
+    /// Wrap a value that the store takes ownership of.
+    #[inline]
+    pub(super) fn owned(value: Value, draft: Draft) -> Self {
+        Self {
+            value: Cow::Owned(value),
+            draft,
+        }
+    }
+
+    /// Wrap a value borrowed for `'a` without copying it.
+    #[inline]
+    pub(super) fn borrowed(value: &'a Value, draft: Draft) -> Self {
+        Self {
+            value: Cow::Borrowed(value),
+            draft,
+        }
+    }
+
+    /// Document contents, regardless of whether they are borrowed or owned.
+    #[inline]
+    pub(super) fn contents(&self) -> &Value {
+        &self.value
+    }
+
+    /// The original `'a` reference when the document is borrowed; `None` when it is owned.
+    #[inline]
+    pub(super) fn borrowed_contents(&self) -> Option<&'a Value> {
+        match &self.value {
+            Cow::Borrowed(value) => Some(value),
+            Cow::Owned(_) => None,
+        }
+    }
+
+    /// Draft this document was stored with.
+    #[inline]
+    pub(super) fn draft(&self) -> Draft {
+        self.draft
+    }
+}
+
+/// Stored documents keyed by their (shared) document URI.
+pub(super) type DocumentStore<'a> = AHashMap<Arc<Uri<String>>, Arc<StoredDocument<'a>>>;
+/// URIs of resources that are already known to the registry.
+pub(super) type KnownResources = AHashSet<Uri<String>>;
+type ReferenceTracker = AHashSet<ReferenceKey>;
+type VisitedRefs<'a> = AHashSet<(NonZeroUsize, &'a str)>;
+/// An entry in the processing queue.
+/// `(base_uri, document_root_uri, pointer, draft)`
+///
+/// `pointer` is a JSON Pointer relative to the document root (`""` means root).
+/// Local `$ref`s are always resolved against the document root.
+type QueueEntry = (Arc<Uri<String>>, Arc<Uri<String>>, String, Draft);
+
+/// A deferred local `$ref` target.
+///
+/// Like [`QueueEntry`] but carries the pre-resolved value address (`value_addr`) obtained
+/// for free during the `pointer()` call at push time. Used in [`process_deferred_refs`] to
+/// skip already-visited targets without a second `pointer()` traversal.
+///
+/// `(base_uri, document_root_uri, pointer, draft, value_addr)`
+type DeferredRef = (Arc<Uri<String>>, Arc<Uri<String>>, String, Draft, usize);
+
+/// Ingest `pairs` into `documents` / `known_resources`, then run `resolve_and_index`
+/// with `retriever`.
+///
+/// `draft_override` is forwarded to ingestion as-is; the resolution step receives
+/// `draft_override.unwrap_or_default()`.
+///
+/// Returns the collected custom meta-schema URIs together with the built index.
+///
+/// # Errors
+///
+/// Propagates any error returned by `resolve_and_index`.
+pub(super) fn index_resources<'a>(
+    pairs: impl IntoIterator<Item = (Uri<String>, PendingResource<'a>)>,
+    retriever: &dyn Retrieve,
+    documents: &mut DocumentStore<'a>,
+    known_resources: &mut KnownResources,
+    resolution_cache: &mut UriCache,
+    draft_override: Option<Draft>,
+) -> Result<(Vec<String>, Index<'a>), Error> {
+    let mut state = ProcessingState::new();
+    ingest_input_resources(
+        pairs,
+        documents,
+        known_resources,
+        &mut state,
+        draft_override,
+    );
+    resolve_and_index(
+        &mut state,
+        documents,
+        known_resources,
+        resolution_cache,
+        draft_override.unwrap_or_default(),
+        retriever,
+    )?;
+    Ok((state.custom_metaschemas, state.index))
+}
+
+/// Async counterpart of [`index_resources`]: same ingestion step, followed by
+/// `resolve_and_index_async` with an [`crate::AsyncRetrieve`] retriever.
+///
+/// # Errors
+///
+/// Propagates any error returned by `resolve_and_index_async`.
+#[cfg(feature = "retrieve-async")]
+pub(super) async fn index_resources_async<'a>(
+    pairs: impl IntoIterator<Item = (Uri<String>, PendingResource<'a>)>,
+    retriever: &dyn crate::AsyncRetrieve,
+    documents: &mut DocumentStore<'a>,
+    known_resources: &mut KnownResources,
+    resolution_cache: &mut UriCache,
+    draft_override: Option<Draft>,
+) -> Result<(Vec<String>, Index<'a>), Error> {
+    let mut state = ProcessingState::new();
+    ingest_input_resources(
+        pairs,
+        documents,
+        known_resources,
+        &mut state,
+        draft_override,
+    );
+    resolve_and_index_async(
+        &mut state,
+        documents,
+        known_resources,
+        resolution_cache,
+        draft_override.unwrap_or_default(),
+        retriever,
+    )
+    .await?;
+    Ok((state.custom_metaschemas, state.index))
+}
+
+/// Build prepared local index data for all documents already in `documents`.
+pub(super) fn build_prepared_index_for_documents<'a>(
+    documents: &DocumentStore<'a>,
+    resolution_cache: &mut UriCache,
+) -> Result<Index<'a>, Error> {
+    let mut state = ProcessingState::new();
+    let mut known_resources = KnownResources::default();
+
+    // First pass: make every document URI known and registered before any traversal starts.
+    for (doc_uri, document) in documents {
+        known_resources.insert((**doc_uri).clone());
+        state.index.register_document(doc_uri, document);
+    }
+
+    // Second pass: traverse each document from its root (`""` pointer) with a fresh
+    // per-document visited set.
+    for (doc_uri, document) in documents {
+        let mut local_seen = VisitedRefs::new();
+        if let Some(document_root) = document.borrowed_contents() {
+            process_borrowed_document(
+                Arc::clone(doc_uri),
+                doc_uri,
+                document_root,
+                "",
+                document.draft(),
+                &mut state,
+                &mut known_resources,
+                resolution_cache,
+                &mut local_seen,
+            )?;
+        } else {
+            process_owned_document(
+                Arc::clone(doc_uri),
+                doc_uri,
+                document,
+                "",
+                document.draft(),
+                &mut state,
+                &mut known_resources,
+                resolution_cache,
+                &mut local_seen,
+            )?;
+        }
+    }
+    Ok(state.index)
+}
+
+/// Hashable identity of a reference: the address of the base URI's `Arc` allocation
+/// plus the reference string itself.
+#[derive(Hash, Eq, PartialEq)]
+struct ReferenceKey {
+    base_ptr: NonZeroUsize,
+    reference: String,
+}
+
+impl ReferenceKey {
+    /// Build a key from the `Arc` pointer of `base` and an owned copy of `reference`.
+    ///
+    /// Two bases compare equal here only when they are the same `Arc` allocation,
+    /// not merely equal URIs.
+    fn new(base: &Arc<Uri<String>>, reference: &str) -> Self {
+        Self {
+            base_ptr: NonZeroUsize::new(Arc::as_ptr(base) as usize)
+                .expect("Arc pointer should never be null"),
+            reference: reference.to_owned(),
+        }
+    }
+}
+
+/// Clears a [`VisitedRefs`] set and reinterprets it with a different borrow lifetime,
+/// reusing the backing heap allocation across processing phases.
+///
+/// # Safety
+/// - The set is cleared before the lifetime change, so no `'a` references remain live.
+/// - `(NonZeroUsize, &'a str)` and `(NonZeroUsize, &'b str)` have identical memory layouts
+///   for any two lifetimes (`&str` is a fat pointer whose size/alignment are lifetime-independent).
+/// - After `clear()` the heap allocation holds no initialized `T` values, so no pointer in
+///   the allocation is ever read through the wrong lifetime.
+/// - Verified under MIRI (tree borrows): no undefined behaviour detected.
+#[allow(unsafe_code)]
+#[inline]
+unsafe fn reuse_visited_local_refs<'b>(mut s: VisitedRefs<'_>) -> VisitedRefs<'b> {
+    s.clear();
+    // SAFETY: see above — layouts identical, no live 'a refs after clear()
+    std::mem::transmute(s)
+}
+
+/// Reinterpret `&Value` as `&'long Value` for a value inside an `Arc<StoredDocument<'_>>`.
+///
+/// # Safety
+/// - The `Arc<StoredDocument<'_>>` is kept alive for the entire BFS pass
+///   (it lives in `DocumentStore<'long>` whose borrow outlives all BFS calls).
+/// - `Value` contains no interior mutability, so no aliasing rule is violated.
+#[allow(unsafe_code)]
+#[inline]
+unsafe fn extend_value_lifetime<'long>(value: &Value) -> &'long Value {
+    &*std::ptr::from_ref::<Value>(value)
+}
+
+/// Kind tag stored next to each entry of [`TraversalCtx::external`].
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+enum ReferenceKind {
+    Ref,
+    Schema,
+}
+
+/// Lifetime-free traversal state passed to external-resource collection helpers.
+struct TraversalCtx {
+    seen: ReferenceTracker,
+    external: AHashSet<(String, Uri<String>, ReferenceKind)>,
+    scratch: String,
+    refers_metaschemas: bool,
+    /// Tracks schema pointer addresses we have visited during recursive collection.
+    visited_schemas: AHashSet<usize>,
+    deferred_refs: Vec<DeferredRef>,
+}
+
+impl TraversalCtx {
+    /// Create an empty context: all collections empty, `refers_metaschemas` unset.
+    fn new() -> Self {
+        Self {
+            seen: ReferenceTracker::new(),
+            external: AHashSet::new(),
+            scratch: String::new(),
+            refers_metaschemas: false,
+            visited_schemas: AHashSet::new(),
+            deferred_refs: Vec::new(),
+        }
+    }
+}
+
+/// Controls how `explore_subtree` registers subresources and traverses children.
+///
+/// Two concrete implementations:
+/// - `BorrowedStrategy` — zero-sized type for externally-owned schema values.
+/// - `OwnedStrategy` — carries the current JSON Pointer path and document reference.
+///
+/// Both are `Copy` so the generic function can pass the strategy by value without cloning.
+trait SubtreeStrategy<'v>: Copy {
+    /// Register `subschema` in the index at `key`.
+    fn register(
+        self,
+        index: &mut Index<'v>,
+        key: &Arc<Uri<String>>,
+        draft: Draft,
+        insert: bool,
+        subschema: &'v Value,
+    );
+
+    /// Decide whether to register a subresource that has an `$id`.
+    ///
+    /// `is_root` — true when processing the root entry point of the document.
+    /// `new_base` — the resolved base URI after applying `$id`.
+    /// `root_uri` — the document root URI.
+    /// `changed` — true when `new_base != old_base`.
+    /// `has_anchor` — true when the object also has an anchor keyword.
+    fn should_register_with_id(
+        is_root: bool,
+        new_base: &Arc<Uri<String>>,
+        root_uri: &Arc<Uri<String>>,
+        changed: bool,
+        has_anchor: bool,
+    ) -> bool;
+
+    /// Stream child nodes and recurse into them, extending the path/base as appropriate.
+    fn walk_children(
+        self,
+        draft: Draft,
+        object: &'v serde_json::Map<String, Value>,
+        base: Arc<Uri<String>>,
+        document_root: &'v Value,
+        document_root_uri: &Arc<Uri<String>>,
+        state: &mut ProcessingState<'v>,
+        known_resources: &mut KnownResources,
+        resolution_cache: &mut UriCache,
+        local_seen: &mut VisitedRefs<'v>,
+    ) -> Result<(), Error>;
+}
+
+/// Strategy for schemas borrowed from the caller (zero-copy path).
+#[derive(Copy, Clone)]
+struct BorrowedStrategy;
+
+impl<'v> SubtreeStrategy<'v> for BorrowedStrategy {
+    /// Forward to `Index::register_borrowed_subresource`; no pointer path is needed.
+    #[inline]
+    fn register(
+        self,
+        index: &mut Index<'v>,
+        key: &Arc<Uri<String>>,
+        draft: Draft,
+        insert: bool,
+        subschema: &'v Value,
+    ) {
+        index.register_borrowed_subresource(key, draft, insert, subschema);
+    }
+
+    /// Register every `$id`-bearing subresource except the document root entry whose
+    /// resolved base equals the document root URI. `changed` / `has_anchor` are ignored.
+    #[inline]
+    fn should_register_with_id(
+        is_root: bool,
+        new_base: &Arc<Uri<String>>,
+        root_uri: &Arc<Uri<String>>,
+        _changed: bool,
+        _has_anchor: bool,
+    ) -> bool {
+        !(is_root && new_base == root_uri)
+    }
+
+    /// Recurse into each child yielded by `Draft::for_each_child`, sharing `base` and
+    /// marking every child as a non-root entry.
+    fn walk_children(
+        self,
+        draft: Draft,
+        object: &'v serde_json::Map<String, Value>,
+        base: Arc<Uri<String>>,
+        document_root: &'v Value,
+        document_root_uri: &Arc<Uri<String>>,
+        state: &mut ProcessingState<'v>,
+        known_resources: &mut KnownResources,
+        resolution_cache: &mut UriCache,
+        local_seen: &mut VisitedRefs<'v>,
+    ) -> Result<(), Error> {
+        draft.for_each_child(object, &mut |child, child_draft| {
+            explore_subtree(
+                Arc::clone(&base),
+                document_root,
+                child,
+                child_draft,
+                false,
+                document_root_uri,
+                state,
+                known_resources,
+                resolution_cache,
+                BorrowedStrategy,
+                local_seen,
+            )
+        })
+    }
+}
+
+/// Strategy for owned documents (retrieved at runtime, stored behind `Arc<StoredDocument>`).
+///
+/// Carries the document reference and the current JSON Pointer path so that
+/// `register` can record exact pointer locations in the index.
+#[derive(Copy, Clone)]
+struct OwnedStrategy<'v, 'doc, 'key, 'node> {
+    /// The stored document currently being traversed.
+    document: &'doc Arc<StoredDocument<'v>>,
+    /// JSON Pointer path from the document root to the current subschema.
+    path: &'node JsonPointerNode<'key, 'node>,
+}
+
+impl<'v> SubtreeStrategy<'v> for OwnedStrategy<'v, '_, '_, '_> {
+    /// Materialize the current path as a `ParsedPointer` and forward to
+    /// `Index::register_owned_subresource` together with the document handle.
+    #[inline]
+    fn register(
+        self,
+        index: &mut Index<'v>,
+        key: &Arc<Uri<String>>,
+        draft: Draft,
+        insert: bool,
+        subschema: &'v Value,
+    ) {
+        let pointer = ParsedPointer::from_pointer_node(self.path);
+        index.register_owned_subresource(key, self.document, &pointer, draft, insert, subschema);
+    }
+
+    /// Same root exclusion as the borrowed strategy, but additionally requires that the
+    /// base URI changed or that the object carries an anchor.
+    #[inline]
+    fn should_register_with_id(
+        is_root: bool,
+        new_base: &Arc<Uri<String>>,
+        root_uri: &Arc<Uri<String>>,
+        changed: bool,
+        has_anchor: bool,
+    ) -> bool {
+        !(is_root && new_base == root_uri) && (changed || has_anchor)
+    }
+
+    /// Recurse into each child yielded by `Draft::for_each_owned_child`, extending the
+    /// pointer path for the child via `with_owned_child_path`.
+    fn walk_children(
+        self,
+        draft: Draft,
+        object: &'v serde_json::Map<String, Value>,
+        base: Arc<Uri<String>>,
+        document_root: &'v Value,
+        document_root_uri: &Arc<Uri<String>>,
+        state: &mut ProcessingState<'v>,
+        known_resources: &mut KnownResources,
+        resolution_cache: &mut UriCache,
+        local_seen: &mut VisitedRefs<'v>,
+    ) -> Result<(), Error> {
+        draft.for_each_owned_child(object, &mut |child| {
+            with_owned_child_path(self.path, &child, |child_path| {
+                explore_subtree(
+                    Arc::clone(&base),
+                    document_root,
+                    child.value,
+                    child.draft,
+                    false,
+                    document_root_uri,
+                    state,
+                    known_resources,
+                    resolution_cache,
+                    OwnedStrategy {
+                        document: self.document,
+                        path: child_path,
+                    },
+                    local_seen,
+                )
+            })
+        })
+    }
+}
+
+/// Mutable state threaded through the whole indexing pass.
+struct ProcessingState<'a> {
+    /// Pending work items for the traversal.
+    queue: VecDeque<QueueEntry>,
+    /// `$schema` URIs collected from resources whose draft is `Draft::Unknown`.
+    custom_metaschemas: Vec<String>,
+    /// The index being built.
+    index: Index<'a>,
+    /// Traversal bookkeeping that does not borrow from the documents.
+    ctx: TraversalCtx,
+}
+
+impl ProcessingState<'_> {
+    /// Create an empty state; the queue is pre-sized for 32 entries.
+    fn new() -> Self {
+        Self {
+            queue: VecDeque::with_capacity(32),
+            custom_metaschemas: Vec::new(),
+            index: Index::default(),
+            ctx: TraversalCtx::new(),
+        }
+    }
+}
+
+/// Convert resources into stored documents, register them with the
+/// index, and enqueue them as the starting set for BFS traversal.
+///
+/// `draft` forces a specific draft for all resources; `None` means auto-detect per resource.
+/// Resources are added to `known_resources` here so the retriever does not re-fetch them +/// during the BFS loop. +fn ingest_input_resources<'a>( + pairs: impl IntoIterator, PendingResource<'a>)>, + documents: &mut DocumentStore<'a>, + known_resources: &mut KnownResources, + state: &mut ProcessingState<'a>, + draft: Option, +) { + for (uri, resource) in pairs { + let key = Arc::new(uri); + let (draft, document) = match resource { + PendingResource::Value(value) => { + let draft = draft.unwrap_or_else(|| Draft::default().detect(&value)); + (draft, StoredDocument::owned(value, draft)) + } + PendingResource::ValueRef(value) => { + let draft = draft.unwrap_or_else(|| Draft::default().detect(value)); + (draft, StoredDocument::borrowed(value, draft)) + } + PendingResource::Resource(resource) => { + let (draft, contents) = resource.into_inner(); + (draft, StoredDocument::owned(contents, draft)) + } + PendingResource::ResourceRef(resource) => { + let draft = resource.draft(); + (draft, StoredDocument::borrowed(resource.contents(), draft)) + } + }; + let document = Arc::new(document); + + documents.insert(Arc::clone(&key), Arc::clone(&document)); + known_resources.insert((*key).clone()); + state.index.register_document(&key, &document); + + // Draft::Unknown means the resource declared a custom $schema; collect its URI + // for post-build validation that a matching meta-schema was registered. 
+ if draft == Draft::Unknown { + if let Some(meta) = document + .contents() + .as_object() + .and_then(|obj| obj.get("$schema")) + .and_then(|schema| schema.as_str()) + { + state.custom_metaschemas.push(meta.to_string()); + } + } + + state + .queue + .push_back((Arc::clone(&key), key, String::new(), draft)); + } +} + +fn process_queue<'r>( + state: &mut ProcessingState<'r>, + documents: &DocumentStore<'r>, + known_resources: &mut KnownResources, + resolution_cache: &mut UriCache, +) -> Result<(), Error> { + while let Some((base, root_uri, pointer_path, draft)) = state.queue.pop_front() { + let Some(document) = documents.get(&root_uri) else { + continue; + }; + if let Some(document_root) = document.borrowed_contents() { + let mut visited = VisitedRefs::new(); + process_borrowed_document( + base, + &root_uri, + document_root, + &pointer_path, + draft, + state, + known_resources, + resolution_cache, + &mut visited, + )?; + continue; + } + let mut visited = VisitedRefs::new(); + process_owned_document( + base, + &root_uri, + document, + &pointer_path, + draft, + state, + known_resources, + resolution_cache, + &mut visited, + )?; + } + Ok(()) +} + +fn process_borrowed_document<'r>( + current_base_uri: Arc>, + document_root_uri: &Arc>, + document_root: &'r Value, + pointer_path: &str, + draft: Draft, + state: &mut ProcessingState<'r>, + known_resources: &mut KnownResources, + resolution_cache: &mut UriCache, + visited: &mut VisitedRefs<'r>, +) -> Result<(), Error> { + let Some(subschema) = (if pointer_path.is_empty() { + Some(document_root) + } else { + pointer(document_root, pointer_path) + }) else { + return Ok(()); + }; + + explore_borrowed_subtree( + current_base_uri, + document_root, + subschema, + draft, + pointer_path.is_empty(), + document_root_uri, + state, + known_resources, + resolution_cache, + visited, + ) +} + +fn explore_subtree<'v, S: SubtreeStrategy<'v>>( + mut base: Arc>, + document_root: &'v Value, + subschema: &'v Value, + draft: Draft, + 
is_root_entry: bool, + document_root_uri: &Arc>, + state: &mut ProcessingState<'v>, + known_resources: &mut KnownResources, + resolution_cache: &mut UriCache, + strategy: S, + local_seen: &mut VisitedRefs<'v>, +) -> Result<(), Error> { + let Some(object) = subschema.as_object() else { + return Ok(()); + }; + let analysis = draft.analyze_object(object); + + if let Some(id) = analysis.id { + let (new_base, changed) = + resolve_subresource_id(&base, id, known_resources, resolution_cache)?; + base = new_base; + if S::should_register_with_id( + is_root_entry, + &base, + document_root_uri, + changed, + analysis.has_anchor, + ) { + strategy.register(&mut state.index, &base, draft, changed, subschema); + } + } else if analysis.has_anchor && !is_root_entry { + strategy.register(&mut state.index, &base, draft, false, subschema); + } + + let subschema_ptr = std::ptr::from_ref::(subschema) as usize; + if state.ctx.visited_schemas.insert(subschema_ptr) + && (analysis.ref_.is_some() || analysis.schema.is_some()) + { + collect_external_resources_from_values( + &base, + document_root, + analysis.ref_, + analysis.schema, + &mut state.ctx, + resolution_cache, + draft, + document_root_uri, + local_seen, + )?; + } + + strategy.walk_children( + draft, + object, + base, + document_root, + document_root_uri, + state, + known_resources, + resolution_cache, + local_seen, + ) +} + +fn explore_borrowed_subtree<'r>( + current_base_uri: Arc>, + document_root: &'r Value, + subschema: &'r Value, + draft: Draft, + is_root_entry: bool, + document_root_uri: &Arc>, + state: &mut ProcessingState<'r>, + known_resources: &mut KnownResources, + resolution_cache: &mut UriCache, + local_seen: &mut VisitedRefs<'r>, +) -> Result<(), Error> { + explore_subtree( + current_base_uri, + document_root, + subschema, + draft, + is_root_entry, + document_root_uri, + state, + known_resources, + resolution_cache, + BorrowedStrategy, + local_seen, + ) +} + +#[allow(unsafe_code)] +fn process_owned_document<'r>( + 
current_base_uri: Arc>, + document_root_uri: &Arc>, + document: &Arc>, + pointer_path: &str, + draft: Draft, + state: &mut ProcessingState<'r>, + known_resources: &mut KnownResources, + resolution_cache: &mut UriCache, + local_seen: &mut VisitedRefs<'r>, +) -> Result<(), Error> { + // SAFETY: document lives in DocumentStore<'r> for the full BFS duration. + let document_root: &'r Value = unsafe { extend_value_lifetime(document.contents()) }; + let Some(subschema) = (if pointer_path.is_empty() { + Some(document_root) + } else { + pointer(document_root, pointer_path).map(|v| unsafe { extend_value_lifetime(v) }) + }) else { + return Ok(()); + }; + let parsed_pointer = ParsedPointer::from_json_pointer(pointer_path); + + with_pointer_node_from_parsed(parsed_pointer.as_ref(), |path| { + explore_owned_subtree( + current_base_uri, + document_root, + subschema, + draft, + pointer_path.is_empty(), + path, + document_root_uri, + document, + state, + known_resources, + resolution_cache, + local_seen, + ) + }) +} + +fn with_pointer_node_from_parsed( + pointer: Option<&ParsedPointer>, + f: impl FnOnce(&JsonPointerNode<'_, '_>) -> R, +) -> R { + fn descend<'a, 'node, R>( + segments: &'a [ParsedPointerSegment], + current: &'node JsonPointerNode<'a, 'node>, + f: impl FnOnce(&JsonPointerNode<'_, '_>) -> R, + ) -> R { + if let Some((head, tail)) = segments.split_first() { + let next = match head { + ParsedPointerSegment::Key(key) => current.push(key.as_ref()), + ParsedPointerSegment::Index(idx) => current.push(*idx), + }; + descend(tail, &next, f) + } else { + f(current) + } + } + + let root = JsonPointerNode::new(); + match pointer { + Some(pointer) => descend(&pointer.segments, &root, f), + None => f(&root), + } +} + +#[allow(unsafe_code)] +fn explore_owned_subtree<'r>( + current_base_uri: Arc>, + document_root: &'r Value, + subschema: &'r Value, + draft: Draft, + is_root_entry: bool, + path: &JsonPointerNode<'_, '_>, + document_root_uri: &Arc>, + document: &Arc>, + state: &mut 
ProcessingState<'r>, + known_resources: &mut KnownResources, + resolution_cache: &mut UriCache, + local_seen: &mut VisitedRefs<'r>, +) -> Result<(), Error> { + explore_subtree( + current_base_uri, + document_root, + subschema, + draft, + is_root_entry, + document_root_uri, + state, + known_resources, + resolution_cache, + OwnedStrategy { document, path }, + local_seen, + ) +} + +fn enqueue_fragment_entry( + uri: &Uri, + key: &Arc>, + default_draft: Draft, + documents: &DocumentStore<'_>, + queue: &mut VecDeque, +) { + if let Some(fragment) = uri.fragment() { + let Some(document) = documents.get(key) else { + return; + }; + if let Some(resolved) = pointer(document.contents(), fragment.as_str()) { + let fragment_draft = default_draft.detect(resolved); + queue.push_back(( + Arc::clone(key), + Arc::clone(key), + fragment.as_str().to_string(), + fragment_draft, + )); + } + } +} + +fn handle_metaschemas<'a>( + refers_metaschemas: bool, + documents: &mut DocumentStore<'a>, + known_resources: &mut KnownResources, + draft_version: Draft, + state: &mut ProcessingState<'a>, +) -> Result<(), Error> { + if !refers_metaschemas { + return Ok(()); + } + + let schemas = metas_for_draft(draft_version); + for (uri, schema) in schemas { + let key = Arc::new(uri::from_str(uri.trim_end_matches('#'))?); + if documents.contains_key(&key) { + continue; + } + let draft = Draft::default().detect(schema); + documents.insert( + Arc::clone(&key), + Arc::new(StoredDocument::borrowed(schema, draft)), + ); + known_resources.insert((*key).clone()); + state.index.register_document( + &key, + documents + .get(&key) + .expect("meta-schema document was just inserted into the store"), + ); + state + .queue + .push_back((Arc::clone(&key), Arc::clone(&key), String::new(), draft)); + } + Ok(()) +} + +fn create_resource<'a>( + retrieved: Value, + fragmentless: Uri, + default_draft: Draft, + documents: &mut DocumentStore<'a>, + known_resources: &mut KnownResources, + index: &mut Index<'a>, + 
custom_metaschemas: &mut Vec, +) -> (Arc>, Draft) { + let draft = default_draft.detect(&retrieved); + let key = Arc::new(fragmentless); + documents.insert( + Arc::clone(&key), + Arc::new(StoredDocument::owned(retrieved, draft)), + ); + + let contents = documents + .get(&key) + .expect("document was just inserted") + .contents(); + known_resources.insert((*key).clone()); + index.register_document( + &key, + documents + .get(&key) + .expect("retrieved document was just inserted into the store"), + ); + + if draft == Draft::Unknown { + if let Some(meta_schema) = contents + .as_object() + .and_then(|obj| obj.get("$schema")) + .and_then(|schema| schema.as_str()) + { + custom_metaschemas.push(meta_schema.to_string()); + } + } + + (key, draft) +} + +/// Drain the BFS queue and process all deferred local refs, reusing `local_seen_buf` +/// across iterations to avoid repeated allocation. +#[allow(unsafe_code)] +fn drain_queue_and_deferred<'a>( + state: &mut ProcessingState<'a>, + documents: &DocumentStore<'a>, + known_resources: &mut KnownResources, + resolution_cache: &mut UriCache, + visited: &mut VisitedRefs<'static>, +) -> Result<(), Error> { + // SAFETY: widens 'static → '_ (covariant); set is empty after reuse_local_seen clears it. + let mut local_visited: VisitedRefs<'_> = + unsafe { reuse_visited_local_refs(std::mem::take(visited)) }; + process_queue(state, documents, known_resources, resolution_cache)?; + process_deferred_refs(state, documents, resolution_cache, &mut local_visited)?; + // SAFETY: clears all '_ refs before narrowing back to 'static to reclaim the buffer. + *visited = unsafe { reuse_visited_local_refs(local_visited) }; + Ok(()) +} + +/// Fetch all pending external resources synchronously and enqueue the results. 
+fn fetch_external_resources<'a>( + state: &mut ProcessingState<'a>, + documents: &mut DocumentStore<'a>, + known_resources: &mut KnownResources, + default_draft: Draft, + retriever: &dyn Retrieve, +) -> Result<(), Error> { + for (original, uri, kind) in state.ctx.external.drain() { + let mut fragmentless = uri.clone(); + fragmentless.set_fragment(None); + if !known_resources.contains(&fragmentless) { + let retrieved = match retriever.retrieve(&fragmentless) { + Ok(retrieved) => retrieved, + Err(error) => { + handle_retrieve_error(&uri, &original, &fragmentless, error, kind)?; + continue; + } + }; + let (key, draft) = create_resource( + retrieved, + fragmentless, + default_draft, + documents, + known_resources, + &mut state.index, + &mut state.custom_metaschemas, + ); + enqueue_fragment_entry(&uri, &key, default_draft, documents, &mut state.queue); + state + .queue + .push_back((Arc::clone(&key), key, String::new(), draft)); + } + } + Ok(()) +} + +/// Fetch all pending external resources concurrently and enqueue the results. +/// Groups requests by base URI and issues them in a single `join_all` batch. +#[cfg(feature = "retrieve-async")] +async fn fetch_external_resources_async<'a>( + state: &mut ProcessingState<'a>, + documents: &mut DocumentStore<'a>, + known_resources: &mut KnownResources, + default_draft: Draft, + retriever: &dyn crate::AsyncRetrieve, +) -> Result<(), Error> { + type ExternalRefsByBase = AHashMap, Vec<(String, Uri, ReferenceKind)>>; + + if state.ctx.external.is_empty() { + return Ok(()); + } + + let mut grouped = ExternalRefsByBase::new(); + for (original, uri, kind) in state.ctx.external.drain() { + let mut fragmentless = uri.clone(); + fragmentless.set_fragment(None); + if !known_resources.contains(&fragmentless) { + grouped + .entry(fragmentless) + .or_default() + .push((original, uri, kind)); + } + } + + // Use grouped.keys() for futures (borrows) then grouped.into_iter() for results (consumes). 
+ // The map is not mutated between the two iterations, so zip order is stable. + let results = futures::future::join_all(grouped.keys().map(|u| retriever.retrieve(u))).await; + + for ((fragmentless, refs), result) in grouped.into_iter().zip(results) { + let retrieved = match result { + Ok(retrieved) => retrieved, + Err(error) => { + if let Some((original, uri, kind)) = refs.into_iter().next() { + handle_retrieve_error(&uri, &original, &fragmentless, error, kind)?; + } + continue; + } + }; + let (key, draft) = create_resource( + retrieved, + fragmentless, + default_draft, + documents, + known_resources, + &mut state.index, + &mut state.custom_metaschemas, + ); + for (_, uri, _) in &refs { + enqueue_fragment_entry(uri, &key, default_draft, documents, &mut state.queue); + } + state + .queue + .push_back((Arc::clone(&key), key, String::new(), draft)); + } + Ok(()) +} + +/// Inject meta-schemas if referenced, then run a final queue pass to index them. +#[allow(unsafe_code)] +fn finalize_index<'a>( + state: &mut ProcessingState<'a>, + documents: &mut DocumentStore<'a>, + known_resources: &mut KnownResources, + resolution_cache: &mut UriCache, + default_draft: Draft, + visited: VisitedRefs<'static>, +) -> Result<(), Error> { + handle_metaschemas( + state.ctx.refers_metaschemas, + documents, + known_resources, + default_draft, + state, + )?; + + if !state.queue.is_empty() { + // SAFETY: widens 'static → '_ (covariant); set is empty after reuse_local_seen clears it. + let mut local_visited: VisitedRefs<'_> = unsafe { reuse_visited_local_refs(visited) }; + process_queue(state, documents, known_resources, resolution_cache)?; + process_deferred_refs(state, documents, resolution_cache, &mut local_visited)?; + } + + Ok(()) +} + +/// Shared sync processing loop used during registry preparation. 
After the +/// initial input has been ingested into `state`, this function drives the +/// BFS-fetch cycle until all reachable external resources have been retrieved, +/// then handles meta-schema injection and runs a final queue pass. +fn resolve_and_index<'a>( + state: &mut ProcessingState<'a>, + documents: &mut DocumentStore<'a>, + known_resources: &mut KnownResources, + resolution_cache: &mut UriCache, + default_draft: Draft, + retriever: &dyn Retrieve, +) -> Result<(), Error> { + let mut visited: VisitedRefs<'static> = VisitedRefs::new(); + + while !(state.queue.is_empty() && state.ctx.external.is_empty()) { + drain_queue_and_deferred( + state, + documents, + known_resources, + resolution_cache, + &mut visited, + )?; + fetch_external_resources(state, documents, known_resources, default_draft, retriever)?; + } + + finalize_index( + state, + documents, + known_resources, + resolution_cache, + default_draft, + visited, + ) +} + +/// Shared async processing loop used during registry preparation. Batches +/// concurrent external retrievals with `join_all` and otherwise mirrors +/// [`resolve_and_index`]. 
+#[cfg(feature = "retrieve-async")] +async fn resolve_and_index_async<'a>( + state: &mut ProcessingState<'a>, + documents: &mut DocumentStore<'a>, + known_resources: &mut KnownResources, + resolution_cache: &mut UriCache, + default_draft: Draft, + retriever: &dyn crate::AsyncRetrieve, +) -> Result<(), Error> { + let mut visited: VisitedRefs<'static> = VisitedRefs::new(); + + while !(state.queue.is_empty() && state.ctx.external.is_empty()) { + drain_queue_and_deferred( + state, + documents, + known_resources, + resolution_cache, + &mut visited, + )?; + fetch_external_resources_async(state, documents, known_resources, default_draft, retriever) + .await?; + } + + finalize_index( + state, + documents, + known_resources, + resolution_cache, + default_draft, + visited, + ) +} + +fn handle_retrieve_error( + uri: &Uri, + // The original reference string is used in error messages for `json-schema://` URIs + // where the resolved URI is not user-friendly (e.g. "./foo.json" vs "json-schema:///foo.json"). 
+ original: &str, + fragmentless: &Uri, + error: Box, + kind: ReferenceKind, +) -> Result<(), Error> { + match kind { + ReferenceKind::Schema => Ok(()), + ReferenceKind::Ref => { + if uri.scheme().as_str() == "json-schema" { + Err(Error::unretrievable( + original, + "No base URI is available".into(), + )) + } else { + Err(Error::unretrievable(fragmentless.as_str(), error)) + } + } + } +} + +fn with_owned_child_path( + path: &JsonPointerNode<'_, '_>, + child: &ChildNode<'_>, + f: impl FnOnce(&JsonPointerNode<'_, '_>) -> R, +) -> R { + let first = match child.first { + PathSegment::Key(key) => path.push(key), + PathSegment::Index(index) => path.push(index), + }; + match child.second { + Some(PathSegment::Key(key)) => { + let second = first.push(key); + f(&second) + } + Some(PathSegment::Index(index)) => { + let second = first.push(index); + f(&second) + } + None => f(&first), + } +} + +fn collect_external_resources_from_values<'doc>( + base: &Arc>, + root: &'doc Value, + ref_: Option<&'doc str>, + schema: Option<&'doc str>, + ctx: &mut TraversalCtx, + resolution_cache: &mut UriCache, + draft: Draft, + doc_key: &Arc>, + visited: &mut VisitedRefs<'doc>, +) -> Result<(), Error> { + for (reference, key) in [(ref_, "$ref"), (schema, "$schema")] { + let Some(reference) = reference else { + continue; + }; + if reference.starts_with("https://json-schema.org/draft/") + || reference.starts_with("http://json-schema.org/draft-") + || base.as_str().starts_with("https://json-schema.org/draft/") + { + if key == "$ref" { + ctx.refers_metaschemas = true; + } + continue; + } + if reference == "#" { + continue; + } + if reference.starts_with('#') { + if mark_visited_local_ref(visited, base, reference) { + let ptr = reference.trim_start_matches('#'); + if let Some(referenced) = pointer(root, ptr) { + let target_draft = draft.detect(referenced); + let value_addr = std::ptr::from_ref::(referenced) as usize; + ctx.deferred_refs.push(( + Arc::clone(base), + Arc::clone(doc_key), + 
ptr.to_string(), + target_draft, + value_addr, + )); + } + } + continue; + } + if mark_reference(&mut ctx.seen, base, reference) { + let resolved = if base.has_fragment() { + let mut base_without_fragment = base.as_ref().clone(); + base_without_fragment.set_fragment(None); + + let (path, fragment) = match reference.split_once('#') { + Some((path, fragment)) => (path, Some(fragment)), + None => (reference, None), + }; + + let mut resolved = (*resolution_cache + .resolve_against(&base_without_fragment.borrow(), path)?) + .clone(); + if let Some(fragment) = fragment { + if let Some(encoded) = uri::EncodedString::new(fragment) { + resolved = resolved.with_fragment(Some(encoded)); + } else { + uri::encode_to(fragment, &mut ctx.scratch); + resolved = resolved + .with_fragment(Some(uri::EncodedString::new_or_panic(&ctx.scratch))); + ctx.scratch.clear(); + } + } + resolved + } else { + (*resolution_cache.resolve_against(&base.borrow(), reference)?).clone() + }; + + let kind = if key == "$schema" { + ReferenceKind::Schema + } else { + ReferenceKind::Ref + }; + ctx.external.insert((reference.to_string(), resolved, kind)); + } + } + Ok(()) +} + +pub(super) fn validate_custom_metaschemas( + custom_metaschemas: &[String], + known_resources: &KnownResources, +) -> Result<(), Error> { + for schema_uri in custom_metaschemas { + match uri::from_str(schema_uri) { + Ok(mut meta_uri) => { + meta_uri.set_fragment(None); + if !known_resources.contains(&meta_uri) { + return Err(Error::unknown_specification(schema_uri)); + } + } + Err(_) => { + return Err(Error::unknown_specification(schema_uri)); + } + } + } + Ok(()) +} + +fn collect_external_resources<'doc>( + base: &Arc>, + root: &'doc Value, + contents: &'doc Value, + ctx: &mut TraversalCtx, + resolution_cache: &mut UriCache, + draft: Draft, + doc_key: &Arc>, + visited: &mut VisitedRefs<'doc>, +) -> Result<(), Error> { + if base.scheme().as_str() == "urn" { + return Ok(()); + } + + macro_rules! 
on_reference { + ($reference:expr, $key:literal) => { + if $reference.starts_with("https://json-schema.org/draft/") + || $reference.starts_with("http://json-schema.org/draft-") + || base.as_str().starts_with("https://json-schema.org/draft/") + { + if $key == "$ref" { + ctx.refers_metaschemas = true; + } + } else if $reference != "#" { + if $reference.starts_with('#') { + if mark_visited_local_ref(visited, base, $reference) { + let ptr = $reference.trim_start_matches('#'); + if let Some(referenced) = pointer(root, ptr) { + let target_draft = draft.detect(referenced); + let value_addr = std::ptr::from_ref::(referenced) as usize; + ctx.deferred_refs.push(( + Arc::clone(base), + Arc::clone(doc_key), + ptr.to_string(), + target_draft, + value_addr, + )); + } + } + } else if mark_reference(&mut ctx.seen, base, $reference) { + let resolved = if base.has_fragment() { + let mut base_without_fragment = base.as_ref().clone(); + base_without_fragment.set_fragment(None); + + let (path, fragment) = match $reference.split_once('#') { + Some((path, fragment)) => (path, Some(fragment)), + None => ($reference, None), + }; + + let mut resolved = (*resolution_cache + .resolve_against(&base_without_fragment.borrow(), path)?) 
+ .clone(); + if let Some(fragment) = fragment { + if let Some(encoded) = uri::EncodedString::new(fragment) { + resolved = resolved.with_fragment(Some(encoded)); + } else { + uri::encode_to(fragment, &mut ctx.scratch); + resolved = resolved.with_fragment(Some( + uri::EncodedString::new_or_panic(&ctx.scratch), + )); + ctx.scratch.clear(); + } + } + resolved + } else { + (*resolution_cache.resolve_against(&base.borrow(), $reference)?).clone() + }; + + let kind = if $key == "$schema" { + ReferenceKind::Schema + } else { + ReferenceKind::Ref + }; + ctx.external + .insert(($reference.to_string(), resolved, kind)); + } + } + }; + } + + if let Some(object) = contents.as_object() { + if object.len() < 3 { + for (key, value) in object { + if key == "$ref" { + if let Some(reference) = value.as_str() { + on_reference!(reference, "$ref"); + } + } else if key == "$schema" { + if let Some(reference) = value.as_str() { + on_reference!(reference, "$schema"); + } + } + } + } else { + if let Some(reference) = object.get("$ref").and_then(Value::as_str) { + on_reference!(reference, "$ref"); + } + if let Some(reference) = object.get("$schema").and_then(Value::as_str) { + on_reference!(reference, "$schema"); + } + } + } + Ok(()) +} + +fn collect_external_resources_recursive<'doc>( + base: &Arc>, + root: &'doc Value, + contents: &'doc Value, + ctx: &mut TraversalCtx, + resolution_cache: &mut UriCache, + draft: Draft, + doc_key: &Arc>, + visited_refs: &mut VisitedRefs<'doc>, +) -> Result<(), Error> { + let ptr = std::ptr::from_ref::(contents) as usize; + if !ctx.visited_schemas.insert(ptr) { + return Ok(()); + } + + let current_base = match draft.id_of(contents) { + Some(id) => resolve_id(base, id, resolution_cache)?, + None => Arc::clone(base), + }; + + collect_external_resources( + ¤t_base, + root, + contents, + ctx, + resolution_cache, + draft, + doc_key, + visited_refs, + )?; + + for subresource in draft.subresources_of(contents) { + let subresource_draft = draft.detect(subresource); 
+ collect_external_resources_recursive( + ¤t_base, + root, + subresource, + ctx, + resolution_cache, + subresource_draft, + doc_key, + visited_refs, + )?; + } + Ok(()) +} + +/// Process deferred local-ref targets collected during the main traversal. +/// +/// Called after `process_queue` finishes so that all subresource nodes are already in +/// `visited_schemas`. Targets that were visited by the main BFS (e.g. `#/definitions/Foo` +/// under a JSON Schema keyword) are skipped in O(1) via the pre-stored value address, +/// avoiding a redundant `pointer()` traversal. Non-subresource targets +/// (e.g. `#/components/schemas/Foo`) are still fully traversed. New deferred entries +/// added during traversal are also processed iteratively until none remain. +fn process_deferred_refs<'a>( + state: &mut ProcessingState<'_>, + documents: &'a DocumentStore<'a>, + resolution_cache: &mut UriCache, + local_seen: &mut VisitedRefs<'a>, +) -> Result<(), Error> { + while !state.ctx.deferred_refs.is_empty() { + let batch = std::mem::take(&mut state.ctx.deferred_refs); + for (base, doc_key, pointer_path, draft, value_addr) in batch { + // Fast path: if this target was already visited by the main BFS traversal + // (e.g. a `#/definitions/Foo` that `walk_subresources_with_path` descended into), + // all its subresources were processed and `collect_external_resources` was already + // called on each — skip without a redundant `pointer()` traversal. 
+ if state.ctx.visited_schemas.contains(&value_addr) { + continue; + } + let Some(document) = documents.get(&doc_key) else { + continue; + }; + let root = document.contents(); + let Some(contents) = (if pointer_path.is_empty() { + Some(root) + } else { + pointer(root, &pointer_path) + }) else { + continue; + }; + collect_external_resources_recursive( + &base, + root, + contents, + &mut state.ctx, + resolution_cache, + draft, + &doc_key, + local_seen, + )?; + } + } + Ok(()) +} + +fn mark_reference(seen: &mut ReferenceTracker, base: &Arc>, reference: &str) -> bool { + seen.insert(ReferenceKey::new(base, reference)) +} + +fn mark_visited_local_ref<'a>( + local_seen: &mut VisitedRefs<'a>, + base: &Arc>, + reference: &'a str, +) -> bool { + let base_ptr = + NonZeroUsize::new(Arc::as_ptr(base) as usize).expect("Arc pointer should never be null"); + local_seen.insert((base_ptr, reference)) +} + +fn resolve_id( + base: &Arc>, + id: &str, + resolution_cache: &mut UriCache, +) -> Result>, Error> { + if id.starts_with('#') { + return Ok(Arc::clone(base)); + } + let mut resolved = (*resolution_cache.resolve_against(&base.borrow(), id)?).clone(); + if resolved.fragment().is_some_and(EStr::is_empty) { + resolved.set_fragment(None); + } + Ok(Arc::new(resolved)) +} + +/// Resolves `$id` against the current base, updates `known_resources`, and returns +/// `(new_base, changed)` where `changed` is true when the URI actually changed. 
+fn resolve_subresource_id( + current_base_uri: &Arc>, + id: &str, + known_resources: &mut KnownResources, + resolution_cache: &mut UriCache, +) -> Result<(Arc>, bool), Error> { + let new_base = resolve_id(current_base_uri, id, resolution_cache)?; + let changed = new_base != *current_base_uri; + known_resources.insert((*new_base).clone()); + Ok((new_base, changed)) +} + +#[cfg(test)] +mod tests { + use std::{error::Error as _, sync::Arc}; + + use ahash::AHashMap; + use fluent_uri::Uri; + use serde_json::{json, Value}; + use test_case::test_case; + + use super::*; + use crate::{ + cache::UriCache, + registry::{index::IndexedResource, SPECIFICATIONS}, + uri::from_str, + Anchor, Draft, Registry, Resource, Retrieve, + }; + + #[test] + fn test_invalid_uri_on_registry_creation() { + let schema = Draft::Draft202012.create_resource(json!({})); + let result = Registry::new().add(":/example.com", schema); + let error = result.expect_err("Should fail"); + + assert_eq!( + error.to_string(), + "Invalid URI reference ':/example.com': unexpected character at index 0" + ); + let source_error = error.source().expect("Should have a source"); + let inner_source = source_error.source().expect("Should have a source"); + assert_eq!(inner_source.to_string(), "unexpected character at index 0"); + } + + #[test] + fn test_lookup_unresolvable_url() { + // Create a registry with a single resource + let schema = Draft::Draft202012.create_resource(json!({ + "type": "object", + "properties": { + "foo": { "type": "string" } + } + })); + let registry = Registry::new() + .add("http://example.com/schema1", schema) + .expect("Invalid resources") + .prepare() + .expect("Invalid resources"); + + // Attempt to create a resolver for a URL not in the registry + let resolver = registry.resolver( + from_str("http://example.com/non_existent_schema").expect("Invalid base URI"), + ); + + let result = resolver.lookup(""); + + assert_eq!( + result.unwrap_err().to_string(), + "Resource 
'http://example.com/non_existent_schema' is not present in a registry and retrieving it failed: Retrieving external resources is not supported once the registry is populated" + ); + } + + #[test] + fn test_registry_can_be_built_from_borrowed_resources() { + let schema = json!({"type": "string"}); + let registry = Registry::new() + .add("urn:root", &schema) + .expect("Invalid resources") + .prepare() + .expect("Invalid resources"); + assert!(registry.contains_resource("urn:root")); + } + + #[test] + fn test_prepare_builds_local_entries_for_borrowed_and_owned() { + let root = json!({"$ref": "http://example.com/remote"}); + let remote = json!({"type": "string"}); + let registry = Registry::new() + .retriever(create_test_retriever(&[( + "http://example.com/remote", + remote.clone(), + )])) + .add("http://example.com/root", &root) + .expect("Invalid resources") + .prepare() + .expect("Invalid resources"); + + let root_uri = from_str("http://example.com/root").expect("Invalid root URI"); + let remote_uri = from_str("http://example.com/remote").expect("Invalid remote URI"); + + let root_resource = registry + .resource_by_uri(&root_uri) + .expect("Borrowed root should be available from prepared local entries"); + let remote_resource = registry + .resource_by_uri(&remote_uri) + .expect("Owned retrieved document should be available from prepared local entries"); + + assert_eq!(root_resource.contents(), &root); + assert_eq!(remote_resource.contents(), &remote); + } + + #[test] + fn test_prepare_populates_local_entries_for_subresources_and_anchors() { + let registry = Registry::new() + .add( + "http://example.com/root", + json!({ + "$defs": { + "embedded": { + "$id": "http://example.com/embedded", + "$anchor": "node", + "type": "string" + } + } + }), + ) + .expect("Invalid resources") + .prepare() + .expect("Invalid resources"); + + let embedded_uri = from_str("http://example.com/embedded").expect("Invalid embedded URI"); + let embedded_resource = registry + 
.resource_by_uri(&embedded_uri) + .expect("Embedded subresource should be available from prepared local entries"); + assert_eq!( + embedded_resource.contents(), + &json!({ + "$id": "http://example.com/embedded", + "$anchor": "node", + "type": "string" + }) + ); + + let embedded_anchor = registry + .anchor(&embedded_uri, "node") + .expect("Embedded anchor should be available from prepared local entries"); + match embedded_anchor { + Anchor::Default { resource, .. } => assert_eq!( + resource.contents(), + &json!({ + "$id": "http://example.com/embedded", + "$anchor": "node", + "type": "string" + }) + ), + Anchor::Dynamic { .. } => panic!("Expected a default anchor"), + } + } + + #[test] + fn test_process_borrowed_document_indexes_embedded_resource_as_borrowed() { + let schema = json!({ + "$defs": { + "embedded": { + "$id": "http://example.com/embedded", + "type": "string" + } + } + }); + let doc_key = Arc::new(from_str("http://example.com/root").expect("valid root URI")); + let document = Arc::new(StoredDocument::borrowed(&schema, Draft::Draft202012)); + let mut state = ProcessingState::new(); + let mut known_resources = KnownResources::default(); + let mut resolution_cache = UriCache::new(); + let mut local_seen = VisitedRefs::new(); + + known_resources.insert((*doc_key).clone()); + state.index.register_document(&doc_key, &document); + + process_borrowed_document( + Arc::clone(&doc_key), + &doc_key, + document + .borrowed_contents() + .expect("test document is borrowed"), + "", + Draft::Draft202012, + &mut state, + &mut known_resources, + &mut resolution_cache, + &mut local_seen, + ) + .expect("borrowed document traversal should succeed"); + + let embedded_uri = + Arc::new(from_str("http://example.com/embedded").expect("valid embedded URI")); + match state.index.resources.get(&embedded_uri) { + Some(IndexedResource::Borrowed(resource)) => { + assert_eq!( + resource.contents(), + &json!({"$id": "http://example.com/embedded", "type": "string"}) + ); + } + other => 
panic!("expected borrowed embedded resource entry, got {other:?}"), + } + } + + #[test] + fn test_process_owned_document_indexes_embedded_resource_as_owned() { + let schema = json!({ + "$defs": { + "embedded": { + "$id": "http://example.com/embedded", + "type": "string" + } + } + }); + let doc_key = Arc::new(from_str("http://example.com/root").expect("valid root URI")); + let document = Arc::new(StoredDocument::owned(schema, Draft::Draft202012)); + let mut state = ProcessingState::new(); + let mut known_resources = KnownResources::default(); + let mut resolution_cache = UriCache::new(); + let mut local_seen = VisitedRefs::new(); + + known_resources.insert((*doc_key).clone()); + state.index.register_document(&doc_key, &document); + + process_owned_document( + Arc::clone(&doc_key), + &doc_key, + &document, + "", + Draft::Draft202012, + &mut state, + &mut known_resources, + &mut resolution_cache, + &mut local_seen, + ) + .expect("owned document traversal should succeed"); + + let embedded_uri = + Arc::new(from_str("http://example.com/embedded").expect("valid embedded URI")); + match state.index.resources.get(&embedded_uri) { + Some(IndexedResource::Owned { .. 
}) => {} + other => panic!("expected owned embedded resource entry, got {other:?}"), + } + } + + #[test] + fn test_process_owned_document_indexes_fragment_root_with_pointer_prefix() { + let schema = json!({ + "$defs": { + "embedded": { + "$id": "http://example.com/embedded", + "type": "string" + } + } + }); + let doc_key = Arc::new(from_str("http://example.com/root").expect("valid root URI")); + let document = Arc::new(StoredDocument::owned(schema, Draft::Draft202012)); + let mut state = ProcessingState::new(); + let mut known_resources = KnownResources::default(); + let mut resolution_cache = UriCache::new(); + let mut local_seen = VisitedRefs::new(); + + known_resources.insert((*doc_key).clone()); + state.index.register_document(&doc_key, &document); + + process_owned_document( + Arc::clone(&doc_key), + &doc_key, + &document, + "/$defs/embedded", + Draft::Draft202012, + &mut state, + &mut known_resources, + &mut resolution_cache, + &mut local_seen, + ) + .expect("owned fragment traversal should succeed"); + + let embedded_uri = + Arc::new(from_str("http://example.com/embedded").expect("valid embedded URI")); + match state.index.resources.get(&embedded_uri) { + Some(IndexedResource::Owned { pointer, .. 
}) => { + assert_eq!( + pointer.lookup(document.contents()), + Some(&json!({"$id": "http://example.com/embedded", "type": "string"})) + ); + } + other => panic!("expected owned embedded resource entry, got {other:?}"), + } + } + + #[test] + fn test_prepare_merges_anchor_entries_for_shared_effective_uri() { + let registry = Registry::new() + .add( + "http://example.com/root", + json!({ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$defs": { + "first": { + "$anchor": "first", + "type": "string" + }, + "second": { + "$anchor": "second", + "type": "integer" + } + } + }), + ) + .expect("Invalid resources") + .prepare() + .expect("Invalid resources"); + + let resolver = registry.resolver(from_str("http://example.com/root").expect("Invalid URI")); + + assert_eq!( + resolver + .lookup("#first") + .expect("First anchor should resolve") + .contents(), + &json!({ + "$anchor": "first", + "type": "string" + }) + ); + assert_eq!( + resolver + .lookup("#second") + .expect("Second anchor should resolve") + .contents(), + &json!({ + "$anchor": "second", + "type": "integer" + }) + ); + } + + #[test] + fn test_relative_uri_without_base() { + let schema = Draft::Draft202012.create_resource(json!({"$ref": "./virtualNetwork.json"})); + let error = Registry::new() + .add("json-schema:///", schema) + .expect("Root resource should be accepted") + .prepare() + .expect_err("Should fail"); + assert_eq!(error.to_string(), "Resource './virtualNetwork.json' is not present in a registry and retrieving it failed: No base URI is available"); + } + + #[test] + fn test_prepare_requires_registered_custom_meta_schema() { + let base_registry = Registry::new() + .add( + "http://example.com/root", + Resource::from_contents(json!({"type": "object"})), + ) + .expect("Base registry should be created") + .prepare() + .expect("Base registry should be created"); + + let custom_schema = Resource::from_contents(json!({ + "$id": "http://example.com/custom", + "$schema": 
"http://example.com/meta/custom", + "type": "string" + })); + + let error = base_registry + .add("http://example.com/custom", custom_schema) + .expect("Schema should be accepted") + .prepare() + .expect_err("Extending registry must fail when the custom $schema is not registered"); + + let error_msg = error.to_string(); + assert_eq!( + error_msg, + "Unknown meta-schema: 'http://example.com/meta/custom'. Custom meta-schemas must be registered in the registry before use" + ); + } + + #[test] + fn test_prepare_accepts_registered_custom_meta_schema_fragment() { + let meta_schema = Resource::from_contents(json!({ + "$id": "http://example.com/meta/custom#", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object" + })); + + let registry = Registry::new() + .add("http://example.com/meta/custom#", meta_schema) + .expect("Meta-schema should be registered successfully") + .prepare() + .expect("Meta-schema should be registered successfully"); + + let schema = Resource::from_contents(json!({ + "$id": "http://example.com/schemas/my-schema", + "$schema": "http://example.com/meta/custom#", + "type": "string" + })); + + registry + .add("http://example.com/schemas/my-schema", schema) + .expect("Schema should be accepted") + .prepare() + .expect("Schema should accept registered meta-schema URI with trailing '#'"); + } + + #[test] + fn test_chained_custom_meta_schemas() { + // Meta-schema B (uses standard Draft 2020-12) + let meta_schema_b = json!({ + "$id": "json-schema:///meta/level-b", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$vocabulary": { + "https://json-schema.org/draft/2020-12/vocab/core": true, + "https://json-schema.org/draft/2020-12/vocab/validation": true, + }, + "type": "object", + "properties": { + "customProperty": {"type": "string"} + } + }); + + // Meta-schema A (uses Meta-schema B) + let meta_schema_a = json!({ + "$id": "json-schema:///meta/level-a", + "$schema": "json-schema:///meta/level-b", + "customProperty": 
"level-a-meta", + "type": "object" + }); + + // Schema (uses Meta-schema A) + let schema = json!({ + "$id": "json-schema:///schemas/my-schema", + "$schema": "json-schema:///meta/level-a", + "customProperty": "my-schema", + "type": "string" + }); + + // Register all meta-schemas and schema in a chained manner + // All resources are provided upfront, so no external retrieval should occur + Registry::new() + .add( + "json-schema:///meta/level-b", + Resource::from_contents(meta_schema_b), + ) + .expect("Meta-schema should be accepted") + .add( + "json-schema:///meta/level-a", + Resource::from_contents(meta_schema_a), + ) + .expect("Meta-schema should be accepted") + .add( + "json-schema:///schemas/my-schema", + Resource::from_contents(schema), + ) + .expect("Schema should be accepted") + .prepare() + .expect("Chained custom meta-schemas should be accepted when all are registered"); + } + + struct TestRetriever { + schemas: AHashMap, + } + + impl TestRetriever { + fn new(schemas: AHashMap) -> Self { + TestRetriever { schemas } + } + } + + impl Retrieve for TestRetriever { + fn retrieve( + &self, + uri: &Uri, + ) -> Result> { + if let Some(value) = self.schemas.get(uri.as_str()) { + Ok(value.clone()) + } else { + Err(format!("Failed to find {uri}").into()) + } + } + } + + fn create_test_retriever(schemas: &[(&str, Value)]) -> TestRetriever { + TestRetriever::new( + schemas + .iter() + .map(|&(k, ref v)| (k.to_string(), v.clone())) + .collect(), + ) + } + + #[test] + fn test_registry_builder_uses_custom_draft() { + let registry = Registry::new() + .draft(Draft::Draft4) + .add("urn:test", json!({})) + .expect("Resource should be accepted") + .prepare() + .expect("Registry should prepare"); + + let uri = from_str("urn:test").expect("Invalid test URI"); + assert_eq!( + registry.resource_by_uri(&uri).unwrap().draft(), + Draft::Draft4 + ); + } + + #[test] + fn test_registry_builder_uses_custom_retriever() { + let registry = Registry::new() + 
.retriever(create_test_retriever(&[( + "http://example.com/remote", + json!({"type": "string"}), + )])) + .add( + "http://example.com/root", + json!({"$ref": "http://example.com/remote"}), + ) + .expect("Resource should be accepted") + .prepare() + .expect("Registry should prepare"); + + assert!(registry.contains_resource("http://example.com/remote")); + } + + struct TestCase { + input_resources: Vec<(&'static str, Value)>, + remote_resources: Vec<(&'static str, Value)>, + expected_resolved_uris: Vec<&'static str>, + } + + #[test_case( + TestCase { + input_resources: vec![ + ("http://example.com/schema1", json!({"$ref": "http://example.com/schema2"})), + ], + remote_resources: vec![ + ("http://example.com/schema2", json!({"type": "object"})), + ], + expected_resolved_uris: vec!["http://example.com/schema1", "http://example.com/schema2"], + } + ;"External ref at top")] + #[test_case( + TestCase { + input_resources: vec![ + ("http://example.com/schema1", json!({ + "$defs": { + "subschema": {"type": "string"} + }, + "$ref": "#/$defs/subschema" + })), + ], + remote_resources: vec![], + expected_resolved_uris: vec!["http://example.com/schema1"], + } + ;"Internal ref at top")] + #[test_case( + TestCase { + input_resources: vec![ + ("http://example.com/schema1", json!({"$ref": "http://example.com/schema2"})), + ("http://example.com/schema2", json!({"type": "object"})), + ], + remote_resources: vec![], + expected_resolved_uris: vec!["http://example.com/schema1", "http://example.com/schema2"], + } + ;"Ref to later resource")] + #[test_case( + TestCase { + input_resources: vec![ + ("http://example.com/schema1", json!({ + "type": "object", + "properties": { + "prop1": {"$ref": "http://example.com/schema2"} + } + })), + ], + remote_resources: vec![ + ("http://example.com/schema2", json!({"type": "string"})), + ], + expected_resolved_uris: vec!["http://example.com/schema1", "http://example.com/schema2"], + } + ;"External ref in subresource")] + #[test_case( + TestCase { + 
input_resources: vec![ + ("http://example.com/schema1", json!({ + "type": "object", + "properties": { + "prop1": {"$ref": "#/$defs/subschema"} + }, + "$defs": { + "subschema": {"type": "string"} + } + })), + ], + remote_resources: vec![], + expected_resolved_uris: vec!["http://example.com/schema1"], + } + ;"Internal ref in subresource")] + #[test_case( + TestCase { + input_resources: vec![ + ("file:///schemas/main.json", json!({"$ref": "file:///schemas/external.json"})), + ], + remote_resources: vec![ + ("file:///schemas/external.json", json!({"type": "object"})), + ], + expected_resolved_uris: vec!["file:///schemas/main.json", "file:///schemas/external.json"], + } + ;"File scheme: external ref at top")] + #[test_case( + TestCase { + input_resources: vec![ + ("file:///schemas/main.json", json!({"$ref": "subfolder/schema.json"})), + ], + remote_resources: vec![ + ("file:///schemas/subfolder/schema.json", json!({"type": "string"})), + ], + expected_resolved_uris: vec!["file:///schemas/main.json", "file:///schemas/subfolder/schema.json"], + } + ;"File scheme: relative path ref")] + #[test_case( + TestCase { + input_resources: vec![ + ("file:///schemas/main.json", json!({ + "type": "object", + "properties": { + "local": {"$ref": "local.json"}, + "remote": {"$ref": "http://example.com/schema"} + } + })), + ], + remote_resources: vec![ + ("file:///schemas/local.json", json!({"type": "string"})), + ("http://example.com/schema", json!({"type": "number"})), + ], + expected_resolved_uris: vec![ + "file:///schemas/main.json", + "file:///schemas/local.json", + "http://example.com/schema" + ], + } + ;"File scheme: mixing with http scheme")] + #[test_case( + TestCase { + input_resources: vec![ + ("file:///C:/schemas/main.json", json!({"$ref": "/D:/other_schemas/schema.json"})), + ], + remote_resources: vec![ + ("file:///D:/other_schemas/schema.json", json!({"type": "boolean"})), + ], + expected_resolved_uris: vec![ + "file:///C:/schemas/main.json", + 
"file:///D:/other_schemas/schema.json" + ], + } + ;"File scheme: absolute path in Windows style")] + #[test_case( + TestCase { + input_resources: vec![ + ("http://example.com/schema1", json!({"$ref": "http://example.com/schema2"})), + ], + remote_resources: vec![ + ("http://example.com/schema2", json!({"$ref": "http://example.com/schema3"})), + ("http://example.com/schema3", json!({"$ref": "http://example.com/schema4"})), + ("http://example.com/schema4", json!({"$ref": "http://example.com/schema5"})), + ("http://example.com/schema5", json!({"type": "object"})), + ], + expected_resolved_uris: vec![ + "http://example.com/schema1", + "http://example.com/schema2", + "http://example.com/schema3", + "http://example.com/schema4", + "http://example.com/schema5", + ], + } + ;"Four levels of external references")] + #[test_case( + TestCase { + input_resources: vec![ + ("http://example.com/schema1", json!({"$ref": "http://example.com/schema2"})), + ], + remote_resources: vec![ + ("http://example.com/schema2", json!({"$ref": "http://example.com/schema3"})), + ("http://example.com/schema3", json!({"$ref": "http://example.com/schema4"})), + ("http://example.com/schema4", json!({"$ref": "http://example.com/schema5"})), + ("http://example.com/schema5", json!({"$ref": "http://example.com/schema6"})), + ("http://example.com/schema6", json!({"$ref": "http://example.com/schema1"})), + ], + expected_resolved_uris: vec![ + "http://example.com/schema1", + "http://example.com/schema2", + "http://example.com/schema3", + "http://example.com/schema4", + "http://example.com/schema5", + "http://example.com/schema6", + ], + } + ;"Five levels of external references with circular reference")] + fn test_references_processing(test_case: TestCase) { + let retriever = create_test_retriever(&test_case.remote_resources); + + let input_pairs = test_case + .input_resources + .clone() + .into_iter() + .map(|(uri, value)| (uri, Resource::from_contents(value))); + + let mut registry = 
Registry::new().retriever(retriever); + for (uri, resource) in input_pairs { + registry = registry.add(uri, resource).expect("Invalid resources"); + } + let registry = registry.prepare().expect("Invalid resources"); + // Verify that all expected URIs are resolved and present in resources + for uri in test_case.expected_resolved_uris { + let resolver = registry.resolver(from_str("").expect("Invalid base URI")); + assert!(resolver.lookup(uri).is_ok()); + } + } + + #[test] + fn test_default_retriever_with_remote_refs() { + let result = Registry::new() + .add( + "http://example.com/schema1", + Resource::from_contents(json!({"$ref": "http://example.com/schema2"})), + ) + .expect("Resource should be accepted") + .prepare(); + let error = result.expect_err("Should fail"); + assert_eq!(error.to_string(), "Resource 'http://example.com/schema2' is not present in a registry and retrieving it failed: Default retriever does not fetch resources"); + assert!(error.source().is_some()); + } + + #[test] + fn test_registry_new_can_add_and_prepare() { + let registry = Registry::new() + .add("urn:test", json!({"type": "string"})) + .expect("Resource should be accepted") + .prepare() + .expect("Registry should prepare"); + + assert!(registry.contains_resource("urn:test")); + } + + #[test] + fn test_prepared_registry_can_be_extended_via_add() { + let original = Registry::new() + .add("urn:one", json!({"type": "string"})) + .expect("Resource should be accepted") + .prepare() + .expect("Registry should prepare"); + + let registry = original + .add("urn:two", json!({"type": "integer"})) + .expect("Resource should be accepted") + .prepare() + .expect("Registry should prepare"); + + assert!(original.contains_resource("urn:one")); + assert!(!original.contains_resource("urn:two")); + assert!(registry.contains_resource("urn:one")); + assert!(registry.contains_resource("urn:two")); + } + + #[test] + fn test_registry_builder_accepts_borrowed_values() { + let schema = json!({"type": "string"}); + 
let registry = Registry::new() + .add("urn:test", &schema) + .expect("Resource should be accepted") + .prepare() + .expect("Registry should prepare"); + + assert!(registry.contains_resource("urn:test")); + } + + #[test] + fn test_registry_builder_accepts_borrowed_resources() { + let schema = Draft::Draft4.create_resource(json!({"type": "string"})); + let registry = Registry::new() + .add("urn:test", &schema) + .expect("Resource should be accepted") + .prepare() + .expect("Registry should prepare"); + + let uri = from_str("urn:test").expect("Invalid test URI"); + assert_eq!( + registry.resource_by_uri(&uri).unwrap().draft(), + Draft::Draft4 + ); + } + + #[test] + fn test_registry_with_duplicate_input_uris() { + let registry = Registry::new() + .add( + "http://example.com/schema", + json!({ + "type": "object", + "properties": { + "foo": { "type": "string" } + } + }), + ) + .expect("First resource should be accepted") + .add( + "http://example.com/schema", + json!({ + "type": "object", + "properties": { + "bar": { "type": "number" } + } + }), + ) + .expect("Second resource should overwrite the first") + .prepare() + .expect("Registry should prepare"); + + let uri = from_str("http://example.com/schema").expect("Invalid schema URI"); + let resource = registry.resource_by_uri(&uri).unwrap(); + let properties = resource + .contents() + .get("properties") + .and_then(|v| v.as_object()) + .unwrap(); + + assert!( + !properties.contains_key("foo"), + "Registry should replace the earlier explicit input resource" + ); + assert!(properties.contains_key("bar")); + } + + #[test] + fn test_resolver_debug() { + let registry = SPECIFICATIONS + .add("http://example.com", json!({})) + .expect("Invalid resource") + .prepare() + .expect("Invalid resource"); + let resolver = + registry.resolver(from_str("http://127.0.0.1/schema").expect("Invalid base URI")); + assert_eq!( + format!("{resolver:?}"), + "Resolver { base_uri: \"http://127.0.0.1/schema\", scopes: \"[]\" }" + ); + } + + #[test] 
+ fn test_prepare_with_specifications_registry() { + let registry = SPECIFICATIONS + .add("http://example.com", json!({})) + .expect("Invalid resource") + .prepare() + .expect("Invalid resource"); + let resolver = registry.resolver(from_str("").expect("Invalid base URI")); + let resolved = resolver + .lookup("http://json-schema.org/draft-06/schema#/definitions/schemaArray") + .expect("Lookup failed"); + assert_eq!( + resolved.contents(), + &json!({ + "type": "array", + "minItems": 1, + "items": { "$ref": "#" } + }) + ); + } + + #[test] + fn test_prepare_preserves_existing_local_entries() { + let original = Registry::new() + .add( + "http://example.com/root", + Resource::from_contents(json!({ + "$defs": { + "embedded": { + "$id": "http://example.com/embedded", + "type": "string" + } + } + })), + ) + .expect("Invalid root schema") + .prepare() + .expect("Invalid root schema"); + + let extended = original + .add( + "http://example.com/other", + Resource::from_contents(json!({"type": "number"})), + ) + .expect("Registry extension should succeed") + .prepare() + .expect("Registry extension should succeed"); + + let resolver = extended.resolver(from_str("").expect("Invalid base URI")); + let embedded = resolver + .lookup("http://example.com/embedded") + .expect("Embedded subresource URI should stay indexed after extension"); + assert_eq!( + embedded.contents(), + &json!({ + "$id": "http://example.com/embedded", + "type": "string" + }) + ); + } + + #[test] + fn test_prepared_registry_can_be_extended_via_extend() { + let original = Registry::new() + .add("urn:one", json!({"type": "string"})) + .expect("Resource should be accepted") + .prepare() + .expect("Registry should prepare"); + + let registry = original + .extend([("urn:two", json!({"type": "integer"}))]) + .expect("Resources should be accepted") + .prepare() + .expect("Registry should prepare"); + + assert!(original.contains_resource("urn:one")); + assert!(!original.contains_resource("urn:two")); + 
assert!(registry.contains_resource("urn:one")); + assert!(registry.contains_resource("urn:two")); + } + + #[test] + fn test_invalid_reference() { + let resource = Draft::Draft202012.create_resource(json!({"$schema": "$##"})); + let _ = Registry::new() + .add("http://#/", resource) + .and_then(crate::registry::RegistryBuilder::prepare); + } +} + +#[cfg(all(test, feature = "retrieve-async"))] +mod async_tests { + use crate::{uri, DefaultRetriever, Draft, Registry, Resource, Uri}; + use ahash::AHashMap; + use serde_json::{json, Value}; + use std::{ + error::Error, + sync::atomic::{AtomicUsize, Ordering}, + }; + + struct TestAsyncRetriever { + schemas: AHashMap, + } + + impl TestAsyncRetriever { + fn with_schema(uri: impl Into, schema: Value) -> Self { + TestAsyncRetriever { + schemas: { AHashMap::from_iter([(uri.into(), schema)]) }, + } + } + } + + #[cfg_attr(target_family = "wasm", async_trait::async_trait(?Send))] + #[cfg_attr(not(target_family = "wasm"), async_trait::async_trait)] + impl crate::AsyncRetrieve for TestAsyncRetriever { + async fn retrieve( + &self, + uri: &Uri, + ) -> Result> { + self.schemas + .get(uri.as_str()) + .cloned() + .ok_or_else(|| "Schema not found".into()) + } + } + + #[tokio::test] + async fn test_default_async_retriever_with_remote_refs() { + let result = Registry::new() + .async_retriever(DefaultRetriever) + .add( + "http://example.com/schema1", + Resource::from_contents(json!({"$ref": "http://example.com/schema2"})), + ) + .expect("Resource should be accepted") + .async_prepare() + .await; + + let error = result.expect_err("Should fail"); + assert_eq!(error.to_string(), "Resource 'http://example.com/schema2' is not present in a registry and retrieving it failed: Default retriever does not fetch resources"); + assert!(error.source().is_some()); + } + + #[tokio::test] + async fn test_async_prepare() { + let _registry = Registry::new() + .async_retriever(DefaultRetriever) + .add("", Draft::default().create_resource(json!({}))) + 
.expect("Invalid resources") + .async_prepare() + .await + .expect("Invalid resources"); + } + + #[tokio::test] + async fn test_async_registry_with_duplicate_input_uris() { + let registry = Registry::new() + .async_retriever(DefaultRetriever) + .add( + "http://example.com/schema", + json!({ + "type": "object", + "properties": { + "foo": { "type": "string" } + } + }), + ) + .expect("First resource should be accepted") + .add( + "http://example.com/schema", + json!({ + "type": "object", + "properties": { + "bar": { "type": "number" } + } + }), + ) + .expect("Second resource should overwrite the first") + .async_prepare() + .await + .expect("Registry should prepare"); + + let uri = uri::from_str("http://example.com/schema").expect("Invalid schema URI"); + let resource = registry.resource_by_uri(&uri).unwrap(); + let properties = resource + .contents() + .get("properties") + .and_then(|v| v.as_object()) + .unwrap(); + + assert!( + !properties.contains_key("foo"), + "Registry should replace the earlier explicit input resource" + ); + assert!(properties.contains_key("bar")); + } + + #[tokio::test] + async fn test_registry_builder_async_prepare_uses_async_retriever() { + let registry = Registry::new() + .async_retriever(TestAsyncRetriever::with_schema( + "http://example.com/schema2", + json!({"type": "object"}), + )) + .add( + "http://example.com", + json!({"$ref": "http://example.com/schema2"}), + ) + .expect("Resource should be accepted") + .async_prepare() + .await + .expect("Registry should prepare"); + + let resolver = registry.resolver(uri::from_str("").expect("Invalid base URI")); + let resolved = resolver + .lookup("http://example.com/schema2") + .expect("Lookup failed"); + assert_eq!(resolved.contents(), &json!({"type": "object"})); + } + + #[tokio::test] + async fn test_async_prepare_with_remote_resource() { + let retriever = TestAsyncRetriever::with_schema( + "http://example.com/schema2", + json!({"type": "object"}), + ); + + let registry = Registry::new() + 
.async_retriever(retriever) + .add( + "http://example.com", + Resource::from_contents(json!({"$ref": "http://example.com/schema2"})), + ) + .expect("Invalid resource") + .async_prepare() + .await + .expect("Invalid resource"); + + let resolver = registry.resolver(uri::from_str("").expect("Invalid base URI")); + let resolved = resolver + .lookup("http://example.com/schema2") + .expect("Lookup failed"); + assert_eq!(resolved.contents(), &json!({"type": "object"})); + } + + #[tokio::test] + async fn test_async_prepare_preserves_existing_local_entries() { + let original = Registry::new() + .async_retriever(DefaultRetriever) + .add( + "http://example.com/root", + Resource::from_contents(json!({ + "$defs": { + "embedded": { + "$id": "http://example.com/embedded", + "type": "string" + } + } + })), + ) + .expect("Invalid root schema") + .async_prepare() + .await + .expect("Invalid root schema"); + + let extended = original + .add( + "http://example.com/other", + Resource::from_contents(json!({"type": "number"})), + ) + .expect("Registry extension should succeed") + .async_prepare() + .await + .expect("Registry extension should succeed"); + + let resolver = extended.resolver(uri::from_str("").expect("Invalid base URI")); + let embedded = resolver + .lookup("http://example.com/embedded") + .expect("Embedded subresource URI should stay indexed after async extension"); + assert_eq!( + embedded.contents(), + &json!({ + "$id": "http://example.com/embedded", + "type": "string" + }) + ); + } + + #[tokio::test] + async fn test_async_registry_with_multiple_refs() { + let retriever = TestAsyncRetriever { + schemas: AHashMap::from_iter([ + ( + "http://example.com/schema2".to_string(), + json!({"type": "object"}), + ), + ( + "http://example.com/schema3".to_string(), + json!({"type": "string"}), + ), + ]), + }; + + let registry = Registry::new() + .async_retriever(retriever) + .add( + "http://example.com/schema1", + Resource::from_contents(json!({ + "type": "object", + "properties": { + 
"obj": {"$ref": "http://example.com/schema2"}, + "str": {"$ref": "http://example.com/schema3"} + } + })), + ) + .expect("Invalid resource") + .async_prepare() + .await + .expect("Invalid resource"); + + let resolver = registry.resolver(uri::from_str("").expect("Invalid base URI")); + + // Check both references are resolved correctly + let resolved2 = resolver + .lookup("http://example.com/schema2") + .expect("Lookup failed"); + assert_eq!(resolved2.contents(), &json!({"type": "object"})); + + let resolved3 = resolver + .lookup("http://example.com/schema3") + .expect("Lookup failed"); + assert_eq!(resolved3.contents(), &json!({"type": "string"})); + } + + #[tokio::test] + async fn test_async_registry_with_nested_refs() { + let retriever = TestAsyncRetriever { + schemas: AHashMap::from_iter([ + ( + "http://example.com/address".to_string(), + json!({ + "type": "object", + "properties": { + "street": {"type": "string"}, + "city": {"$ref": "http://example.com/city"} + } + }), + ), + ( + "http://example.com/city".to_string(), + json!({ + "type": "string", + "minLength": 1 + }), + ), + ]), + }; + + let registry = Registry::new() + .async_retriever(retriever) + .add( + "http://example.com/person", + Resource::from_contents(json!({ + "type": "object", + "properties": { + "name": {"type": "string"}, + "address": {"$ref": "http://example.com/address"} + } + })), + ) + .expect("Invalid resource") + .async_prepare() + .await + .expect("Invalid resource"); + + let resolver = registry.resolver(uri::from_str("").expect("Invalid base URI")); + + // Verify nested reference resolution + let resolved = resolver + .lookup("http://example.com/city") + .expect("Lookup failed"); + assert_eq!( + resolved.contents(), + &json!({"type": "string", "minLength": 1}) + ); + } + + // Multiple refs to the same external schema with different fragments were fetched multiple times in async mode. 
// NOTE(review): the extracted text had lost the generic arguments in the `retrieve`
// signature and in the `Index` field types further down; `Uri<String>`,
// `Result<Value, Box<dyn Error + Send + Sync>>` and the `SmallMap<…>` parameters are
// reconstructed from context — confirm against the upstream sources.
#[tokio::test]
async fn test_async_registry_with_duplicate_fragment_refs() {
    // Shared by every `CountingRetriever::retrieve` call made during this test.
    static FETCH_COUNT: AtomicUsize = AtomicUsize::new(0);

    /// Delegates to `inner` and bumps `FETCH_COUNT` on every retrieval.
    struct CountingRetriever {
        inner: TestAsyncRetriever,
    }

    #[cfg_attr(target_family = "wasm", async_trait::async_trait(?Send))]
    #[cfg_attr(not(target_family = "wasm"), async_trait::async_trait)]
    impl crate::AsyncRetrieve for CountingRetriever {
        async fn retrieve(
            &self,
            uri: &Uri<String>,
        ) -> Result<Value, Box<dyn Error + Send + Sync>> {
            FETCH_COUNT.fetch_add(1, Ordering::SeqCst);
            self.inner.retrieve(uri).await
        }
    }

    // The counter is a `static`, so reset it explicitly before building the registry.
    FETCH_COUNT.store(0, Ordering::SeqCst);

    let retriever = CountingRetriever {
        inner: TestAsyncRetriever::with_schema(
            "http://example.com/external",
            json!({
                "$defs": {
                    "foo": {
                        "type": "object",
                        "properties": {
                            "nested": { "type": "string" }
                        }
                    },
                    "bar": {
                        "type": "object",
                        "properties": {
                            "value": { "type": "integer" }
                        }
                    }
                }
            }),
        ),
    };

    // Schema references the same external URL with different fragments
    let registry = Registry::new()
        .async_retriever(retriever)
        .add(
            "http://example.com/main",
            Resource::from_contents(json!({
                "type": "object",
                "properties": {
                    "name": { "$ref": "http://example.com/external#/$defs/foo" },
                    "age": { "$ref": "http://example.com/external#/$defs/bar" }
                }
            })),
        )
        .expect("Invalid resource")
        .async_prepare()
        .await
        .expect("Invalid resource");

    // Should only fetch the external schema once
    let fetches = FETCH_COUNT.load(Ordering::SeqCst);
    assert_eq!(
        fetches, 1,
        "External schema should be fetched only once, but was fetched {fetches} times"
    );

    let resolver =
        registry.resolver(uri::from_str("http://example.com/main").expect("Invalid base URI"));

    // Verify both fragment references resolve correctly
    let foo = resolver
        .lookup("http://example.com/external#/$defs/foo")
        .expect("Lookup failed");
    assert_eq!(
        foo.contents(),
        &json!({
            "type": "object",
            "properties": {
                "nested": { "type": "string" }
            }
        })
    );

    let bar = resolver
        .lookup("http://example.com/external#/$defs/bar")
        .expect("Lookup failed");
    assert_eq!(
        bar.contents(),
        &json!({
            "type": "object",
            "properties": {
                "value": { "type": "integer" }
            }
        })
    );
}
} // closes `mod async_tests`
diff --git a/crates/jsonschema-referencing/src/registry/index.rs b/crates/jsonschema-referencing/src/registry/index.rs
new file mode 100644
index 00000000..afda8a6d
--- /dev/null
+++ b/crates/jsonschema-referencing/src/registry/index.rs
@@ -0,0 +1,197 @@
//! Lookup structures produced by the build pass.
//!
//! Each [`IndexedResource`] and [`IndexedAnchor`] has two variants:
//! - `Borrowed`: the schema value is referenced in place from the caller — no allocation.
//! - `Owned`: the schema value is stored behind an [`Arc`]; a [`ParsedPointer`] locates
//!   the sub-schema within the document.
//!
//! This split avoids cloning schema values when the caller's data outlives the registry.

use std::sync::Arc;

use fluent_uri::Uri;

use serde_json::Value;

use crate::{
    anchor::Anchor, draft::Draft, pointer::ParsedPointer, small_map::SmallMap, ResourceRef,
};

use super::build::StoredDocument;

/// Lookup tables mapping canonical URIs to resources and anchors.
#[derive(Debug, Clone, Default)]
pub(super) struct Index<'a> {
    /// Resource entries keyed by URI.
    pub(super) resources: SmallMap<Arc<Uri<String>>, IndexedResource<'a>>,
    /// Anchor entries: outer key is the URI, inner key is the anchor name.
    pub(super) anchors: SmallMap<Arc<Uri<String>>, SmallMap<Box<str>, IndexedAnchor<'a>>>,
}

/// A schema resource in the index: either borrowed from the caller or owned by the registry.
+#[derive(Debug, Clone)] +pub(super) enum IndexedResource<'a> { + Borrowed(ResourceRef<'a>), + Owned { + document: Arc>, + pointer: ParsedPointer, + draft: Draft, + }, +} + +impl IndexedResource<'_> { + #[inline] + pub(super) fn resolve(&self) -> Option> { + match self { + IndexedResource::Borrowed(resource) => { + Some(ResourceRef::new(resource.contents(), resource.draft())) + } + IndexedResource::Owned { + document, + pointer, + draft, + } => { + let contents = pointer.lookup(document.contents())?; + Some(ResourceRef::new(contents, *draft)) + } + } + } +} + +/// An anchor in the index: either borrowed from the caller or owned by the registry. +#[derive(Debug, Clone)] +pub(super) enum IndexedAnchor<'a> { + Borrowed(Anchor<'a>), + Owned { + document: Arc>, + pointer: ParsedPointer, + draft: Draft, + kind: AnchorKind, + name: Box, + }, +} + +impl IndexedAnchor<'_> { + #[inline] + pub(super) fn resolve(&self) -> Option> { + match self { + IndexedAnchor::Borrowed(anchor) => Some(match anchor { + Anchor::Default { name, resource } => Anchor::Default { + name, + resource: ResourceRef::new(resource.contents(), resource.draft()), + }, + Anchor::Dynamic { name, resource } => Anchor::Dynamic { + name, + resource: ResourceRef::new(resource.contents(), resource.draft()), + }, + }), + IndexedAnchor::Owned { + document, + pointer, + draft, + kind, + name, + } => { + let contents = pointer.lookup(document.contents())?; + let resource = ResourceRef::new(contents, *draft); + Some(match kind { + AnchorKind::Default => Anchor::Default { name, resource }, + AnchorKind::Dynamic => Anchor::Dynamic { name, resource }, + }) + } + } + } +} + +/// Whether an anchor is a plain anchor (`$anchor`) or a dynamic anchor (`$dynamicAnchor`). +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(super) enum AnchorKind { + Default, + Dynamic, +} + +impl<'a> Index<'a> { + /// Register a document: insert its resource entry and all its anchors. 
+ pub(super) fn register_document( + &mut self, + key: &Arc>, + document: &Arc>, + ) { + if let Some(contents) = document.borrowed_contents() { + self.register_borrowed_subresource(key, document.draft(), true, contents); + } else { + let pointer = ParsedPointer::default(); + self.register_owned_subresource( + key, + document, + &pointer, + document.draft(), + true, + document.contents(), + ); + } + } + + /// Register a subresource discovered during BFS traversal of a borrowed document. + /// If `has_id` is true, the subresource is also registered as a resource entry. + pub(super) fn register_borrowed_subresource( + &mut self, + key: &Arc>, + draft: Draft, + has_id: bool, + contents: &'a Value, + ) { + if has_id { + self.resources.insert( + Arc::clone(key), + IndexedResource::Borrowed(ResourceRef::new(contents, draft)), + ); + } + let anchors = self.anchors.get_or_insert_default(Arc::clone(key)); + for anchor in draft.anchors(contents) { + anchors.insert( + anchor.name().to_string().into_boxed_str(), + IndexedAnchor::Borrowed(anchor), + ); + } + } + + /// Register a subresource discovered during BFS traversal of an owned document. + /// If `has_id` is true, the subresource is also registered as a resource entry. + pub(super) fn register_owned_subresource( + &mut self, + key: &Arc>, + document: &Arc>, + pointer: &ParsedPointer, + draft: Draft, + has_id: bool, + contents: &Value, + ) { + if has_id { + self.resources.insert( + Arc::clone(key), + IndexedResource::Owned { + document: Arc::clone(document), + pointer: pointer.clone(), + draft, + }, + ); + } + let anchors = self.anchors.get_or_insert_default(Arc::clone(key)); + for anchor in draft.anchors(contents) { + let (name, kind) = match anchor { + Anchor::Default { name, .. } => (name, AnchorKind::Default), + Anchor::Dynamic { name, .. 
} => (name, AnchorKind::Dynamic), + }; + let name = name.to_string().into_boxed_str(); + anchors.insert( + name.clone(), + IndexedAnchor::Owned { + document: Arc::clone(document), + pointer: pointer.clone(), + draft, + kind, + name, + }, + ); + } + } +} diff --git a/crates/jsonschema-referencing/src/registry/input.rs b/crates/jsonschema-referencing/src/registry/input.rs new file mode 100644 index 00000000..02884e7b --- /dev/null +++ b/crates/jsonschema-referencing/src/registry/input.rs @@ -0,0 +1,133 @@ +//! Input normalisation for resources entering the registry. +//! +//! [`PendingResource`] is a 2x2 enum covering every combination of: +//! - **value type**: raw [`Value`] vs typed [`Resource`] / [`ResourceRef`] +//! - **ownership**: owned (caller transfers) vs borrowed (caller retains, lifetime `'a`) +//! +//! [`IntoRegistryResource`] is sealed so only the four approved input forms can enter +//! the pipeline — implementing it from outside the crate is intentionally prevented. + +use std::sync::Arc; + +use ahash::AHashMap; +use fluent_uri::Uri; +use serde_json::Value; + +use crate::{Resource, ResourceRef, Retrieve}; + +/// A resource waiting to enter the registry. 
+#[derive(Clone)] +pub(crate) enum PendingResource<'a> { + Value(Value), + ValueRef(&'a Value), + Resource(Resource), + ResourceRef(ResourceRef<'a>), +} + +pub(crate) mod private { + use ahash::AHashMap; + use fluent_uri::Uri; + + use super::PendingResource; + + pub(crate) trait Sealed<'a> { + fn insert_into( + self, + pending: &mut AHashMap, PendingResource<'a>>, + uri: Uri, + ); + } +} + +#[allow(private_bounds)] +pub trait IntoRegistryResource<'a>: private::Sealed<'a> {} + +impl<'a, T> IntoRegistryResource<'a> for T where T: private::Sealed<'a> {} + +impl<'a> private::Sealed<'a> for Resource { + fn insert_into( + self, + pending: &mut AHashMap, PendingResource<'a>>, + uri: Uri, + ) { + pending.insert(uri, PendingResource::Resource(self)); + } +} + +impl<'a> private::Sealed<'a> for &'a Resource { + fn insert_into( + self, + pending: &mut AHashMap, PendingResource<'a>>, + uri: Uri, + ) { + pending.insert( + uri, + PendingResource::ResourceRef(ResourceRef::new(self.contents(), self.draft())), + ); + } +} + +impl<'a> private::Sealed<'a> for &'a Value { + fn insert_into( + self, + pending: &mut AHashMap, PendingResource<'a>>, + uri: Uri, + ) { + pending.insert(uri, PendingResource::ValueRef(self)); + } +} + +impl<'a> private::Sealed<'a> for ResourceRef<'a> { + fn insert_into( + self, + pending: &mut AHashMap, PendingResource<'a>>, + uri: Uri, + ) { + pending.insert(uri, PendingResource::ResourceRef(self)); + } +} + +impl<'a> private::Sealed<'a> for Value { + fn insert_into( + self, + pending: &mut AHashMap, PendingResource<'a>>, + uri: Uri, + ) { + pending.insert(uri, PendingResource::Value(self)); + } +} + +pub trait IntoRetriever { + fn into_retriever(self) -> Arc; +} + +impl IntoRetriever for T { + fn into_retriever(self) -> Arc { + Arc::new(self) + } +} + +impl IntoRetriever for Arc { + fn into_retriever(self) -> Arc { + self + } +} + +#[cfg(feature = "retrieve-async")] +pub trait IntoAsyncRetriever { + fn into_retriever(self) -> Arc; +} + +#[cfg(feature = 
"retrieve-async")] +impl IntoAsyncRetriever for T { + fn into_retriever(self) -> Arc { + Arc::new(self) + } +} + +#[cfg(feature = "retrieve-async")] +impl IntoAsyncRetriever for Arc { + fn into_retriever(self) -> Arc { + self + } +} diff --git a/crates/jsonschema-referencing/src/registry/mod.rs b/crates/jsonschema-referencing/src/registry/mod.rs new file mode 100644 index 00000000..008f0e86 --- /dev/null +++ b/crates/jsonschema-referencing/src/registry/mod.rs @@ -0,0 +1,441 @@ +use std::{ + fmt, + sync::{Arc, LazyLock}, +}; + +use ahash::AHashMap; +use fluent_uri::Uri; +use serde_json::Value; + +use crate::{ + cache::{SharedUriCache, UriCache}, + meta, uri, + vocabularies::{self, VocabularySet}, + Anchor, DefaultRetriever, Draft, Error, Resolver, ResourceRef, Retrieve, +}; + +mod build; +use build::{DocumentStore, KnownResources, StoredDocument}; + +mod index; +use index::{Index, IndexedAnchor, IndexedResource}; + +mod input; +#[cfg(feature = "retrieve-async")] +pub(crate) use input::IntoAsyncRetriever; +pub use input::IntoRegistryResource; +pub(crate) use input::{IntoRetriever, PendingResource}; + +/// Pre-loaded registry containing all JSON Schema meta-schemas and their vocabularies +pub static SPECIFICATIONS: LazyLock> = + LazyLock::new(|| Registry::from_meta_schemas(meta::META_SCHEMAS_ALL.as_slice())); + +#[derive(Clone)] +pub struct RegistryBuilder<'a> { + baseline: Option<&'a Registry<'a>>, + pending: AHashMap, PendingResource<'a>>, + retriever: Arc, + #[cfg(feature = "retrieve-async")] + async_retriever: Option>, + draft: Option, +} + +impl fmt::Debug for RegistryBuilder<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("RegistryBuilder") + .field("has_baseline", &self.baseline.is_some()) + .field("pending_len", &self.pending.len()) + .field("draft", &self.draft) + .finish() + } +} + +/// A registry of JSON Schema resources, each identified by their canonical URIs. 
+/// +/// `Registry` is a prepared registry: add resources with [`Registry::new`] and +/// [`RegistryBuilder::add`], then call [`RegistryBuilder::prepare`] to build the +/// reusable registry. To resolve `$ref` references directly, create a [`Resolver`] +/// from the prepared registry: +/// +/// ```rust +/// use referencing::Registry; +/// +/// # fn main() -> Result<(), Box> { +/// let schema = serde_json::json!({ +/// "$schema": "https://json-schema.org/draft/2020-12/schema", +/// "$id": "https://example.com/root", +/// "$defs": { "item": { "type": "string" } }, +/// "items": { "$ref": "#/$defs/item" } +/// }); +/// +/// let registry = Registry::new() +/// .add("https://example.com/root", schema)? +/// .prepare()?; +/// +/// let resolver = registry.resolver(referencing::uri::from_str("https://example.com/root")?); +/// # Ok(()) +/// # } +/// ``` +#[derive(Debug, Clone)] +pub struct Registry<'a> { + baseline: Option<&'a Registry<'a>>, + resolution_cache: SharedUriCache, + known_resources: KnownResources, + index: Index<'a>, +} + +impl<'a> RegistryBuilder<'a> { + fn new() -> Self { + Self { + baseline: None, + pending: AHashMap::new(), + retriever: Arc::new(DefaultRetriever), + #[cfg(feature = "retrieve-async")] + async_retriever: None, + draft: None, + } + } + + fn from_registry(registry: &'a Registry<'a>) -> Self { + Self { + baseline: Some(registry), + pending: AHashMap::new(), + retriever: Arc::new(DefaultRetriever), + #[cfg(feature = "retrieve-async")] + async_retriever: None, + draft: None, + } + } + + #[must_use] + pub fn draft(mut self, draft: Draft) -> Self { + self.draft = Some(draft); + self + } + + #[must_use] + pub fn retriever(mut self, retriever: impl IntoRetriever) -> Self { + self.retriever = retriever.into_retriever(); + self + } + + #[cfg(feature = "retrieve-async")] + #[must_use] + pub fn async_retriever(mut self, retriever: impl IntoAsyncRetriever) -> Self { + self.async_retriever = Some(retriever.into_retriever()); + self + } + + /// Add a 
resource to the registry builder. + /// + /// # Errors + /// + /// Returns an error if the URI is invalid. + pub fn add<'b>( + self, + uri: impl AsRef, + resource: impl IntoRegistryResource<'b>, + ) -> Result, Error> + where + 'a: 'b, + { + let parsed = uri::from_str(uri.as_ref().trim_end_matches('#'))?; + let mut pending: AHashMap, PendingResource<'b>> = + self.pending.into_iter().collect(); + input::private::Sealed::insert_into(resource, &mut pending, parsed); + Ok(RegistryBuilder { + baseline: self.baseline, + pending, + retriever: self.retriever, + #[cfg(feature = "retrieve-async")] + async_retriever: self.async_retriever, + draft: self.draft, + }) + } + + /// Add multiple resources to the registry builder. + /// + /// # Errors + /// + /// Returns an error if any URI is invalid. + pub fn extend<'b, I, U, T>(self, pairs: I) -> Result, Error> + where + 'a: 'b, + I: IntoIterator, + U: AsRef, + T: IntoRegistryResource<'b>, + { + let mut builder = RegistryBuilder { + baseline: self.baseline, + pending: self.pending.into_iter().collect(), + retriever: self.retriever, + #[cfg(feature = "retrieve-async")] + async_retriever: self.async_retriever, + draft: self.draft, + }; + for (uri, resource) in pairs { + builder = builder.add(uri, resource)?; + } + Ok(builder) + } + + /// Prepare the registry for reuse. + /// + /// # Errors + /// + /// Returns an error if URI processing, retrieval, or custom meta-schema validation fails. + pub fn prepare(self) -> Result, Error> { + // When extending an existing registry, seed known resources from the baseline so the + // retriever skips URIs already owned by the parent. 
+ let mut known_resources = self + .baseline + .map(|b| b.known_resources.clone()) + .unwrap_or_default(); + let mut documents = DocumentStore::new(); + let mut resolution_cache = UriCache::new(); + let (custom_metaschemas, index_data) = build::index_resources( + self.pending, + &*self.retriever, + &mut documents, + &mut known_resources, + &mut resolution_cache, + self.draft, + )?; + build::validate_custom_metaschemas(&custom_metaschemas, &known_resources)?; + Ok(Registry { + baseline: self.baseline, + resolution_cache: resolution_cache.into_shared(), + known_resources, + index: index_data, + }) + } + + #[cfg(feature = "retrieve-async")] + /// Prepare the registry for reuse with async retrieval. + /// + /// # Errors + /// + /// Returns an error if URI processing, retrieval, or custom meta-schema validation fails. + pub async fn async_prepare(self) -> Result, Error> { + let retriever = self + .async_retriever + .unwrap_or_else(|| Arc::new(DefaultRetriever)); + let mut known_resources = self + .baseline + .map(|b| b.known_resources.clone()) + .unwrap_or_default(); + let mut documents = DocumentStore::new(); + let mut resolution_cache = UriCache::new(); + let (custom_metaschemas, index_data) = build::index_resources_async( + self.pending, + &*retriever, + &mut documents, + &mut known_resources, + &mut resolution_cache, + self.draft, + ) + .await?; + build::validate_custom_metaschemas(&custom_metaschemas, &known_resources)?; + Ok(Registry { + baseline: self.baseline, + resolution_cache: resolution_cache.into_shared(), + known_resources, + index: index_data, + }) + } +} + +impl<'a> Registry<'a> { + #[allow(clippy::new_ret_no_self)] + #[must_use] + pub fn new<'b>() -> RegistryBuilder<'b> { + RegistryBuilder::new() + } + /// Add a resource to a prepared registry, returning a builder that must be prepared again. + /// + /// # Errors + /// + /// Returns an error if the URI is invalid. 
+ pub fn add<'b>( + &'b self, + uri: impl AsRef, + resource: impl IntoRegistryResource<'b>, + ) -> Result, Error> + where + 'a: 'b, + { + RegistryBuilder::from_registry(self).add(uri, resource) + } + + /// Add multiple resources to a prepared registry, returning a builder that + /// must be prepared again. + /// + /// # Errors + /// + /// Returns an error if any URI is invalid. + pub fn extend<'b, I, U, T>(&'b self, pairs: I) -> Result, Error> + where + 'a: 'b, + I: IntoIterator, + U: AsRef, + T: IntoRegistryResource<'b>, + { + RegistryBuilder::from_registry(self).extend(pairs) + } + + /// Build a registry with all the given meta-schemas from specs. + pub(crate) fn from_meta_schemas(schemas: &[(&'static str, &'static Value)]) -> Self { + let mut documents = DocumentStore::with_capacity(schemas.len()); + let mut known_resources = KnownResources::with_capacity(schemas.len()); + + for (uri, schema) in schemas { + let parsed = + uri::from_str(uri.trim_end_matches('#')).expect("meta-schema URI must be valid"); + let key = Arc::new(parsed); + let draft = Draft::default().detect(schema); + known_resources.insert((*key).clone()); + documents.insert(key, Arc::new(StoredDocument::borrowed(schema, draft))); + } + + let mut resolution_cache = UriCache::with_capacity(35); + let index_data = + build::build_prepared_index_for_documents(&documents, &mut resolution_cache) + .expect("meta-schema index data must build"); + + Self { + baseline: None, + resolution_cache: resolution_cache.into_shared(), + known_resources, + index: index_data, + } + } + /// Returns `true` if the registry contains a resource at the given URI. + /// + /// Returns `false` if the URI is malformed. + #[must_use] + pub fn contains_resource(&self, uri: &str) -> bool { + let Ok(uri) = uri::from_str(uri) else { + return false; + }; + self.resource_by_uri(&uri).is_some() + } + + /// Returns `true` if the registry contains an anchor named `name` at the given URI. + /// + /// Returns `false` if the URI is malformed. 
+ #[must_use] + pub fn contains_anchor(&self, uri: &str, name: &str) -> bool { + let Ok(uri) = uri::from_str(uri) else { + return false; + }; + self.contains_anchor_uri(&uri, name) + } + + /// Creates a [`Resolver`] rooted at `base_uri`. + /// + /// The returned resolver borrows from this registry and cannot outlive it. + #[must_use] + pub fn resolver(&self, base_uri: Uri) -> Resolver<'_> { + Resolver::new(self, Arc::new(base_uri)) + } + + /// Returns the vocabulary set active for a schema with the given `contents`. + /// + /// Detects the draft from the `$schema` field. If no draft is detected or + /// the draft has no registered vocabularies, returns the default vocabulary + /// set — never errors. + #[must_use] + pub fn find_vocabularies(&self, draft: Draft, contents: &Value) -> VocabularySet { + match draft.detect(contents) { + Draft::Unknown => { + if let Some(specification) = contents + .as_object() + .and_then(|obj| obj.get("$schema")) + .and_then(|s| s.as_str()) + { + if let Ok(mut uri) = uri::from_str(specification) { + uri.set_fragment(None); + if let Some(resource) = self.resource_by_uri(&uri) { + if let Ok(Some(vocabularies)) = vocabularies::find(resource.contents()) + { + return vocabularies; + } + } + } + } + Draft::Unknown.default_vocabularies() + } + draft => draft.default_vocabularies(), + } + } + + /// Resolves `uri` against `base` and returns the resulting absolute URI. + /// + /// Results are cached. Returns an error if `base` has no scheme or if + /// resolution fails. + /// + /// # Errors + /// + /// Returns an error if base has no schema or there is a fragment. 
+ pub fn resolve_uri(&self, base: &Uri<&str>, uri: &str) -> Result>, Error> { + self.resolution_cache.resolve_against(base, uri) + } + + #[inline] + pub(crate) fn resource_by_uri(&self, uri: &Uri) -> Option> { + self.index + .resources + .get(uri) + .and_then(IndexedResource::resolve) + .or_else(|| { + self.baseline + .and_then(|baseline| baseline.resource_by_uri(uri)) + }) + } + + pub(crate) fn contains_anchor_uri(&self, uri: &Uri, name: &str) -> bool { + self.index + .anchors + .get(uri) + .is_some_and(|entries| entries.contains_key(name)) + || self + .baseline + .is_some_and(|baseline| baseline.contains_anchor_uri(uri, name)) + } + + pub(crate) fn anchor(&self, uri: &Uri, name: &str) -> Result, Error> { + if let Some(anchor) = self.anchor_exact(uri, name) { + return Ok(anchor); + } + + if let Some(resource) = self.resource_by_uri(uri) { + if let Some(id) = resource.id() { + let canonical = uri::from_str(id)?; + if let Some(anchor) = self.anchor_exact(&canonical, name) { + return Ok(anchor); + } + } + } + + if name.contains('/') { + Err(Error::invalid_anchor(name.to_string())) + } else { + Err(Error::no_such_anchor(name.to_string())) + } + } + + fn local_anchor_by_uri(&self, uri: &Uri, name: &str) -> Option> { + self.index + .anchors + .get(uri) + .and_then(|entries| entries.get(name)) + .and_then(IndexedAnchor::resolve) + } + + fn anchor_exact(&self, uri: &Uri, name: &str) -> Option> { + self.local_anchor_by_uri(uri, name).or_else(|| { + self.baseline + .and_then(|baseline| baseline.anchor_exact(uri, name)) + }) + } +} diff --git a/crates/jsonschema-referencing/src/resolver.rs b/crates/jsonschema-referencing/src/resolver.rs index e3d39ee1..f567ccde 100644 --- a/crates/jsonschema-referencing/src/resolver.rs +++ b/crates/jsonschema-referencing/src/resolver.rs @@ -4,14 +4,14 @@ use std::sync::Arc; use fluent_uri::Uri; use serde_json::Value; -use crate::{list::List, resource::JsonSchemaResource, Draft, Error, Registry, ResourceRef}; +use crate::{list::List, Anchor, 
Draft, Error, Registry, ResourceRef, VocabularySet}; /// A reference resolver. /// /// Resolves references against the base URI and looks up the result in the registry. #[derive(Clone)] pub struct Resolver<'r> { - pub(crate) registry: &'r Registry, + pub(crate) registry: &'r Registry<'r>, base_uri: Arc>, scopes: List>, } @@ -46,14 +46,17 @@ impl fmt::Debug for Resolver<'_> { impl<'r> Resolver<'r> { /// Create a new `Resolver` with the given registry and base URI. - pub(crate) fn new(registry: &'r Registry, base_uri: Arc>) -> Self { + #[inline] + pub(crate) fn new(registry: &'r Registry<'r>, base_uri: Arc>) -> Self { Self { registry, base_uri, scopes: List::new(), } } + /// Returns the current base URI of this resolver. #[must_use] + #[inline] pub fn base_uri(&self) -> Arc> { self.base_uri.clone() } @@ -71,13 +74,11 @@ impl<'r> Resolver<'r> { } else { (reference, "") }; - let uri = self - .registry - .resolve_against(&self.base_uri.borrow(), uri)?; + let uri = self.registry.resolve_uri(&self.base_uri.borrow(), uri)?; (uri, fragment) }; - let Some(retrieved) = self.registry.resources.get(&*uri) else { + let Some(retrieved) = self.registry.resource_by_uri(&uri) else { return Err(Error::unretrievable( uri.as_str(), "Retrieving external resources is not supported once the registry is populated" @@ -91,7 +92,7 @@ impl<'r> Resolver<'r> { } if !fragment.is_empty() { - let retrieved = self.registry.anchor(&uri, fragment)?; + let retrieved = self.lookup_anchor(&uri, fragment)?; let resolver = self.evolve(uri); return retrieved.resolve(resolver); } @@ -146,21 +147,25 @@ impl<'r> Resolver<'r> { Ok(resolved) } + + #[inline] + pub(crate) fn lookup_anchor<'a>( + &self, + uri: &'a Uri, + name: &'a str, + ) -> Result, Error> { + self.registry.anchor(uri, name) + } + /// Create a resolver for a subresource. /// /// # Errors /// /// Returns an error if the resource id cannot be resolved against the base URI of this resolver. 
+ #[inline] pub fn in_subresource(&self, subresource: ResourceRef<'_>) -> Result { - self.in_subresource_inner(&subresource) - } - - pub(crate) fn in_subresource_inner( - &self, - subresource: &impl JsonSchemaResource, - ) -> Result { if let Some(id) = subresource.id() { - let base_uri = self.registry.resolve_against(&self.base_uri.borrow(), id)?; + let base_uri = self.registry.resolve_uri(&self.base_uri.borrow(), id)?; Ok(Resolver { registry: self.registry, base_uri, @@ -171,9 +176,11 @@ impl<'r> Resolver<'r> { } } #[must_use] + #[inline] pub fn dynamic_scope(&self) -> List> { self.scopes.clone() } + #[inline] fn evolve(&self, base_uri: Arc>) -> Resolver<'r> { if !self.base_uri.as_str().is_empty() && (self.scopes.is_empty() || base_uri != self.base_uri) @@ -196,8 +203,14 @@ impl<'r> Resolver<'r> { /// # Errors /// /// If the reference is invalid. - pub fn resolve_against(&self, base: &Uri<&str>, uri: &str) -> Result>, Error> { - self.registry.resolve_against(base, uri) + #[inline] + pub fn resolve_uri(&self, base: &Uri<&str>, uri: &str) -> Result>, Error> { + self.registry.resolve_uri(base, uri) + } + + #[must_use] + pub fn find_vocabularies(&self, draft: Draft, contents: &Value) -> VocabularySet { + self.registry.find_vocabularies(draft, contents) } } diff --git a/crates/jsonschema-referencing/src/resource.rs b/crates/jsonschema-referencing/src/resource.rs index becb8e7b..938f7afe 100644 --- a/crates/jsonschema-referencing/src/resource.rs +++ b/crates/jsonschema-referencing/src/resource.rs @@ -1,21 +1,8 @@ -use std::{ - borrow::Cow, - sync::atomic::{AtomicPtr, Ordering}, -}; +use std::borrow::Cow; use serde_json::Value; -use crate::{Anchor, Draft, Error, Resolved, Resolver, Segments}; - -pub(crate) trait JsonSchemaResource { - fn contents(&self) -> &Value; - fn draft(&self) -> Draft; - fn id(&self) -> Option<&str> { - self.draft() - .id_of(self.contents()) - .map(|id| id.trim_end_matches('#')) - } -} +use crate::{Draft, Error, Resolved, Resolver, Segments}; /// 
An owned document with a concrete interpretation under a JSON Schema specification. #[derive(Debug, Clone, PartialEq, Eq)] @@ -25,19 +12,23 @@ pub struct Resource { } impl Resource { + #[inline] pub(crate) fn new(contents: Value, draft: Draft) -> Self { Self { contents, draft } } + #[inline] pub(crate) fn into_inner(self) -> (Draft, Value) { (self.draft, self.contents) } /// Resource contents. #[must_use] + #[inline] pub fn contents(&self) -> &Value { &self.contents } /// JSON Schema draft under which this contents is interpreted. #[must_use] + #[inline] pub fn draft(&self) -> Draft { self.draft } @@ -45,6 +36,7 @@ impl Resource { /// /// Unknown `$schema` values are treated as `Draft::Unknown`. #[must_use] + #[inline] pub fn from_contents(contents: Value) -> Resource { Draft::default().detect(&contents).create_resource(contents) } @@ -59,14 +51,17 @@ pub struct ResourceRef<'a> { impl<'a> ResourceRef<'a> { #[must_use] + #[inline] pub fn new(contents: &'a Value, draft: Draft) -> Self { Self { contents, draft } } #[must_use] + #[inline] pub fn contents(&self) -> &'a Value { self.contents } #[must_use] + #[inline] pub fn draft(&self) -> Draft { self.draft } @@ -75,81 +70,29 @@ impl<'a> ResourceRef<'a> { /// /// Unknown `$schema` values are treated as `Draft::Unknown`. #[must_use] + #[inline] pub fn from_contents(contents: &'a Value) -> Self { let draft = Draft::default().detect(contents); Self::new(contents, draft) } #[must_use] + #[inline] pub fn id(&self) -> Option<&str> { - JsonSchemaResource::id(self) - } -} - -impl JsonSchemaResource for ResourceRef<'_> { - fn contents(&self) -> &Value { - self.contents - } - - fn draft(&self) -> Draft { self.draft + .id_of(self.contents) + .map(|id| id.trim_end_matches('#')) } } -/// A pointer to a pinned resource. 
-pub(crate) struct InnerResourcePtr { - contents: AtomicPtr, - draft: Draft, -} - -impl Clone for InnerResourcePtr { - fn clone(&self) -> Self { - Self { - contents: AtomicPtr::new(self.contents.load(Ordering::Relaxed)), - draft: self.draft, - } - } -} - -impl std::fmt::Debug for InnerResourcePtr { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("InnerResourcePtr") - .field("contents", self.contents()) - .field("draft", &self.draft) - .finish() - } -} - -impl InnerResourcePtr { - pub(crate) fn new(contents: *const Value, draft: Draft) -> Self { - Self { - contents: AtomicPtr::new(contents.cast_mut()), - draft, - } - } - - #[allow(unsafe_code)] - pub(crate) fn contents(&self) -> &Value { - // SAFETY: The pointer is valid as long as the registry exists - unsafe { &*self.contents.load(Ordering::Relaxed) } - } - - #[inline] - pub(crate) fn draft(&self) -> Draft { - self.draft - } - - pub(crate) fn anchors(&self) -> impl Iterator + '_ { - self.draft().anchors(self.contents()) - } - - pub(crate) fn pointer<'r>( - &'r self, +impl<'r> ResourceRef<'r> { + pub(crate) fn pointer( + self, pointer: &str, mut resolver: Resolver<'r>, ) -> Result, Error> { // INVARIANT: Pointer always starts with `/` - let mut contents = self.contents(); + let mut contents = self.contents; let mut segments = Segments::new(); let original_pointer = pointer; let pointer = percent_encoding::percent_decode_str(&pointer[1..]) @@ -176,27 +119,17 @@ impl InnerResourcePtr { segments.push(segment); } let last = &resolver; - let new_resolver = self.draft().maybe_in_subresource( + let new_resolver = self.draft.maybe_in_subresource( &segments, &resolver, - &InnerResourcePtr::new(contents, self.draft()), + ResourceRef::new(contents, self.draft), )?; if new_resolver != *last { segments = Segments::new(); } resolver = new_resolver; } - Ok(Resolved::new(contents, resolver, self.draft())) - } -} - -impl JsonSchemaResource for InnerResourcePtr { - fn contents(&self) -> &Value 
{ - self.contents() - } - - fn draft(&self) -> Draft { - self.draft + Ok(Resolved::new(contents, resolver, self.draft)) } } @@ -251,9 +184,9 @@ pub fn unescape_segment(mut segment: &str) -> Cow<'_, str> { #[cfg(test)] mod tests { - use std::{error::Error, sync::Arc}; + use std::error::Error; - use crate::{resource::InnerResourcePtr, Draft, Registry}; + use crate::{Draft, Registry}; use super::unescape_segment; use serde_json::json; @@ -292,7 +225,7 @@ mod tests { assert_eq!(unescaped, double_replaced, "Failed for: {input}"); } - fn create_test_registry() -> Registry { + fn create_test_registry() -> Registry<'static> { let schema = Draft::Draft202012.create_resource(json!({ "type": "object", "properties": { @@ -300,7 +233,11 @@ mod tests { "bar": { "type": "array", "items": [{"type": "number"}, {"type": "boolean"}] } } })); - Registry::try_new("http://example.com", schema).expect("Invalid resources") + Registry::new() + .add("http://example.com", schema) + .expect("Invalid resources") + .prepare() + .expect("Invalid resources") } #[test] @@ -311,38 +248,23 @@ mod tests { "foo": { "type": "string" } } })); - let registry = - Registry::try_new("http://example.com", schema.clone()).expect("Invalid resources"); + let registry = Registry::new() + .add("http://example.com", &schema) + .expect("Invalid resources") + .prepare() + .expect("Invalid resources"); let resolver = registry - .try_resolver("http://example.com") - .expect("Invalid base URI"); + .resolver(crate::uri::from_str("http://example.com").expect("Invalid base URI")); let resolved = resolver.lookup("#").expect("Lookup failed"); assert_eq!(resolved.contents(), schema.contents()); } - #[test] - fn test_inner_resource_ptr_debug() { - let value = Arc::pin(json!({ - "foo": "bar", - "number": 42 - })); - - let ptr = InnerResourcePtr::new(std::ptr::addr_of!(*value), Draft::Draft202012); - - let expected = format!( - "InnerResourcePtr {{ contents: {:?}, draft: Draft202012 }}", - *value - ); - 
assert_eq!(format!("{ptr:?}"), expected); - } - #[test] fn test_percent_encoded_non_utf8() { let registry = create_test_registry(); let resolver = registry - .try_resolver("http://example.com") - .expect("Invalid base URI"); + .resolver(crate::uri::from_str("http://example.com").expect("Invalid base URI")); let result = resolver.lookup("#/%FF"); let error = result.expect_err("Should fail"); @@ -357,8 +279,7 @@ mod tests { fn test_array_index_as_string() { let registry = create_test_registry(); let resolver = registry - .try_resolver("http://example.com") - .expect("Invalid base URI"); + .resolver(crate::uri::from_str("http://example.com").expect("Invalid base URI")); let result = resolver.lookup("#/properties/bar/items/one"); let error = result.expect_err("Should fail"); @@ -373,8 +294,7 @@ mod tests { fn test_array_index_out_of_bounds() { let registry = create_test_registry(); let resolver = registry - .try_resolver("http://example.com") - .expect("Invalid base URI"); + .resolver(crate::uri::from_str("http://example.com").expect("Invalid base URI")); let result = resolver.lookup("#/properties/bar/items/2"); assert_eq!( @@ -387,8 +307,7 @@ mod tests { fn test_unknown_property() { let registry = create_test_registry(); let resolver = registry - .try_resolver("http://example.com") - .expect("Invalid base URI"); + .resolver(crate::uri::from_str("http://example.com").expect("Invalid base URI")); let result = resolver.lookup("#/properties/baz"); assert_eq!( diff --git a/crates/jsonschema-referencing/src/small_map.rs b/crates/jsonschema-referencing/src/small_map.rs new file mode 100644 index 00000000..cde274f5 --- /dev/null +++ b/crates/jsonschema-referencing/src/small_map.rs @@ -0,0 +1,259 @@ +use std::mem; + +use ahash::AHashMap; + +pub(crate) enum SmallMap { + Small(micromap::Map), + Large(AHashMap), +} + +impl SmallMap { + #[inline] + pub(crate) fn new() -> Self { + SmallMap::Small(micromap::Map::new()) + } + + #[inline] + pub(crate) fn get(&self, key: &Q) -> 
Option<&V> + where + K: std::borrow::Borrow + Eq + std::hash::Hash, + Q: std::hash::Hash + Eq + ?Sized, + { + match self { + SmallMap::Small(map) => map.get(key), + SmallMap::Large(map) => map.get(key), + } + } + + #[inline] + pub(crate) fn insert(&mut self, key: K, value: V) + where + K: Eq + std::hash::Hash, + { + match self { + SmallMap::Small(map) => { + // Fits inline (new key with space) or overwrites existing key. + if map.len() < N || map.get(&key).is_some() { + map.insert(key, value); + return; + } + // Full and key is new — fall through to promotion. + } + SmallMap::Large(map) => { + map.insert(key, value); + return; + } + } + // Promotion: atomically swap self to Large, drain old Small into it. + let old = match mem::replace(self, SmallMap::Large(AHashMap::with_capacity(N + 1))) { + SmallMap::Small(m) => m, + SmallMap::Large(_) => unreachable!(), + }; + if let SmallMap::Large(new_map) = self { + for (k, v) in old { + new_map.insert(k, v); + } + new_map.insert(key, value); + } + } + + #[inline] + pub(crate) fn get_or_insert_default(&mut self, key: K) -> &mut V + where + K: Eq + std::hash::Hash, + V: Default, + { + // Determine whether we need to promote before borrowing map contents. + let needs_promotion = match self { + SmallMap::Small(map) => map.len() >= N && map.get(&key).is_none(), + SmallMap::Large(_) => false, + }; + if needs_promotion { + // Promotion (same pattern as insert). 
+ let old = match mem::replace(self, SmallMap::Large(AHashMap::with_capacity(N + 1))) { + SmallMap::Small(m) => m, + SmallMap::Large(_) => unreachable!(), + }; + if let SmallMap::Large(new_map) = self { + for (k, v) in old { + new_map.insert(k, v); + } + return new_map.entry(key).or_default(); + } + unreachable!() + } + match self { + SmallMap::Small(map) => map.entry(key).or_default(), + SmallMap::Large(map) => map.entry(key).or_default(), + } + } + + #[inline] + pub(crate) fn contains_key(&self, key: &Q) -> bool + where + K: std::borrow::Borrow + Eq + std::hash::Hash, + Q: std::hash::Hash + Eq + ?Sized, + { + self.get(key).is_some() + } +} + +impl Default for SmallMap { + fn default() -> Self { + Self::new() + } +} + +impl Clone for SmallMap { + fn clone(&self) -> Self { + match self { + SmallMap::Small(map) => SmallMap::Small(map.clone()), + SmallMap::Large(map) => SmallMap::Large(map.clone()), + } + } +} + +impl std::fmt::Debug for SmallMap { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + SmallMap::Small(map) => write!(f, "{map:?}"), + SmallMap::Large(map) => write!(f, "{map:?}"), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_new_is_small() { + let map: SmallMap = SmallMap::new(); + assert!(matches!(map, SmallMap::Small(_))); + } + + #[test] + fn test_insert_and_get() { + let mut map: SmallMap = SmallMap::new(); + map.insert("key".to_string(), 42); + assert_eq!(map.get("key"), Some(&42)); + assert_eq!(map.get("missing"), None); + } + + #[test] + fn test_duplicate_key_overwrites() { + let mut map: SmallMap = SmallMap::new(); + map.insert("key".to_string(), 1); + map.insert("key".to_string(), 2); + assert_eq!(map.get("key"), Some(&2)); + // Verify no duplicate was added: inserting the same key again should still return 2 + map.insert("key".to_string(), 2); + assert_eq!(map.get("key"), Some(&2)); + } + + #[test] + fn test_multiple_inserts_stay_small() { + let mut map: SmallMap = 
SmallMap::new(); + for i in 0..4 { + map.insert(i, i * 10); + } + assert!(matches!(map, SmallMap::Small(_))); + for i in 0..4 { + assert_eq!(map.get(&i), Some(&(i * 10))); + } + } + + #[test] + fn test_promotion_at_n_plus_1() { + let mut map: SmallMap = SmallMap::new(); + for i in 0..5 { + map.insert(i, i * 10); + } + assert!(matches!(map, SmallMap::Large(_))); + for i in 0..5 { + assert_eq!(map.get(&i), Some(&(i * 10))); + } + } + + #[test] + fn test_contains_key_small() { + let mut map: SmallMap = SmallMap::new(); + map.insert("a".to_string(), 1); + assert!(map.contains_key("a")); + assert!(!map.contains_key("b")); + } + + #[test] + fn test_contains_key_large() { + let mut map: SmallMap = SmallMap::new(); + map.insert(1, 10); + map.insert(2, 20); + map.insert(3, 30); // triggers promotion + assert!(map.contains_key(&1)); + assert!(map.contains_key(&3)); + assert!(!map.contains_key(&99)); + } + + #[test] + fn test_get_or_insert_default_miss() { + let mut map: SmallMap> = SmallMap::new(); + map.get_or_insert_default("key".to_string()).push(1); + assert_eq!(map.get("key"), Some(&vec![1u32])); + } + + #[test] + fn test_get_or_insert_default_hit() { + let mut map: SmallMap = SmallMap::new(); + map.insert("key".to_string(), 42); + let v = map.get_or_insert_default("key".to_string()); + assert_eq!(*v, 42); + // Verify key was not duplicated: original value still accessible + assert_eq!(map.get("key"), Some(&42)); + } + + #[test] + fn test_get_or_insert_default_promotes() { + let mut map: SmallMap = SmallMap::new(); + map.insert(1, 10); + map.insert(2, 20); + // map is full; inserting new key via get_or_insert_default should promote + *map.get_or_insert_default(3) = 30; + assert!(matches!(map, SmallMap::Large(_))); + assert_eq!(map.get(&3), Some(&30)); + assert_eq!(map.get(&1), Some(&10)); + assert_eq!(map.get(&2), Some(&20)); + } + + #[test] + fn test_nested_map() { + let mut outer: SmallMap> = SmallMap::new(); + outer.get_or_insert_default(1).insert("a".to_string(), 
10); + outer.get_or_insert_default(1).insert("b".to_string(), 20); + outer.get_or_insert_default(2).insert("c".to_string(), 30); + assert_eq!(outer.get(&1).unwrap().get("a"), Some(&10)); + assert_eq!(outer.get(&1).unwrap().get("b"), Some(&20)); + assert_eq!(outer.get(&2).unwrap().get("c"), Some(&30)); + } + + #[test] + fn test_clone() { + let mut map: SmallMap = SmallMap::new(); + map.insert(1, 10); + map.insert(2, 20); + let cloned = map.clone(); + assert_eq!(cloned.get(&1), Some(&10)); + assert_eq!(cloned.get(&2), Some(&20)); + } + + #[test] + fn test_clone_large() { + let mut map: SmallMap = SmallMap::new(); + for i in 0..5 { + map.insert(i, i * 10); + } + let cloned = map.clone(); + for i in 0..5 { + assert_eq!(cloned.get(&i), Some(&(i * 10))); + } + } +} diff --git a/crates/jsonschema-referencing/src/spec/draft201909.rs b/crates/jsonschema-referencing/src/spec/draft201909.rs new file mode 100644 index 00000000..a081fd36 --- /dev/null +++ b/crates/jsonschema-referencing/src/spec/draft201909.rs @@ -0,0 +1,365 @@ +use serde_json::{Map, Value}; + +use crate::{ + draft::Draft, + spec::{ChildNode, ObjectAnalysis}, + Error, Resolver, ResourceRef, Segments, +}; + +use super::draft202012::{self, SubresourceIteratorInner}; + +fn visit_child<'a>( + key: &'a str, + value: &'a Value, + draft: Draft, + f: &mut impl FnMut(&'a str, Option<&'a str>, Option, &'a Value, Draft) -> Result<(), Error>, +) -> Result<(), Error> { + match key { + "additionalItems" + | "additionalProperties" + | "contains" + | "contentSchema" + | "else" + | "if" + | "not" + | "propertyNames" + | "then" + | "unevaluatedItems" + | "unevaluatedProperties" => { + f(key, None, None, value, draft.detect(value))?; + } + "allOf" | "anyOf" | "oneOf" => { + if let Some(arr) = value.as_array() { + for (index, item) in arr.iter().enumerate() { + f(key, None, Some(index), item, draft.detect(item))?; + } + } + } + "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { + if let 
Some(obj) = value.as_object() { + for (child_key, child_value) in obj { + f( + key, + Some(child_key.as_str()), + None, + child_value, + draft.detect(child_value), + )?; + } + } + } + "items" => match value { + Value::Array(arr) => { + for (index, item) in arr.iter().enumerate() { + f("items", None, Some(index), item, draft.detect(item))?; + } + } + _ => f("items", None, None, value, draft.detect(value))?, + }, + "dependencies" => { + if let Some(obj) = value.as_object() { + for (child_key, child_value) in obj { + if !child_value.is_object() { + continue; + } + f( + "dependencies", + Some(child_key.as_str()), + None, + child_value, + draft.detect(child_value), + )?; + } + } + } + _ => {} + } + Ok(()) +} + +pub(crate) fn for_each_child<'a>( + schema: &'a Map, + draft: Draft, + f: &mut impl FnMut(&'a Value, Draft) -> Result<(), Error>, +) -> Result<(), Error> { + for (key, value) in schema { + visit_child(key, value, draft, &mut |_, _, _, child, child_draft| { + f(child, child_draft) + })?; + } + Ok(()) +} + +pub(crate) fn for_each_owned_child<'a>( + schema: &'a Map, + draft: Draft, + f: &mut impl FnMut(ChildNode<'a>) -> Result<(), Error>, +) -> Result<(), Error> { + for (key, value) in schema { + visit_child( + key, + value, + draft, + &mut |key, child_key, index, child, child_draft| { + f(ChildNode::from_parts( + key, + child_key, + index, + child, + child_draft, + )) + }, + )?; + } + Ok(()) +} + +pub(crate) fn analyze_object(schema: &Map, _draft: Draft) -> ObjectAnalysis<'_> { + let mut id = None; + let mut has_anchor = false; + let mut ref_ = None; + let mut schema_ref = None; + + for (key, value) in schema { + match key.as_str() { + "$id" => id = value.as_str(), + "$anchor" => has_anchor |= value.as_str().is_some(), + "$ref" => ref_ = value.as_str(), + "$schema" => schema_ref = value.as_str(), + _ => {} + } + } + + ObjectAnalysis { + id, + has_anchor, + ref_, + schema: schema_ref, + } +} + +pub(crate) fn object_iter<'a>( + (key, value): (&'a String, &'a Value), 
+) -> SubresourceIteratorInner<'a> { + match key.as_str() { + // For these keys, yield the value once. + "additionalItems" + | "additionalProperties" + | "contains" + | "contentSchema" + | "else" + | "if" + | "not" + | "propertyNames" + | "then" + | "unevaluatedItems" + | "unevaluatedProperties" => SubresourceIteratorInner::Once(value), + // For these keys, if the value is an array, iterate over its items. + "allOf" | "anyOf" | "oneOf" => { + if let Some(arr) = value.as_array() { + SubresourceIteratorInner::Array(arr.iter()) + } else { + SubresourceIteratorInner::Empty + } + } + // For these keys, if the value is an object, iterate over its values. + "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { + if let Some(obj) = value.as_object() { + SubresourceIteratorInner::Object(obj.values()) + } else { + SubresourceIteratorInner::Empty + } + } + // For "items": if it's an array, iterate over its items; otherwise, yield the value once. + "items" => match value { + Value::Array(arr) => SubresourceIteratorInner::Array(arr.iter()), + _ => SubresourceIteratorInner::Once(value), + }, + // For any other key, yield nothing. 
+ _ => SubresourceIteratorInner::Empty, + } +} + +pub(crate) fn maybe_in_subresource<'r>( + segments: &Segments, + resolver: &Resolver<'r>, + subresource: ResourceRef<'_>, +) -> Result, Error> { + const IN_VALUE: &[&str] = &[ + "additionalItems", + "additionalProperties", + "contains", + "contentSchema", + "else", + "if", + "not", + "propertyNames", + "then", + "unevaluatedItems", + "unevaluatedProperties", + ]; + const IN_CHILD: &[&str] = &[ + "allOf", + "anyOf", + "oneOf", + "$defs", + "definitions", + "dependentSchemas", + "patternProperties", + "properties", + ]; + + draft202012::maybe_in_subresource_with_items_and_dependencies( + segments, + resolver, + subresource, + IN_VALUE, + IN_CHILD, + ) +} + +#[cfg(test)] +mod tests { + use crate::{spec::PathSegment, Draft}; + use serde_json::{json, Value}; + + use super::{for_each_child, for_each_owned_child}; + + #[test] + fn test_analyze_object_2019_only_counts_plain_anchor() { + let schema = json!({ + "$id": "https://example.com/root", + "$ref": "other.json", + "$schema": "https://json-schema.org/draft/2019-09/schema", + "$anchor": "plain", + "properties": { + "name": { "type": "string" } + } + }); + + let object = schema.as_object().unwrap(); + let analysis = Draft::Draft201909.analyze_object(object); + + assert_eq!(analysis.id, Some("https://example.com/root")); + assert!(analysis.has_anchor); + assert_eq!(analysis.ref_, Some("other.json")); + assert_eq!( + analysis.schema, + Some("https://json-schema.org/draft/2019-09/schema") + ); + + let dynamic_only = json!({ + "$dynamicAnchor": "ignored", + "properties": { + "name": { "type": "string" } + } + }); + + let object = dynamic_only.as_object().unwrap(); + let analysis = Draft::Draft201909.analyze_object(object); + + assert_eq!(analysis.id, None); + assert!(!analysis.has_anchor); + assert_eq!(analysis.ref_, None); + assert_eq!(analysis.schema, None); + } + + #[test] + fn test_for_each_owned_child_streams_expected_shapes() { + let schema = json!({ + "contentSchema": 
{ "type": "null" }, + "allOf": [ + { "minimum": 1 } + ], + "properties": { + "name": { "type": "string" } + } + }); + + let object = schema.as_object().unwrap(); + let mut seen = Vec::new(); + + for_each_owned_child(object, Draft::Draft201909, &mut |child| { + let (shape, path) = match (child.first, child.second) { + (PathSegment::Key(key), None) => ("singleton", key.to_owned()), + (PathSegment::Key(key), Some(PathSegment::Index(index))) => { + ("key_index", format!("{key}/{index}")) + } + (PathSegment::Key(key), Some(PathSegment::Key(child_key))) => { + ("key_key", format!("{key}/{child_key}")) + } + (PathSegment::Index(_), _) => { + unreachable!("owned children always start with a key") + } + }; + + seen.push((shape.to_owned(), path, child.value.clone(), child.draft)); + Ok(()) + }) + .unwrap(); + + assert_eq!( + seen, + vec![ + ( + "key_index".to_string(), + "allOf/0".to_string(), + json!({ "minimum": 1 }), + Draft::Draft201909, + ), + ( + "singleton".to_string(), + "contentSchema".to_string(), + json!({ "type": "null" }), + Draft::Draft201909, + ), + ( + "key_key".to_string(), + "properties/name".to_string(), + json!({ "type": "string" }), + Draft::Draft201909, + ), + ] + ); + } + + #[test] + fn test_for_each_child_streams_expected_values() { + let schema = json!({ + "contentSchema": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "null" + }, + "allOf": [ + { "minimum": 1 } + ], + "properties": { + "name": { "type": "string" } + } + }); + + let object = schema.as_object().unwrap(); + let mut seen: Vec<(Value, Draft)> = Vec::new(); + + for_each_child(object, Draft::Draft201909, &mut |child, draft| { + seen.push((child.clone(), draft)); + Ok(()) + }) + .unwrap(); + + assert_eq!( + seen, + vec![ + (json!({ "minimum": 1 }), Draft::Draft201909,), + ( + json!({ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "null" + }), + Draft::Draft202012, + ), + (json!({ "type": "string" }), Draft::Draft201909,), + ] + ); + } +} 
diff --git a/crates/jsonschema-referencing/src/specification/subresources.rs b/crates/jsonschema-referencing/src/spec/draft202012.rs similarity index 51% rename from crates/jsonschema-referencing/src/specification/subresources.rs rename to crates/jsonschema-referencing/src/spec/draft202012.rs index 9de4b3a5..30e201e6 100644 --- a/crates/jsonschema-referencing/src/specification/subresources.rs +++ b/crates/jsonschema-referencing/src/spec/draft202012.rs @@ -1,9 +1,120 @@ use core::slice; use std::iter::FlatMap; -use serde_json::Value; +use serde_json::{Map, Value}; -use crate::{resource::InnerResourcePtr, segments::Segment, Error, Resolver, Segments}; +use crate::{ + draft::Draft, + segments::Segment, + spec::{ChildNode, ObjectAnalysis}, + Error, Resolver, ResourceRef, Segments, +}; + +fn visit_child<'a>( + key: &'a str, + value: &'a Value, + draft: Draft, + f: &mut impl FnMut(&'a str, Option<&'a str>, Option, &'a Value, Draft) -> Result<(), Error>, +) -> Result<(), Error> { + match key { + "additionalProperties" + | "contains" + | "contentSchema" + | "else" + | "if" + | "items" + | "not" + | "propertyNames" + | "then" + | "unevaluatedItems" + | "unevaluatedProperties" => { + f(key, None, None, value, draft.detect(value))?; + } + "allOf" | "anyOf" | "oneOf" | "prefixItems" => { + if let Some(arr) = value.as_array() { + for (index, item) in arr.iter().enumerate() { + f(key, None, Some(index), item, draft.detect(item))?; + } + } + } + "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { + if let Some(obj) = value.as_object() { + for (child_key, child_value) in obj { + f( + key, + Some(child_key.as_str()), + None, + child_value, + draft.detect(child_value), + )?; + } + } + } + _ => {} + } + Ok(()) +} + +pub(crate) fn for_each_child<'a>( + schema: &'a Map, + draft: Draft, + f: &mut impl FnMut(&'a Value, Draft) -> Result<(), Error>, +) -> Result<(), Error> { + for (key, value) in schema { + visit_child(key, value, draft, &mut |_, _, _, 
child, child_draft| { + f(child, child_draft) + })?; + } + Ok(()) +} + +pub(crate) fn for_each_owned_child<'a>( + schema: &'a Map, + draft: Draft, + f: &mut impl FnMut(ChildNode<'a>) -> Result<(), Error>, +) -> Result<(), Error> { + for (key, value) in schema { + visit_child( + key, + value, + draft, + &mut |key, child_key, index, child, child_draft| { + f(ChildNode::from_parts( + key, + child_key, + index, + child, + child_draft, + )) + }, + )?; + } + Ok(()) +} + +pub(crate) fn analyze_object(schema: &Map, _draft: Draft) -> ObjectAnalysis<'_> { + let mut id = None; + let mut has_anchor = false; + let mut ref_ = None; + let mut schema_ref = None; + + for (key, value) in schema { + match key.as_str() { + "$id" => id = value.as_str(), + "$anchor" | "$dynamicAnchor" => has_anchor |= value.as_str().is_some(), + "$ref" => ref_ = value.as_str(), + "$schema" => schema_ref = value.as_str(), + _ => {} + } + } + + ObjectAnalysis { + id, + has_anchor, + ref_, + schema: schema_ref, + } +} type ObjectIter<'a> = FlatMap< serde_json::map::Iter<'a>, @@ -101,7 +212,7 @@ pub(crate) fn object_iter<'a>( pub(crate) fn maybe_in_subresource<'r>( segments: &Segments, resolver: &Resolver<'r>, - subresource: &InnerResourcePtr, + subresource: ResourceRef<'_>, ) -> Result, Error> { const IN_VALUE: &[&str] = &[ "additionalProperties", @@ -138,14 +249,14 @@ pub(crate) fn maybe_in_subresource<'r>( } } } - resolver.in_subresource_inner(subresource) + resolver.in_subresource(subresource) } #[inline] pub(crate) fn maybe_in_subresource_with_items_and_dependencies<'r>( segments: &Segments, resolver: &Resolver<'r>, - subresource: &InnerResourcePtr, + subresource: ResourceRef<'_>, in_value: &[&str], in_child: &[&str], ) -> Result, Error> { @@ -153,7 +264,7 @@ pub(crate) fn maybe_in_subresource_with_items_and_dependencies<'r>( while let Some(segment) = iter.next() { if let Segment::Key(key) = segment { if (*key == "items" || *key == "dependencies") && subresource.contents().is_object() { - return 
resolver.in_subresource_inner(subresource); + return resolver.in_subresource(subresource); } if !in_value.contains(&key.as_ref()) && (!in_child.contains(&key.as_ref()) || iter.next().is_none()) @@ -162,14 +273,16 @@ pub(crate) fn maybe_in_subresource_with_items_and_dependencies<'r>( } } } - resolver.in_subresource_inner(subresource) + resolver.in_subresource(subresource) } #[cfg(test)] mod tests { - use crate::Draft; + use crate::{Draft, Error}; - use super::{object_iter, SubresourceIterator}; + use crate::spec::PathSegment; + + use super::{for_each_child, for_each_owned_child, object_iter, SubresourceIterator}; use ahash::HashSet; use serde_json::{json, Value}; use test_case::test_case; @@ -340,4 +453,224 @@ mod tests { "Draft {draft:?} should return empty subresources for boolean schema", ); } + + #[test] + fn test_analyze_object_collects_2020_12_metadata() { + let schema = json!({ + "$id": "https://example.com/root", + "$dynamicAnchor": "node", + "$ref": "other.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "properties": { + "name": { "type": "string" } + }, + "allOf": [ + { "minimum": 1 } + ] + }); + + let object = schema.as_object().unwrap(); + let analysis = Draft::Draft202012.analyze_object(object); + + assert_eq!(analysis.id, Some("https://example.com/root")); + assert!(analysis.has_anchor); + assert_eq!(analysis.ref_, Some("other.json")); + assert_eq!( + analysis.schema, + Some("https://json-schema.org/draft/2020-12/schema") + ); + } + + #[test] + fn test_analyze_object_detects_dynamic_anchor_without_id() { + let schema = json!({ + "$dynamicAnchor": "node" + }); + + let object = schema.as_object().unwrap(); + let analysis = Draft::Draft202012.analyze_object(object); + + assert_eq!(analysis.id, None); + assert!(analysis.has_anchor); + assert_eq!(analysis.ref_, None); + assert_eq!(analysis.schema, None); + } + + #[test] + fn test_for_each_owned_child_streams_mixed_child_shapes() { + let schema = json!({ + "not": { "type": "null" }, + 
"prefixItems": [ + { "type": "integer" } + ], + "allOf": [ + { "minimum": 1 } + ], + "properties": { + "name": { "type": "string" } + } + }); + + let object = schema.as_object().unwrap(); + let mut seen = Vec::new(); + + for_each_owned_child(object, Draft::Draft202012, &mut |child| { + let (shape, path) = match (child.first, child.second) { + (PathSegment::Key(key), None) => ("singleton", key.to_owned()), + (PathSegment::Key(key), Some(PathSegment::Index(index))) => { + ("key_index", format!("{key}/{index}")) + } + (PathSegment::Key(key), Some(PathSegment::Key(child_key))) => { + ("key_key", format!("{key}/{child_key}")) + } + (PathSegment::Index(_), _) => { + unreachable!("owned children always start with a key") + } + }; + + seen.push((shape.to_owned(), path, child.value.clone(), child.draft)); + Ok(()) + }) + .unwrap(); + + assert_eq!( + seen, + vec![ + ( + "key_index".to_string(), + "allOf/0".to_string(), + json!({ "minimum": 1 }), + Draft::Draft202012, + ), + ( + "singleton".to_string(), + "not".to_string(), + json!({ "type": "null" }), + Draft::Draft202012, + ), + ( + "key_index".to_string(), + "prefixItems/0".to_string(), + json!({ "type": "integer" }), + Draft::Draft202012, + ), + ( + "key_key".to_string(), + "properties/name".to_string(), + json!({ "type": "string" }), + Draft::Draft202012, + ), + ] + ); + } + + #[test] + fn test_for_each_owned_child_stops_and_returns_err() { + let schema = json!({ + "allOf": [ + { "minimum": 1 } + ], + "not": { + "$schema": "https://json-schema.org/draft/2019-09/schema", + "type": "null" + }, + "properties": { + "name": { "type": "string" } + } + }); + + let object = schema.as_object().unwrap(); + let mut seen = Vec::new(); + + let error = for_each_owned_child(object, Draft::Draft202012, &mut |child| { + let (shape, path) = match (child.first, child.second) { + (PathSegment::Key(key), None) => ("singleton", key.to_owned()), + (PathSegment::Key(key), Some(PathSegment::Index(index))) => { + ("key_index", 
format!("{key}/{index}")) + } + (PathSegment::Key(key), Some(PathSegment::Key(child_key))) => { + ("key_key", format!("{key}/{child_key}")) + } + (PathSegment::Index(_), _) => { + unreachable!("owned children always start with a key") + } + }; + + seen.push((shape.to_owned(), path, child.value.clone(), child.draft)); + if seen.len() == 2 { + return Err(Error::unknown_specification("stop")); + } + Ok(()) + }) + .expect_err("the callback error should stop traversal"); + + assert_eq!( + seen, + vec![ + ( + "key_index".to_string(), + "allOf/0".to_string(), + json!({ "minimum": 1 }), + Draft::Draft202012, + ), + ( + "singleton".to_string(), + "not".to_string(), + json!({ + "$schema": "https://json-schema.org/draft/2019-09/schema", + "type": "null" + }), + Draft::Draft201909, + ), + ] + ); + assert_eq!( + error.to_string(), + "Unknown meta-schema: 'stop'. Custom meta-schemas must be registered in the registry before use" + ); + } + + #[test] + fn test_for_each_child_streams_mixed_child_values() { + let schema = json!({ + "not": { + "$schema": "https://json-schema.org/draft/2019-09/schema", + "type": "null" + }, + "prefixItems": [ + { "type": "integer" } + ], + "allOf": [ + { "minimum": 1 } + ], + "properties": { + "name": { "type": "string" } + } + }); + + let object = schema.as_object().unwrap(); + let mut seen = Vec::new(); + + for_each_child(object, Draft::Draft202012, &mut |child, draft| { + seen.push((child.clone(), draft)); + Ok(()) + }) + .unwrap(); + + assert_eq!( + seen, + vec![ + (json!({ "minimum": 1 }), Draft::Draft202012), + ( + json!({ + "$schema": "https://json-schema.org/draft/2019-09/schema", + "type": "null" + }), + Draft::Draft201909, + ), + (json!({ "type": "integer" }), Draft::Draft202012), + (json!({ "type": "string" }), Draft::Draft202012,), + ] + ); + } } diff --git a/crates/jsonschema-referencing/src/spec/draft4.rs b/crates/jsonschema-referencing/src/spec/draft4.rs new file mode 100644 index 00000000..7f6a4662 --- /dev/null +++ 
b/crates/jsonschema-referencing/src/spec/draft4.rs @@ -0,0 +1,382 @@ +use serde_json::{Map, Value}; + +use crate::{ + draft::Draft, + spec::{ChildNode, ObjectAnalysis}, + Error, Resolver, ResourceRef, Segments, +}; + +use super::draft202012::{self, SubresourceIteratorInner}; + +fn visit_child<'a>( + key: &'a str, + value: &'a Value, + draft: Draft, + f: &mut impl FnMut(&'a str, Option<&'a str>, Option, &'a Value, Draft) -> Result<(), Error>, +) -> Result<(), Error> { + match key { + "additionalItems" | "additionalProperties" if value.is_object() => { + f(key, None, None, value, draft.detect(value))?; + } + "contains" + | "contentSchema" + | "else" + | "if" + | "not" + | "propertyNames" + | "then" + | "unevaluatedItems" + | "unevaluatedProperties" => { + f(key, None, None, value, draft.detect(value))?; + } + "allOf" | "anyOf" | "oneOf" | "prefixItems" => { + if let Some(arr) = value.as_array() { + for (index, item) in arr.iter().enumerate() { + f(key, None, Some(index), item, draft.detect(item))?; + } + } + } + "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { + if let Some(obj) = value.as_object() { + for (child_key, child_value) in obj { + f( + key, + Some(child_key.as_str()), + None, + child_value, + draft.detect(child_value), + )?; + } + } + } + "items" => match value { + Value::Array(arr) => { + for (index, item) in arr.iter().enumerate() { + f(key, None, Some(index), item, draft.detect(item))?; + } + } + _ => f(key, None, None, value, draft.detect(value))?, + }, + "dependencies" => { + if let Some(obj) = value.as_object() { + for (child_key, child_value) in obj { + if !child_value.is_object() { + continue; + } + f( + key, + Some(child_key.as_str()), + None, + child_value, + draft.detect(child_value), + )?; + } + } + } + _ => {} + } + Ok(()) +} + +pub(crate) fn for_each_child<'a>( + schema: &'a Map, + draft: Draft, + f: &mut impl FnMut(&'a Value, Draft) -> Result<(), Error>, +) -> Result<(), Error> { + for (key, value) in 
schema { + visit_child(key, value, draft, &mut |_, _, _, child, child_draft| { + f(child, child_draft) + })?; + } + Ok(()) +} + +pub(crate) fn for_each_owned_child<'a>( + schema: &'a Map, + draft: Draft, + f: &mut impl FnMut(ChildNode<'a>) -> Result<(), Error>, +) -> Result<(), Error> { + for (key, value) in schema { + visit_child( + key, + value, + draft, + &mut |key, child_key, index, child, child_draft| { + f(ChildNode::from_parts( + key, + child_key, + index, + child, + child_draft, + )) + }, + )?; + } + Ok(()) +} + +pub(crate) fn analyze_object(schema: &Map, _draft: Draft) -> ObjectAnalysis<'_> { + let mut raw_id = None; + let mut ref_ = None; + let mut schema_ref = None; + + for (key, value) in schema { + match key.as_str() { + "id" => raw_id = value.as_str(), + "$ref" => ref_ = value.as_str(), + "$schema" => schema_ref = value.as_str(), + _ => {} + } + } + + let has_anchor = raw_id.is_some_and(|id| id.starts_with('#')); + let id = match raw_id { + Some(id) if !has_anchor && ref_.is_none() => Some(id), + _ => None, + }; + + ObjectAnalysis { + id, + has_anchor, + ref_, + schema: schema_ref, + } +} + +pub(crate) fn object_iter<'a>( + (key, value): (&'a String, &'a Value), +) -> SubresourceIteratorInner<'a> { + match key.as_str() { + // For "items": if it’s an array, iterate over it; otherwise, yield one element. + "items" => match value { + Value::Array(arr) => SubresourceIteratorInner::Array(arr.iter()), + _ => SubresourceIteratorInner::Once(value), + }, + // For "allOf", "anyOf", "oneOf", "prefixItems": if the value is an array, iterate over it. + "allOf" | "anyOf" | "oneOf" | "prefixItems" => { + if let Some(arr) = value.as_array() { + SubresourceIteratorInner::Array(arr.iter()) + } else { + SubresourceIteratorInner::Empty + } + } + // For "$defs", "definitions", "dependentSchemas", "patternProperties", "properties": + // if the value is an object, iterate over its values. 
+ "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { + if let Some(obj) = value.as_object() { + SubresourceIteratorInner::Object(obj.values()) + } else { + SubresourceIteratorInner::Empty + } + } + // For "dependencies": if the value is an object, iterate over its values filtered to only those that are objects. + "dependencies" => { + if let Some(obj) = value.as_object() { + SubresourceIteratorInner::FilteredObject(obj.values()) + } else { + SubresourceIteratorInner::Empty + } + } + // For "additionalItems" and "additionalProperties", only if the value is an object. + "additionalItems" | "additionalProperties" if value.is_object() => { + SubresourceIteratorInner::Once(value) + } + // For other keys that were originally in the “single element” group: + "contains" + | "contentSchema" + | "else" + | "if" + | "propertyNames" + | "not" + | "then" + | "unevaluatedItems" + | "unevaluatedProperties" => SubresourceIteratorInner::Once(value), + _ => SubresourceIteratorInner::Empty, + } +} + +pub(crate) fn maybe_in_subresource<'r>( + segments: &Segments, + resolver: &Resolver<'r>, + subresource: ResourceRef<'_>, +) -> Result, Error> { + const IN_VALUE: &[&str] = &["additionalItems", "additionalProperties", "not"]; + const IN_CHILD: &[&str] = &[ + "allOf", + "anyOf", + "oneOf", + "definitions", + "patternProperties", + "properties", + ]; + draft202012::maybe_in_subresource_with_items_and_dependencies( + segments, + resolver, + subresource, + IN_VALUE, + IN_CHILD, + ) +} + +#[cfg(test)] +mod tests { + use crate::{spec::PathSegment, Draft}; + use serde_json::{json, Value}; + + use super::{for_each_child, for_each_owned_child}; + + #[test] + fn test_analyze_object_collects_control_keys() { + let schema = json!({ + "id": "http://example.com/node", + "$schema": "http://example.com/meta", + "properties": { + "name": {"type": "string"} + }, + "items": {"type": "integer"} + }); + let analysis = Draft::Draft4.analyze_object( + schema + .as_object() + 
.expect("schema object should be analyzed"), + ); + + assert_eq!(analysis.id, Some("http://example.com/node")); + assert!(!analysis.has_anchor); + assert_eq!(analysis.ref_, None); + assert_eq!(analysis.schema, Some("http://example.com/meta")); + } + + #[test] + fn test_analyze_object_collects_refs_and_schema() { + let schema = json!({ + "id": "http://example.com/node", + "$schema": "http://example.com/meta", + "properties": { + "name": {"type": "string"} + }, + "items": {"type": "integer"} + }); + + let analysis = Draft::Draft4 + .analyze_object(schema.as_object().expect("schema object should be scanned")); + + assert_eq!( + ( + analysis.ref_.map(str::to_string), + analysis.schema.map(str::to_string) + ), + (None, Some("http://example.com/meta".to_string())) + ); + } + + #[test] + fn test_analyze_object_draft4_treats_hash_id_as_anchor() { + let schema = json!({ + "id": "#node", + "dependencies": { + "name": { "type": "string" } + } + }); + + let object = schema.as_object().unwrap(); + let analysis = Draft::Draft4.analyze_object(object); + + assert!(analysis.has_anchor); + assert_eq!(analysis.id, None); + } + + #[test] + fn test_for_each_owned_child_streams_filtered_dependency_children() { + let schema = json!({ + "items": [ + { "type": "integer" } + ], + "dependencies": { + "name": { "type": "string" }, + "flag": [ "ignored" ] + } + }); + + let object = schema.as_object().unwrap(); + let mut seen = Vec::new(); + + for_each_owned_child(object, Draft::Draft4, &mut |child| { + let (shape, path) = match (child.first, child.second) { + (PathSegment::Key(key), None) => ("singleton", key.to_owned()), + (PathSegment::Key(key), Some(PathSegment::Index(index))) => { + ("key_index", format!("{key}/{index}")) + } + (PathSegment::Key(key), Some(PathSegment::Key(child_key))) => { + ("key_key", format!("{key}/{child_key}")) + } + (PathSegment::Index(_), _) => { + unreachable!("owned children always start with a key") + } + }; + + seen.push((shape.to_owned(), path, 
child.value.clone(), child.draft)); + Ok(()) + }) + .unwrap(); + + assert_eq!( + seen, + vec![ + ( + "key_key".to_string(), + "dependencies/name".to_string(), + json!({ "type": "string" }), + Draft::Draft4, + ), + ( + "key_index".to_string(), + "items/0".to_string(), + json!({ "type": "integer" }), + Draft::Draft4, + ), + ] + ); + } + + #[test] + fn test_for_each_child_streams_expected_children() { + let schema = json!({ + "additionalProperties": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "string" + }, + "items": [ + { "type": "integer" } + ], + "dependencies": { + "name": { "type": "boolean" }, + "flag": [ "ignored" ] + } + }); + + let object = schema.as_object().unwrap(); + let mut seen: Vec<(Value, Draft)> = Vec::new(); + + for_each_child(object, Draft::Draft4, &mut |child, draft| { + seen.push((child.clone(), draft)); + Ok(()) + }) + .unwrap(); + + assert_eq!( + seen, + vec![ + ( + json!({ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "string" + }), + Draft::Draft202012, + ), + (json!({ "type": "boolean" }), Draft::Draft4), + (json!({ "type": "integer" }), Draft::Draft4), + ] + ); + } +} diff --git a/crates/jsonschema-referencing/src/spec/draft6.rs b/crates/jsonschema-referencing/src/spec/draft6.rs new file mode 100644 index 00000000..f7b1ab33 --- /dev/null +++ b/crates/jsonschema-referencing/src/spec/draft6.rs @@ -0,0 +1,363 @@ +use serde_json::{Map, Value}; + +use crate::{ + draft::Draft, + spec::{ChildNode, ObjectAnalysis}, + Error, Resolver, ResourceRef, Segments, +}; + +use super::draft202012::{self, SubresourceIteratorInner}; + +fn visit_child<'a>( + key: &'a str, + value: &'a Value, + draft: Draft, + f: &mut impl FnMut(&'a str, Option<&'a str>, Option, &'a Value, Draft) -> Result<(), Error>, +) -> Result<(), Error> { + match key { + "additionalItems" | "additionalProperties" | "contains" | "not" | "propertyNames" => { + f(key, None, None, value, draft.detect(value))?; + } + "allOf" | "anyOf" | 
"oneOf" => { + if let Some(arr) = value.as_array() { + for (index, item) in arr.iter().enumerate() { + f(key, None, Some(index), item, draft.detect(item))?; + } + } + } + "definitions" | "patternProperties" | "properties" => { + if let Some(obj) = value.as_object() { + for (child_key, child_value) in obj { + f( + key, + Some(child_key.as_str()), + None, + child_value, + draft.detect(child_value), + )?; + } + } + } + "items" => match value { + Value::Array(arr) => { + for (index, item) in arr.iter().enumerate() { + f("items", None, Some(index), item, draft.detect(item))?; + } + } + _ => f("items", None, None, value, draft.detect(value))?, + }, + "dependencies" => { + if let Some(obj) = value.as_object() { + for (child_key, child_value) in obj { + if !child_value.is_object() { + continue; + } + f( + "dependencies", + Some(child_key.as_str()), + None, + child_value, + draft.detect(child_value), + )?; + } + } + } + _ => {} + } + Ok(()) +} + +pub(crate) fn for_each_child<'a>( + schema: &'a Map, + draft: Draft, + f: &mut impl FnMut(&'a Value, Draft) -> Result<(), Error>, +) -> Result<(), Error> { + for (key, value) in schema { + visit_child(key, value, draft, &mut |_, _, _, child, child_draft| { + f(child, child_draft) + })?; + } + Ok(()) +} + +pub(crate) fn for_each_owned_child<'a>( + schema: &'a Map, + draft: Draft, + f: &mut impl FnMut(ChildNode<'a>) -> Result<(), Error>, +) -> Result<(), Error> { + for (key, value) in schema { + visit_child( + key, + value, + draft, + &mut |key, child_key, index, child, child_draft| { + f(ChildNode::from_parts( + key, + child_key, + index, + child, + child_draft, + )) + }, + )?; + } + Ok(()) +} + +pub(crate) fn analyze_object(schema: &Map, _draft: Draft) -> ObjectAnalysis<'_> { + let mut raw_id = None; + let mut ref_ = None; + let mut schema_ref = None; + + for (key, value) in schema { + match key.as_str() { + "$id" => raw_id = value.as_str(), + "$ref" => ref_ = value.as_str(), + "$schema" => schema_ref = value.as_str(), + _ => {} + 
} + } + + let has_anchor = raw_id.is_some_and(|id| id.starts_with('#')); + let id = match raw_id { + Some(id) if !has_anchor && ref_.is_none() => Some(id), + _ => None, + }; + + ObjectAnalysis { + id, + has_anchor, + ref_, + schema: schema_ref, + } +} + +pub(crate) fn object_iter<'a>( + (key, value): (&'a String, &'a Value), +) -> SubresourceIteratorInner<'a> { + match key.as_str() { + "additionalItems" | "additionalProperties" | "contains" | "not" | "propertyNames" => { + SubresourceIteratorInner::Once(value) + } + "allOf" | "anyOf" | "oneOf" => { + if let Some(arr) = value.as_array() { + SubresourceIteratorInner::Array(arr.iter()) + } else { + SubresourceIteratorInner::Empty + } + } + "definitions" | "patternProperties" | "properties" => { + if let Some(obj) = value.as_object() { + SubresourceIteratorInner::Object(obj.values()) + } else { + SubresourceIteratorInner::Empty + } + } + "items" => match value { + Value::Array(arr) => SubresourceIteratorInner::Array(arr.iter()), + _ => SubresourceIteratorInner::Once(value), + }, + "dependencies" => { + if let Some(obj) = value.as_object() { + SubresourceIteratorInner::FilteredObject(obj.values()) + } else { + SubresourceIteratorInner::Empty + } + } + _ => SubresourceIteratorInner::Empty, + } +} + +pub(crate) fn maybe_in_subresource<'r>( + segments: &Segments, + resolver: &Resolver<'r>, + subresource: ResourceRef<'_>, +) -> Result, Error> { + const IN_VALUE: &[&str] = &[ + "additionalItems", + "additionalProperties", + "contains", + "not", + "propertyNames", + ]; + const IN_CHILD: &[&str] = &[ + "allOf", + "anyOf", + "oneOf", + "definitions", + "patternProperties", + "properties", + ]; + draft202012::maybe_in_subresource_with_items_and_dependencies( + segments, + resolver, + subresource, + IN_VALUE, + IN_CHILD, + ) +} + +#[cfg(test)] +mod tests { + use crate::{spec::PathSegment, Draft, Error}; + use serde_json::{json, Value}; + + use super::{for_each_child, for_each_owned_child}; + + #[test] + fn 
test_analyze_object_draft6_keeps_plain_id_as_resource() { + let schema = json!({ + "$id": "child.json", + "properties": { + "name": { "type": "string" } + } + }); + + let object = schema.as_object().unwrap(); + let analysis = Draft::Draft6.analyze_object(object); + + assert_eq!(analysis.id, Some("child.json")); + assert!(!analysis.has_anchor); + } + + #[test] + fn test_for_each_child_streams_expected_children() { + let schema = json!({ + "not": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "null" + }, + "properties": { + "name": { "type": "string" } + } + }); + + let object = schema.as_object().unwrap(); + let mut seen: Vec<(Value, Draft)> = Vec::new(); + + for_each_child(object, Draft::Draft6, &mut |child, draft| { + seen.push((child.clone(), draft)); + Ok(()) + }) + .unwrap(); + + assert_eq!( + seen, + vec![ + ( + json!({ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "null" + }), + Draft::Draft202012, + ), + (json!({ "type": "string" }), Draft::Draft6), + ] + ); + } + + #[test] + fn test_for_each_owned_child_streams_expected_shapes() { + let schema = json!({ + "allOf": [ + { "minimum": 1 } + ], + "not": { "type": "null" }, + "properties": { + "name": { "type": "string" } + } + }); + + let object = schema.as_object().unwrap(); + let mut seen = Vec::new(); + + for_each_owned_child(object, Draft::Draft6, &mut |child| { + let (shape, path) = match (child.first, child.second) { + (PathSegment::Key(key), None) => ("singleton", key.to_owned()), + (PathSegment::Key(key), Some(PathSegment::Index(index))) => { + ("key_index", format!("{key}/{index}")) + } + (PathSegment::Key(key), Some(PathSegment::Key(child_key))) => { + ("key_key", format!("{key}/{child_key}")) + } + (PathSegment::Index(_), _) => { + unreachable!("owned children always start with a key") + } + }; + + seen.push((shape.to_owned(), path, child.value.clone(), child.draft)); + Ok(()) + }) + .unwrap(); + + assert_eq!( + seen, + vec![ + ( + 
"key_index".to_string(), + "allOf/0".to_string(), + json!({ "minimum": 1 }), + Draft::Draft6, + ), + ( + "singleton".to_string(), + "not".to_string(), + json!({ "type": "null" }), + Draft::Draft6, + ), + ( + "key_key".to_string(), + "properties/name".to_string(), + json!({ "type": "string" }), + Draft::Draft6, + ), + ] + ); + } + + #[test] + fn test_for_each_child_stops_and_returns_err() { + let schema = json!({ + "allOf": [ + { "minimum": 1 } + ], + "not": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "null" + }, + "properties": { + "name": { "type": "string" } + } + }); + + let object = schema.as_object().unwrap(); + let mut seen: Vec<(Value, Draft)> = Vec::new(); + + let error = for_each_child(object, Draft::Draft6, &mut |child, draft| { + seen.push((child.clone(), draft)); + if seen.len() == 2 { + return Err(Error::unknown_specification("stop")); + } + Ok(()) + }) + .expect_err("the callback error should stop traversal"); + + assert_eq!( + seen, + vec![ + (json!({ "minimum": 1 }), Draft::Draft6), + ( + json!({ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "null" + }), + Draft::Draft202012, + ), + ] + ); + assert_eq!( + error.to_string(), + "Unknown meta-schema: 'stop'. 
Custom meta-schemas must be registered in the registry before use" + ); + } +} diff --git a/crates/jsonschema-referencing/src/spec/draft7.rs b/crates/jsonschema-referencing/src/spec/draft7.rs new file mode 100644 index 00000000..ba43ceca --- /dev/null +++ b/crates/jsonschema-referencing/src/spec/draft7.rs @@ -0,0 +1,345 @@ +use serde_json::{Map, Value}; + +use crate::{ + draft::Draft, + spec::{ChildNode, ObjectAnalysis}, + Error, Resolver, ResourceRef, Segments, +}; + +use super::draft202012::{self, SubresourceIteratorInner}; + +fn visit_child<'a>( + key: &'a str, + value: &'a Value, + draft: Draft, + f: &mut impl FnMut(&'a str, Option<&'a str>, Option, &'a Value, Draft) -> Result<(), Error>, +) -> Result<(), Error> { + match key { + "additionalItems" + | "additionalProperties" + | "contains" + | "else" + | "if" + | "not" + | "propertyNames" + | "then" => { + f(key, None, None, value, draft.detect(value))?; + } + "allOf" | "anyOf" | "oneOf" => { + if let Some(arr) = value.as_array() { + for (index, item) in arr.iter().enumerate() { + f(key, None, Some(index), item, draft.detect(item))?; + } + } + } + "definitions" | "patternProperties" | "properties" => { + if let Some(obj) = value.as_object() { + for (child_key, child_value) in obj { + f( + key, + Some(child_key.as_str()), + None, + child_value, + draft.detect(child_value), + )?; + } + } + } + "items" => match value { + Value::Array(arr) => { + for (index, item) in arr.iter().enumerate() { + f("items", None, Some(index), item, draft.detect(item))?; + } + } + _ => f("items", None, None, value, draft.detect(value))?, + }, + "dependencies" => { + if let Some(obj) = value.as_object() { + for (child_key, child_value) in obj { + if !child_value.is_object() { + continue; + } + f( + "dependencies", + Some(child_key.as_str()), + None, + child_value, + draft.detect(child_value), + )?; + } + } + } + _ => {} + } + Ok(()) +} + +pub(crate) fn for_each_child<'a>( + schema: &'a Map, + draft: Draft, + f: &mut impl FnMut(&'a Value, 
Draft) -> Result<(), Error>, +) -> Result<(), Error> { + for (key, value) in schema { + visit_child(key, value, draft, &mut |_, _, _, child, child_draft| { + f(child, child_draft) + })?; + } + Ok(()) +} + +pub(crate) fn for_each_owned_child<'a>( + schema: &'a Map, + draft: Draft, + f: &mut impl FnMut(ChildNode<'a>) -> Result<(), Error>, +) -> Result<(), Error> { + for (key, value) in schema { + visit_child( + key, + value, + draft, + &mut |key, child_key, index, child, child_draft| { + f(ChildNode::from_parts( + key, + child_key, + index, + child, + child_draft, + )) + }, + )?; + } + Ok(()) +} + +pub(crate) fn analyze_object(schema: &Map, _draft: Draft) -> ObjectAnalysis<'_> { + let mut raw_id = None; + let mut ref_ = None; + let mut schema_ref = None; + + for (key, value) in schema { + match key.as_str() { + "$id" => raw_id = value.as_str(), + "$ref" => ref_ = value.as_str(), + "$schema" => schema_ref = value.as_str(), + _ => {} + } + } + + let has_anchor = raw_id.is_some_and(|id| id.starts_with('#')); + let id = match raw_id { + Some(id) if !has_anchor && ref_.is_none() => Some(id), + _ => None, + }; + + ObjectAnalysis { + id, + has_anchor, + ref_, + schema: schema_ref, + } +} + +pub(crate) fn object_iter<'a>( + (key, value): (&'a String, &'a Value), +) -> SubresourceIteratorInner<'a> { + match key.as_str() { + // For these keys, yield the value once. + "additionalItems" + | "additionalProperties" + | "contains" + | "else" + | "if" + | "not" + | "propertyNames" + | "then" => SubresourceIteratorInner::Once(value), + // For these keys, if the value is an array, iterate over its items. + "allOf" | "anyOf" | "oneOf" => { + if let Some(arr) = value.as_array() { + // In the old draft, flatten() was used. + // Here we simply iterate over the array. + SubresourceIteratorInner::Array(arr.iter()) + } else { + SubresourceIteratorInner::Empty + } + } + // For these keys, if the value is an object, iterate over its values. 
+ "definitions" | "patternProperties" | "properties" => { + if let Some(obj) = value.as_object() { + // flat_map in the old draft: iterate over the object's values. + SubresourceIteratorInner::Object(obj.values()) + } else { + SubresourceIteratorInner::Empty + } + } + // For "items": if it's an array, iterate over its items; otherwise, yield the value once. + "items" => match value { + Value::Array(arr) => SubresourceIteratorInner::Array(arr.iter()), + _ => SubresourceIteratorInner::Once(value), + }, + // For "dependencies": if the value is an object, iterate over its values filtered to only those that are objects. + "dependencies" => { + if let Some(obj) = value.as_object() { + SubresourceIteratorInner::FilteredObject(obj.values()) + } else { + SubresourceIteratorInner::Empty + } + } + // For any other key, yield nothing. + _ => SubresourceIteratorInner::Empty, + } +} + +pub(crate) fn maybe_in_subresource<'r>( + segments: &Segments, + resolver: &Resolver<'r>, + subresource: ResourceRef<'_>, +) -> Result, Error> { + const IN_VALUE: &[&str] = &[ + "additionalItems", + "additionalProperties", + "contains", + "else", + "if", + "not", + "propertyNames", + "then", + ]; + const IN_CHILD: &[&str] = &[ + "allOf", + "anyOf", + "oneOf", + "definitions", + "patternProperties", + "properties", + ]; + draft202012::maybe_in_subresource_with_items_and_dependencies( + segments, + resolver, + subresource, + IN_VALUE, + IN_CHILD, + ) +} + +#[cfg(test)] +mod tests { + use crate::{spec::PathSegment, Draft}; + use serde_json::{json, Value}; + + use super::{for_each_child, for_each_owned_child}; + + #[test] + fn test_analyze_object_draft7_keeps_plain_id_as_resource() { + let schema = json!({ + "$id": "child.json", + "properties": { + "name": { "type": "string" } + } + }); + + let object = schema.as_object().unwrap(); + let analysis = Draft::Draft7.analyze_object(object); + + assert_eq!(analysis.id, Some("child.json")); + assert!(!analysis.has_anchor); + } + + #[test] + fn 
test_for_each_owned_child_streams_expected_shapes() { + let schema = json!({ + "then": { "type": "null" }, + "allOf": [ + { "minimum": 1 } + ], + "properties": { + "name": { "type": "string" } + } + }); + + let object = schema.as_object().unwrap(); + let mut seen = Vec::new(); + + for_each_owned_child(object, Draft::Draft7, &mut |child| { + let (shape, path) = match (child.first, child.second) { + (PathSegment::Key(key), None) => ("singleton", key.to_owned()), + (PathSegment::Key(key), Some(PathSegment::Index(index))) => { + ("key_index", format!("{key}/{index}")) + } + (PathSegment::Key(key), Some(PathSegment::Key(child_key))) => { + ("key_key", format!("{key}/{child_key}")) + } + (PathSegment::Index(_), _) => { + unreachable!("owned children always start with a key") + } + }; + + seen.push((shape.to_owned(), path, child.value.clone(), child.draft)); + Ok(()) + }) + .unwrap(); + + assert_eq!( + seen, + vec![ + ( + "key_index".to_string(), + "allOf/0".to_string(), + json!({ "minimum": 1 }), + Draft::Draft7, + ), + ( + "key_key".to_string(), + "properties/name".to_string(), + json!({ "type": "string" }), + Draft::Draft7, + ), + ( + "singleton".to_string(), + "then".to_string(), + json!({ "type": "null" }), + Draft::Draft7, + ), + ] + ); + } + + #[test] + fn test_for_each_child_streams_expected_values() { + let schema = json!({ + "then": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "null" + }, + "allOf": [ + { "minimum": 1 } + ], + "properties": { + "name": { "type": "string" } + } + }); + + let object = schema.as_object().unwrap(); + let mut seen: Vec<(Value, Draft)> = Vec::new(); + + for_each_child(object, Draft::Draft7, &mut |child, draft| { + seen.push((child.clone(), draft)); + Ok(()) + }) + .unwrap(); + + assert_eq!( + seen, + vec![ + (json!({ "minimum": 1 }), Draft::Draft7), + (json!({ "type": "string" }), Draft::Draft7,), + ( + json!({ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "null" + }), + 
Draft::Draft202012, + ), + ] + ); + } +} diff --git a/crates/jsonschema-referencing/src/specification/ids.rs b/crates/jsonschema-referencing/src/spec/ids.rs similarity index 100% rename from crates/jsonschema-referencing/src/specification/ids.rs rename to crates/jsonschema-referencing/src/spec/ids.rs diff --git a/crates/jsonschema-referencing/src/spec/mod.rs b/crates/jsonschema-referencing/src/spec/mod.rs new file mode 100644 index 00000000..bb176c2e --- /dev/null +++ b/crates/jsonschema-referencing/src/spec/mod.rs @@ -0,0 +1,98 @@ +//! Types used by the schema traversal machinery during registry building. +//! +//! When the registry walks a schema document, each JSON object is scanned to extract +//! relevant information: +//! - [`ObjectAnalysis`]: shared per-object metadata for the new one-pass analyzer. +//! - [`ChildNode`]: a child to process next in the BFS queue, with its path and active draft. +//! +//! The sub-modules contain draft-specific scanning logic that produces these types. + +use serde_json::Value; + +pub(crate) mod draft201909; +pub(crate) mod draft202012; +pub(crate) mod draft4; +pub(crate) mod draft6; +pub(crate) mod draft7; +pub(crate) mod ids; + +use crate::draft::Draft; + +/// A single path step to a child node: an object key or array index. +#[derive(Copy, Clone)] +pub(crate) enum PathSegment<'a> { + Key(&'a str), + Index(usize), +} + +/// Shared metadata extracted from one schema object by the new analyzer path. +pub(crate) struct ObjectAnalysis<'a> { + pub(crate) id: Option<&'a str>, + pub(crate) has_anchor: bool, + pub(crate) ref_: Option<&'a str>, + pub(crate) schema: Option<&'a str>, +} + +/// A child node queued for the BFS traversal of a schema document. 
+#[derive(Copy, Clone)] +pub(crate) struct ChildNode<'a> { + pub(crate) first: PathSegment<'a>, + pub(crate) second: Option>, + pub(crate) value: &'a Value, + pub(crate) draft: Draft, +} + +impl<'a> ChildNode<'a> { + #[inline] + pub(crate) fn key(key: &'a str, value: &'a Value, draft: Draft) -> Self { + Self { + first: PathSegment::Key(key), + second: None, + value, + draft, + } + } + + #[inline] + pub(crate) fn key_index(key: &'a str, index: usize, value: &'a Value, draft: Draft) -> Self { + Self { + first: PathSegment::Key(key), + second: Some(PathSegment::Index(index)), + value, + draft, + } + } + + #[inline] + pub(crate) fn key_key( + key: &'a str, + child_key: &'a str, + value: &'a Value, + draft: Draft, + ) -> Self { + Self { + first: PathSegment::Key(key), + second: Some(PathSegment::Key(child_key)), + value, + draft, + } + } + + #[inline] + pub(crate) fn from_parts( + key: &'a str, + child_key: Option<&'a str>, + index: Option, + value: &'a Value, + draft: Draft, + ) -> Self { + match (child_key, index) { + (Some(child_key), None) => Self::key_key(key, child_key, value, draft), + (None, Some(index)) => Self::key_index(key, index, value, draft), + (None, None) => Self::key(key, value, draft), + (Some(_), Some(_)) => { + unreachable!("child nodes never have both a child key and index") + } + } + } +} diff --git a/crates/jsonschema-referencing/src/specification/draft201909.rs b/crates/jsonschema-referencing/src/specification/draft201909.rs deleted file mode 100644 index 0cf2dac7..00000000 --- a/crates/jsonschema-referencing/src/specification/draft201909.rs +++ /dev/null @@ -1,92 +0,0 @@ -use serde_json::Value; - -use crate::{resource::InnerResourcePtr, segments::Segment, Error, Resolver, Segments}; - -use super::subresources::SubresourceIteratorInner; - -pub(crate) fn object_iter<'a>( - (key, value): (&'a String, &'a Value), -) -> SubresourceIteratorInner<'a> { - match key.as_str() { - // For these keys, yield the value once. 
- "additionalItems" - | "additionalProperties" - | "contains" - | "contentSchema" - | "else" - | "if" - | "not" - | "propertyNames" - | "then" - | "unevaluatedItems" - | "unevaluatedProperties" => SubresourceIteratorInner::Once(value), - // For these keys, if the value is an array, iterate over its items. - "allOf" | "anyOf" | "oneOf" => { - if let Some(arr) = value.as_array() { - SubresourceIteratorInner::Array(arr.iter()) - } else { - SubresourceIteratorInner::Empty - } - } - // For these keys, if the value is an object, iterate over its values. - "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { - if let Some(obj) = value.as_object() { - SubresourceIteratorInner::Object(obj.values()) - } else { - SubresourceIteratorInner::Empty - } - } - // For "items": if it's an array, iterate over its items; otherwise, yield the value once. - "items" => match value { - Value::Array(arr) => SubresourceIteratorInner::Array(arr.iter()), - _ => SubresourceIteratorInner::Once(value), - }, - // For any other key, yield nothing. 
- _ => SubresourceIteratorInner::Empty, - } -} - -pub(crate) fn maybe_in_subresource<'r>( - segments: &Segments, - resolver: &Resolver<'r>, - subresource: &InnerResourcePtr, -) -> Result, Error> { - const IN_VALUE: &[&str] = &[ - "additionalItems", - "additionalProperties", - "contains", - "contentSchema", - "else", - "if", - "not", - "propertyNames", - "then", - "unevaluatedItems", - "unevaluatedProperties", - ]; - const IN_CHILD: &[&str] = &[ - "allOf", - "anyOf", - "oneOf", - "$defs", - "definitions", - "dependentSchemas", - "patternProperties", - "properties", - ]; - - let mut iter = segments.iter(); - while let Some(segment) = iter.next() { - if let Segment::Key(key) = segment { - if *key == "items" && subresource.contents().is_object() { - return resolver.in_subresource_inner(subresource); - } - if !IN_VALUE.contains(&key.as_ref()) - && (!IN_CHILD.contains(&key.as_ref()) || iter.next().is_none()) - { - return Ok(resolver.clone()); - } - } - } - resolver.in_subresource_inner(subresource) -} diff --git a/crates/jsonschema-referencing/src/specification/draft4.rs b/crates/jsonschema-referencing/src/specification/draft4.rs deleted file mode 100644 index 9082062c..00000000 --- a/crates/jsonschema-referencing/src/specification/draft4.rs +++ /dev/null @@ -1,80 +0,0 @@ -use serde_json::Value; - -use crate::{resource::InnerResourcePtr, Error, Resolver, Segments}; - -use super::subresources::{self, SubresourceIteratorInner}; - -pub(crate) fn object_iter<'a>( - (key, value): (&'a String, &'a Value), -) -> SubresourceIteratorInner<'a> { - match key.as_str() { - // For "items": if it’s an array, iterate over it; otherwise, yield one element. - "items" => match value { - Value::Array(arr) => SubresourceIteratorInner::Array(arr.iter()), - _ => SubresourceIteratorInner::Once(value), - }, - // For "allOf", "anyOf", "oneOf", "prefixItems": if the value is an array, iterate over it. 
- "allOf" | "anyOf" | "oneOf" | "prefixItems" => { - if let Some(arr) = value.as_array() { - SubresourceIteratorInner::Array(arr.iter()) - } else { - SubresourceIteratorInner::Empty - } - } - // For "$defs", "definitions", "dependentSchemas", "patternProperties", "properties": - // if the value is an object, iterate over its values. - "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { - if let Some(obj) = value.as_object() { - SubresourceIteratorInner::Object(obj.values()) - } else { - SubresourceIteratorInner::Empty - } - } - // For "dependencies": if the value is an object, iterate over its values filtered to only those that are objects. - "dependencies" => { - if let Some(obj) = value.as_object() { - SubresourceIteratorInner::FilteredObject(obj.values()) - } else { - SubresourceIteratorInner::Empty - } - } - // For "additionalItems" and "additionalProperties", only if the value is an object. - "additionalItems" | "additionalProperties" if value.is_object() => { - SubresourceIteratorInner::Once(value) - } - // For other keys that were originally in the “single element” group: - "contains" - | "contentSchema" - | "else" - | "if" - | "propertyNames" - | "not" - | "then" - | "unevaluatedItems" - | "unevaluatedProperties" => SubresourceIteratorInner::Once(value), - _ => SubresourceIteratorInner::Empty, - } -} - -pub(crate) fn maybe_in_subresource<'r>( - segments: &Segments, - resolver: &Resolver<'r>, - subresource: &InnerResourcePtr, -) -> Result, Error> { - const IN_VALUE: &[&str] = &["additionalItems", "additionalProperties", "not"]; - const IN_CHILD: &[&str] = &[ - "allOf", - "anyOf", - "oneOf", - "definitions", - "patternProperties", - "properties", - ]; - subresources::maybe_in_subresource_with_items_and_dependencies( - segments, - resolver, - subresource, - IN_VALUE, - IN_CHILD, - ) -} diff --git a/crates/jsonschema-referencing/src/specification/draft6.rs b/crates/jsonschema-referencing/src/specification/draft6.rs deleted 
file mode 100644 index 81ff8218..00000000 --- a/crates/jsonschema-referencing/src/specification/draft6.rs +++ /dev/null @@ -1,70 +0,0 @@ -use serde_json::Value; - -use crate::{resource::InnerResourcePtr, Error, Resolver, Segments}; - -use super::subresources::{self, SubresourceIteratorInner}; - -pub(crate) fn object_iter<'a>( - (key, value): (&'a String, &'a Value), -) -> SubresourceIteratorInner<'a> { - match key.as_str() { - "additionalItems" | "additionalProperties" | "contains" | "not" | "propertyNames" => { - SubresourceIteratorInner::Once(value) - } - "allOf" | "anyOf" | "oneOf" => { - if let Some(arr) = value.as_array() { - SubresourceIteratorInner::Array(arr.iter()) - } else { - SubresourceIteratorInner::Empty - } - } - "definitions" | "patternProperties" | "properties" => { - if let Some(obj) = value.as_object() { - SubresourceIteratorInner::Object(obj.values()) - } else { - SubresourceIteratorInner::Empty - } - } - "items" => match value { - Value::Array(arr) => SubresourceIteratorInner::Array(arr.iter()), - _ => SubresourceIteratorInner::Once(value), - }, - "dependencies" => { - if let Some(obj) = value.as_object() { - SubresourceIteratorInner::FilteredObject(obj.values()) - } else { - SubresourceIteratorInner::Empty - } - } - _ => SubresourceIteratorInner::Empty, - } -} - -pub(crate) fn maybe_in_subresource<'r>( - segments: &Segments, - resolver: &Resolver<'r>, - subresource: &InnerResourcePtr, -) -> Result, Error> { - const IN_VALUE: &[&str] = &[ - "additionalItems", - "additionalProperties", - "contains", - "not", - "propertyNames", - ]; - const IN_CHILD: &[&str] = &[ - "allOf", - "anyOf", - "oneOf", - "definitions", - "patternProperties", - "properties", - ]; - subresources::maybe_in_subresource_with_items_and_dependencies( - segments, - resolver, - subresource, - IN_VALUE, - IN_CHILD, - ) -} diff --git a/crates/jsonschema-referencing/src/specification/draft7.rs b/crates/jsonschema-referencing/src/specification/draft7.rs deleted file mode 100644 
index c61af4f4..00000000 --- a/crates/jsonschema-referencing/src/specification/draft7.rs +++ /dev/null @@ -1,87 +0,0 @@ -use serde_json::Value; - -use crate::{resource::InnerResourcePtr, Error, Resolver, Segments}; - -use super::subresources::{self, SubresourceIteratorInner}; - -pub(crate) fn object_iter<'a>( - (key, value): (&'a String, &'a Value), -) -> SubresourceIteratorInner<'a> { - match key.as_str() { - // For these keys, yield the value once. - "additionalItems" - | "additionalProperties" - | "contains" - | "else" - | "if" - | "not" - | "propertyNames" - | "then" => SubresourceIteratorInner::Once(value), - // For these keys, if the value is an array, iterate over its items. - "allOf" | "anyOf" | "oneOf" => { - if let Some(arr) = value.as_array() { - // In the old draft, flatten() was used. - // Here we simply iterate over the array. - SubresourceIteratorInner::Array(arr.iter()) - } else { - SubresourceIteratorInner::Empty - } - } - // For these keys, if the value is an object, iterate over its values. - "definitions" | "patternProperties" | "properties" => { - if let Some(obj) = value.as_object() { - // flat_map in the old draft: iterate over the object's values. - SubresourceIteratorInner::Object(obj.values()) - } else { - SubresourceIteratorInner::Empty - } - } - // For "items": if it's an array, iterate over its items; otherwise, yield the value once. - "items" => match value { - Value::Array(arr) => SubresourceIteratorInner::Array(arr.iter()), - _ => SubresourceIteratorInner::Once(value), - }, - // For "dependencies": if the value is an object, iterate over its values filtered to only those that are objects. - "dependencies" => { - if let Some(obj) = value.as_object() { - SubresourceIteratorInner::FilteredObject(obj.values()) - } else { - SubresourceIteratorInner::Empty - } - } - // For any other key, yield nothing. 
- _ => SubresourceIteratorInner::Empty, - } -} - -pub(crate) fn maybe_in_subresource<'r>( - segments: &Segments, - resolver: &Resolver<'r>, - subresource: &InnerResourcePtr, -) -> Result, Error> { - const IN_VALUE: &[&str] = &[ - "additionalItems", - "additionalProperties", - "contains", - "else", - "if", - "not", - "propertyNames", - "then", - ]; - const IN_CHILD: &[&str] = &[ - "allOf", - "anyOf", - "oneOf", - "definitions", - "patternProperties", - "properties", - ]; - subresources::maybe_in_subresource_with_items_and_dependencies( - segments, - resolver, - subresource, - IN_VALUE, - IN_CHILD, - ) -} diff --git a/crates/jsonschema-referencing/tests/suite b/crates/jsonschema-referencing/tests/suite index de5bc6f7..b062c384 160000 --- a/crates/jsonschema-referencing/tests/suite +++ b/crates/jsonschema-referencing/tests/suite @@ -1 +1 @@ -Subproject commit de5bc6f7ff9fda78f35bd6f6276b3bcd3a3f8c3f +Subproject commit b062c3849e41b7a173d0dd64081811effb0c19ab diff --git a/crates/jsonschema-referencing/tests/suite.rs b/crates/jsonschema-referencing/tests/suite.rs index 967392a1..4f322978 100644 --- a/crates/jsonschema-referencing/tests/suite.rs +++ b/crates/jsonschema-referencing/tests/suite.rs @@ -43,15 +43,16 @@ fn test_suite(draft: &'static str, test: Test) { "json-schema-draft-2020-12" => Draft::Draft202012, _ => panic!("Unknown draft"), }; - let registry = Registry::try_from_resources( - test.registry - .into_iter() - .map(|(uri, content)| (uri, draft.create_resource(content))), - ) - .expect("Invalid registry"); - let resolver = registry - .try_resolver(test.base_uri.unwrap_or_default()) - .expect("Invalid base URI"); + let mut registry = Registry::new().draft(draft); + for (uri, content) in test.registry { + registry = registry + .add(uri, draft.create_resource(content)) + .expect("Invalid registry input"); + } + let registry = registry.prepare().expect("Invalid registry"); + let resolver = registry.resolver( + 
referencing::uri::from_str(test.base_uri.unwrap_or_default()).expect("Invalid base URI"), + ); if test.error.is_some() { assert!(resolver.lookup(test.reference).is_err()); } else { diff --git a/crates/jsonschema/src/bundler.rs b/crates/jsonschema/src/bundler.rs index 3d21e1dc..357e4057 100644 --- a/crates/jsonschema/src/bundler.rs +++ b/crates/jsonschema/src/bundler.rs @@ -2,11 +2,10 @@ use crate::{compiler, options::ValidationOptions}; use ahash::AHashSet; use referencing::{Draft, Resolver}; use serde_json::{Map, Value}; - fn bundle_from_registry( schema: &Value, draft: Draft, - registry: &referencing::Registry, + registry: &referencing::Registry<'_>, base_uri: &referencing::Uri, ) -> Result { let resolver = registry.resolver(base_uri.clone()); @@ -19,27 +18,50 @@ fn bundle_from_registry( } pub(crate) fn bundle_with_options( - config: &ValidationOptions, + config: &ValidationOptions<'_>, schema: &Value, ) -> Result { let draft = config.draft_for(schema)?; - let resource = draft.create_resource(schema.clone()); let resource_ref = draft.create_resource_ref(schema); + if let Some(registry) = config.registry { + let requested_base_uri = + compiler::resolve_base_uri(config.base_uri.as_ref(), resource_ref.id())?; + let overlay = registry + .add(requested_base_uri.as_str(), resource_ref)? 
+ .retriever(config.retriever.clone()) + .draft(draft) + .prepare()?; + let base_uri = + compiler::normalized_base_uri_for_generated_registry(&overlay, &requested_base_uri); + return bundle_from_registry(schema, draft, &overlay, &base_uri); + } let (registry, base_uri) = - compiler::build_registry(config, draft, resource, resource_ref.id())?; + compiler::build_registry(config, draft, resource_ref, resource_ref.id())?; bundle_from_registry(schema, draft, &registry, &base_uri) } #[cfg(feature = "resolve-async")] pub(crate) async fn bundle_with_options_async( - config: &crate::options::ValidationOptions>, + config: &crate::options::ValidationOptions<'_, std::sync::Arc>, schema: &Value, ) -> Result { let draft = config.draft_for(schema).await?; - let resource = draft.create_resource(schema.clone()); let resource_ref = draft.create_resource_ref(schema); + if let Some(registry) = config.registry { + let requested_base_uri = + compiler::resolve_base_uri(config.base_uri.as_ref(), resource_ref.id())?; + let overlay = registry + .add(requested_base_uri.as_str(), resource_ref)? 
+ .async_retriever(config.retriever.clone()) + .draft(draft) + .async_prepare() + .await?; + let base_uri = + compiler::normalized_base_uri_for_generated_registry(&overlay, &requested_base_uri); + return bundle_from_registry(schema, draft, &overlay, &base_uri); + } let (registry, base_uri) = - compiler::build_registry_async(config, draft, resource, resource_ref.id()).await?; + compiler::build_registry_async(config, draft, resource_ref, resource_ref.id()).await?; bundle_from_registry(schema, draft, &registry, &base_uri) } diff --git a/crates/jsonschema/src/compiler.rs b/crates/jsonschema/src/compiler.rs index 95a42ee0..07fc6a30 100644 --- a/crates/jsonschema/src/compiler.rs +++ b/crates/jsonschema/src/compiler.rs @@ -19,11 +19,10 @@ use crate::{ }; use ahash::{AHashMap, AHashSet}; use referencing::{ - uri, Draft, List, Registry, Resolved, Resolver, Resource, ResourceRef, Uri, Vocabulary, - VocabularySet, + uri, Draft, List, Resolved, Resolver, ResourceRef, Uri, Vocabulary, VocabularySet, }; use serde_json::{Map, Value}; -use std::{borrow::Cow, cell::RefCell, iter::once, rc::Rc, sync::Arc}; +use std::{cell::RefCell, rc::Rc, sync::Arc}; const DEFAULT_SCHEME: &str = "json-schema"; pub(crate) const DEFAULT_BASE_URI: &str = "json-schema:///"; @@ -33,6 +32,62 @@ type SharedCache = Rc>>; /// Type alias for shared sets in compiler state. 
type SharedSet = Rc>>; +pub(crate) trait CompilationOptions { + fn validate_formats(&self) -> Option; + fn are_unknown_formats_ignored(&self) -> bool; + fn get_content_media_type_check(&self, media_type: &str) -> Option; + fn content_encoding_check(&self, content_encoding: &str) -> Option; + fn get_content_encoding_convert( + &self, + content_encoding: &str, + ) -> Option; + fn get_keyword_factory(&self, name: &str) -> Option<&Arc>; + fn get_format(&self, format: &str) -> Option<(&String, &Arc)>; + fn pattern_options(&self) -> PatternEngineOptions; + fn email_options(&self) -> Option<&email_address::Options>; +} + +impl CompilationOptions for ValidationOptions<'_, R> { + fn validate_formats(&self) -> Option { + ValidationOptions::validate_formats(self) + } + + fn are_unknown_formats_ignored(&self) -> bool { + ValidationOptions::are_unknown_formats_ignored(self) + } + + fn get_content_media_type_check(&self, media_type: &str) -> Option { + ValidationOptions::get_content_media_type_check(self, media_type) + } + + fn content_encoding_check(&self, content_encoding: &str) -> Option { + ValidationOptions::content_encoding_check(self, content_encoding) + } + + fn get_content_encoding_convert( + &self, + content_encoding: &str, + ) -> Option { + ValidationOptions::get_content_encoding_convert(self, content_encoding) + } + + fn get_keyword_factory(&self, name: &str) -> Option<&Arc> { + ValidationOptions::get_keyword_factory(self, name) + } + + fn get_format(&self, format: &str) -> Option<(&String, &Arc)> { + ValidationOptions::get_format(self, format) + } + + fn pattern_options(&self) -> PatternEngineOptions { + ValidationOptions::compiler_pattern_options(self) + } + + fn email_options(&self) -> Option<&email_address::Options> { + ValidationOptions::compiler_email_options(self) + } +} + #[derive(Hash, PartialEq, Eq, Clone, Debug)] pub(crate) struct LocationCacheKey { pub(crate) base_uri: Arc>, @@ -116,10 +171,9 @@ impl SharedContextState { } /// Per-location view used while 
compiling schemas into validators. -#[derive(Debug, Clone)] +#[derive(Clone)] pub(crate) struct Context<'a> { - config: &'a ValidationOptions, - pub(crate) registry: &'a Registry, + config: &'a dyn CompilationOptions, resolver: Resolver<'a>, vocabularies: VocabularySet, location: Location, @@ -145,8 +199,7 @@ pub(crate) struct Context<'a> { impl<'a> Context<'a> { pub(crate) fn new( - config: &'a ValidationOptions, - registry: &'a Registry, + config: &'a dyn CompilationOptions, resolver: Resolver<'a>, vocabularies: VocabularySet, draft: Draft, @@ -154,7 +207,6 @@ impl<'a> Context<'a> { ) -> Self { Context { config, - registry, resolver, resource_base: location.clone(), location, @@ -166,7 +218,7 @@ impl<'a> Context<'a> { pub(crate) fn draft(&self) -> Draft { self.draft } - pub(crate) fn config(&self) -> &ValidationOptions { + pub(crate) fn config(&self) -> &dyn CompilationOptions { self.config } @@ -178,7 +230,6 @@ impl<'a> Context<'a> { let resolver = self.resolver.in_subresource(resource)?; Ok(Context { config: self.config, - registry: self.registry, resolver, vocabularies: self.vocabularies.clone(), draft: resource.draft(), @@ -196,7 +247,6 @@ impl<'a> Context<'a> { let location = self.location.join(chunk); Context { config: self.config, - registry: self.registry, resolver: self.resolver.clone(), vocabularies: self.vocabularies.clone(), resource_base: self.resource_base.clone(), @@ -268,6 +318,9 @@ impl<'a> Context<'a> { pub(crate) fn supports_integer_valued_numbers(&self) -> bool { !matches!(self.draft, Draft::Draft4) } + pub(crate) fn find_vocabularies(&self, draft: Draft, contents: &Value) -> VocabularySet { + self.resolver.find_vocabularies(draft, contents) + } pub(crate) fn validates_formats_by_default(&self) -> bool { self.config.validate_formats().unwrap_or(matches!( self.draft, @@ -286,7 +339,6 @@ impl<'a> Context<'a> { ) -> Context<'a> { Context { config: self.config, - registry: self.registry, resolver, draft, vocabularies, @@ -326,13 +378,13 @@ 
impl<'a> Context<'a> { ) -> Result { let uri = self .resolver - .resolve_against(&self.resolver.base_uri().borrow(), reference)?; + .resolve_uri(&self.resolver.base_uri().borrow(), reference)?; Ok(self.shared.seen.borrow().contains(&*uri)) } pub(crate) fn mark_seen(&self, reference: &str) -> Result<(), referencing::Error> { let uri = self .resolver - .resolve_against(&self.resolver.base_uri().borrow(), reference)?; + .resolve_uri(&self.resolver.base_uri().borrow(), reference)?; self.shared.seen.borrow_mut().insert(uri); Ok(()) } @@ -350,7 +402,7 @@ impl<'a> Context<'a> { } let result = self .resolver - .resolve_against(&self.resolver.base_uri().borrow(), &buffer); + .resolve_uri(&self.resolver.base_uri().borrow(), &buffer); buffer.clear(); result } @@ -360,7 +412,7 @@ impl<'a> Context<'a> { reference: &str, ) -> Result>, referencing::Error> { self.resolver - .resolve_against(&self.resolver.base_uri().borrow(), reference) + .resolve_uri(&self.resolver.base_uri().borrow(), reference) } pub(crate) fn cached_location_node(&self, key: &LocationCacheKey) -> Option { @@ -668,142 +720,142 @@ impl<'a> Context<'a> { } } -pub(crate) fn build_registry( - config: &ValidationOptions, +pub(crate) fn build_registry<'a>( + config: &'a ValidationOptions<'a>, draft: Draft, - resource: referencing::Resource, - schema_id: Option<&str>, -) -> Result<(Arc, referencing::Uri), referencing::Error> { - let base_uri = if let Some(base_uri) = config.base_uri.as_ref() { - uri::from_str(base_uri)? - } else { - uri::from_str(schema_id.unwrap_or(DEFAULT_BASE_URI))? - }; - - // Build a registry & resolver needed for validator compilation - // Clone resources to drain them without mutating the original config - let pairs = collect_resource_pairs(base_uri.as_str(), resource, config.resources.clone()); - - let registry = if let Some(ref registry) = config.registry { - Arc::new(registry.clone().try_with_resources_and_retriever( - pairs, - &*config.retriever, - draft, - )?) 
- } else { - Arc::new( - Registry::options() - .draft(draft) - .retriever(Arc::clone(&config.retriever)) - .build(pairs)?, - ) - }; + resource: ResourceRef<'a>, + schema_id: Option<&'a str>, +) -> Result<(referencing::Registry<'a>, referencing::Uri), referencing::Error> { + let base_uri = resolve_base_uri(config.base_uri.as_ref(), schema_id)?; + let registry = referencing::Registry::new() + .retriever(config.retriever.clone()) + .draft(draft) + .add(base_uri.as_str(), resource)? + .prepare()?; Ok((registry, base_uri)) } pub(crate) fn build_validator( - config: &ValidationOptions, + config: &ValidationOptions<'_>, schema: &Value, ) -> Result> { let draft = config.draft_for(schema)?; - let resource_ref = draft.create_resource_ref(schema); // single computation - let resource = draft.create_resource(schema.clone()); - let (registry, base_uri) = build_registry(config, draft, resource, resource_ref.id())?; - let vocabularies = registry.find_vocabularies(draft, schema); - let resolver = registry.resolver(base_uri); - - let ctx = Context::new( - config, - &registry, - resolver, - vocabularies, - draft, - Location::new(), - ); + let resource = draft.create_resource_ref(schema); // Validate the schema itself if config.validate_schema { validate_schema(draft, schema)?; } - // Finally, compile the validator - let root = compile(&ctx, resource_ref).map_err(ValidationError::to_owned)?; - let draft = config.draft(); - Ok(Validator { root, draft }) + if let Some(registry) = config.registry { + let base_uri = resolve_base_uri(config.base_uri.as_ref(), resource.id())?; + let overlay = registry + .add(base_uri.as_str(), resource)?
+ .retriever(config.retriever.clone()) + .draft(draft) + .prepare()?; + return build_validator_with_registry(config, schema, draft, resource, &overlay); + } + + let (registry, _) = build_registry(config, draft, resource, resource.id())?; + build_validator_with_registry(config, schema, draft, resource, &registry) } #[cfg(feature = "resolve-async")] -pub(crate) async fn build_registry_async( - config: &ValidationOptions>, +pub(crate) async fn build_registry_async<'a>( + config: &'a ValidationOptions<'a, Arc>, draft: Draft, - resource: referencing::Resource, - schema_id: Option<&str>, -) -> Result<(Arc, referencing::Uri), referencing::Error> { - let base_uri = if let Some(base_uri) = config.base_uri.as_ref() { - uri::from_str(base_uri)? - } else { - uri::from_str(schema_id.unwrap_or(DEFAULT_BASE_URI))? - }; - - // Build a registry & resolver needed for validator compilation - // Clone resources to drain them without mutating the original config - let pairs = collect_resource_pairs(base_uri.as_str(), resource, config.resources.clone()); - - let registry = if let Some(ref registry) = config.registry { - Arc::new( - registry - .clone() - .try_with_resources_and_retriever_async(pairs, &*config.retriever, draft) - .await?, - ) - } else { - Arc::new( - Registry::options() - .draft(draft) - .async_retriever(Arc::clone(&config.retriever)) - .build(pairs) - .await?, - ) - }; + resource: ResourceRef<'a>, + schema_id: Option<&'a str>, +) -> Result<(referencing::Registry<'a>, referencing::Uri), referencing::Error> { + let base_uri = resolve_base_uri(config.base_uri.as_ref(), schema_id)?; + let registry = referencing::Registry::new() + .async_retriever(config.retriever.clone()) + .draft(draft) + .add(base_uri.as_str(), resource)?
+ .async_prepare() + .await?; Ok((registry, base_uri)) } #[cfg(feature = "resolve-async")] pub(crate) async fn build_validator_async( - config: &ValidationOptions>, + config: &ValidationOptions<'_, Arc>, schema: &Value, ) -> Result> { let draft = config.draft_for(schema).await?; let resource_ref = draft.create_resource_ref(schema); // single computation - let resource = draft.create_resource(schema.clone()); - let (registry, base_uri) = - build_registry_async(config, draft, resource, resource_ref.id()).await?; - let vocabularies = registry.find_vocabularies(draft, schema); - let resolver = registry.resolver(base_uri); - // HACK: `ValidationOptions` struct has a default type parameter as `Arc` and to - // avoid propagating types everywhere in `Context`, it is easier to just replace the - // retriever to one that implements `Retrieve`, as it is not used anymore anyway. - let config_with_blocking_retriever = config - .clone() - .with_blocking_retriever(crate::retriever::DefaultRetriever); - let ctx = Context::new( - &config_with_blocking_retriever, - &registry, - resolver, - vocabularies, - draft, - Location::new(), - ); if config.validate_schema { validate_schema(draft, schema)?; } - let root = compile(&ctx, resource_ref).map_err(ValidationError::to_owned)?; + if let Some(registry) = config.registry { + let base_uri = resolve_base_uri(config.base_uri.as_ref(), resource_ref.id())?; + let overlay = registry + .add(base_uri.as_str(), resource_ref)?
+ .async_retriever(config.retriever.clone()) + .draft(draft) + .async_prepare() + .await?; + return build_validator_with_registry(config, schema, draft, resource_ref, &overlay); + } + + let (registry, _) = + build_registry_async(config, draft, resource_ref, resource_ref.id()).await?; + build_validator_with_registry(config, schema, draft, resource_ref, &registry) +} + +fn build_validator_with_registry( + config: &ValidationOptions<'_, R>, + schema: &Value, + draft: Draft, + resource: ResourceRef<'_>, + registry: &referencing::Registry<'_>, +) -> Result> { + let requested_base_uri = resolve_base_uri(config.base_uri.as_ref(), resource.id())?; + let base_uri = normalized_base_uri_for_generated_registry(registry, &requested_base_uri); + let vocabularies = registry.find_vocabularies(draft, schema); + let resolver = registry.resolver(base_uri); + let ctx = Context::new(config, resolver, vocabularies, draft, Location::new()); + let root = compile(&ctx, resource).map_err(ValidationError::to_owned)?; let draft = config.draft(); Ok(Validator { root, draft }) } +pub(crate) fn normalized_base_uri_for_generated_registry( + registry: &referencing::Registry<'_>, + base_uri: &referencing::Uri, +) -> referencing::Uri { + if registry.contains_resource(base_uri.as_str()) { + return base_uri.clone(); + } + + if base_uri + .fragment() + .is_some_and(|fragment| fragment.as_str().is_empty()) + { + let mut normalized = base_uri.clone(); + normalized.set_fragment(None); + if registry.contains_resource(normalized.as_str()) { + return normalized; + } + } + + panic!("generated registry is missing root URI '{base_uri}'"); +} + +pub(crate) fn resolve_base_uri( + base_uri: Option<&String>, + schema_id: Option<&str>, +) -> Result, referencing::Error> { + if let Some(base_uri) = base_uri { + uri::from_str(base_uri) + } else { + uri::from_str(schema_id.unwrap_or(DEFAULT_BASE_URI)) + } +} + fn annotations_to_value(annotations: AHashMap) -> Arc { let mut object = Map::with_capacity(annotations.len());
for (key, value) in annotations { @@ -812,18 +864,6 @@ fn annotations_to_value(annotations: AHashMap) -> Arc { Arc::new(Value::Object(object)) } -fn collect_resource_pairs( - base_uri: &str, - resource: Resource, - resources: AHashMap, -) -> impl IntoIterator, Resource)> { - once((Cow::Borrowed(base_uri), resource)).chain( - resources - .into_iter() - .map(|(uri, resource)| (Cow::Owned(uri), resource)), - ) -} - fn validate_schema(draft: Draft, schema: &Value) -> Result<(), ValidationError<'static>> { // Boolean schemas are always valid per the spec, skip validation if schema.is_boolean() { diff --git a/crates/jsonschema/src/error.rs b/crates/jsonschema/src/error.rs index 11873c57..409b32f2 100644 --- a/crates/jsonschema/src/error.rs +++ b/crates/jsonschema/src/error.rs @@ -1790,7 +1790,7 @@ impl fmt::Display for MaskedValidationError<'_, '_, '_> { #[cfg(test)] mod tests { use super::*; - use referencing::Resource; + use referencing::{Registry, Resource}; use serde_json::json; use test_case::test_case; @@ -2288,11 +2288,16 @@ mod tests { "$ref": "https://example.com/string.json" }); let instance = serde_json::json!(42); - let validator = crate::options() - .with_resource( + let registry = Registry::new() + .add( "https://example.com/string.json", Resource::from_contents(external), ) + .expect("external schema should be accepted") + .prepare() + .expect("registry should build"); + let validator = crate::options() + .with_registry(&registry) .build(&schema) .expect("schema should compile"); let err = validator.validate(&instance).unwrap_err(); diff --git a/crates/jsonschema/src/keywords/ref_.rs b/crates/jsonschema/src/keywords/ref_.rs index 3e5d6df5..745e92d7 100644 --- a/crates/jsonschema/src/keywords/ref_.rs +++ b/crates/jsonschema/src/keywords/ref_.rs @@ -190,7 +190,7 @@ fn compile_reference_validator<'a>( Ok(resolved) => resolved.into_inner(), Err(error) => return Some(Err(ValidationError::from(error))), }; - let vocabularies = ctx.registry.find_vocabularies(draft,
contents); + let vocabularies = ctx.find_vocabularies(draft, contents); let resource_ref = draft.create_resource_ref(contents); let inner_ctx = ctx.with_resolver_and_draft( resolver, @@ -241,7 +241,7 @@ fn compile_recursive_validator<'a>( .lookup_recursive_reference() .map_err(ValidationError::from)?; let (contents, resolver, draft) = resolved.into_inner(); - let vocabularies = ctx.registry.find_vocabularies(draft, contents); + let vocabularies = ctx.find_vocabularies(draft, contents); let resource_ref = draft.create_resource_ref(contents); let inner_ctx = ctx.with_resolver_and_draft( resolver, diff --git a/crates/jsonschema/src/keywords/unevaluated_items.rs b/crates/jsonschema/src/keywords/unevaluated_items.rs index 8cb45dbd..4eab1ae2 100644 --- a/crates/jsonschema/src/keywords/unevaluated_items.rs +++ b/crates/jsonschema/src/keywords/unevaluated_items.rs @@ -358,7 +358,7 @@ fn compile_recursive_ref<'a>( // Create context for the resolved reference and check its cache key let (contents, resolver, draft) = resolved.into_inner(); if let Value::Object(subschema) = &contents { - let vocabularies = ctx.registry.find_vocabularies(draft, contents); + let vocabularies = ctx.find_vocabularies(draft, contents); let ref_ctx = ctx.with_resolver_and_draft(resolver, draft, vocabularies, ctx.location().clone()); diff --git a/crates/jsonschema/src/keywords/unevaluated_properties.rs b/crates/jsonschema/src/keywords/unevaluated_properties.rs index 93cf7047..c086c65c 100644 --- a/crates/jsonschema/src/keywords/unevaluated_properties.rs +++ b/crates/jsonschema/src/keywords/unevaluated_properties.rs @@ -484,7 +484,7 @@ fn compile_ref<'a>( let (contents, resolver, draft) = resolved.into_inner(); if let Value::Object(subschema) = &contents { - let vocabularies = ctx.registry.find_vocabularies(draft, contents); + let vocabularies = ctx.find_vocabularies(draft, contents); let ref_ctx = ctx.with_resolver_and_draft(resolver, draft, vocabularies, ctx.location().clone()); let validators = @@ 
-507,7 +507,7 @@ fn compile_dynamic_ref<'a>( let (contents, resolver, draft) = resolved.into_inner(); if let Value::Object(subschema) = &contents { - let vocabularies = ctx.registry.find_vocabularies(draft, contents); + let vocabularies = ctx.find_vocabularies(draft, contents); let ref_ctx = ctx.with_resolver_and_draft(resolver, draft, vocabularies, ctx.location().clone()); let validators = @@ -534,7 +534,7 @@ fn compile_recursive_ref<'a>( // Create context for the resolved reference and check its cache key let (contents, resolver, draft) = resolved.into_inner(); if let Value::Object(subschema) = &contents { - let vocabularies = ctx.registry.find_vocabularies(draft, contents); + let vocabularies = ctx.find_vocabularies(draft, contents); let ref_ctx = ctx.with_resolver_and_draft(resolver, draft, vocabularies, ctx.location().clone()); diff --git a/crates/jsonschema/src/lib.rs b/crates/jsonschema/src/lib.rs index 8e76b8cd..1e7d6f12 100644 --- a/crates/jsonschema/src/lib.rs +++ b/crates/jsonschema/src/lib.rs @@ -531,7 +531,7 @@ //! ```rust //! # fn main() -> Result<(), Box> { //! use serde_json::json; -//! use jsonschema::Resource; +//! use jsonschema::{Registry, Resource}; //! //! // Root schema with multiple definitions //! let root_schema = json!({ @@ -559,9 +559,13 @@ //! // Create a schema that references the specific definition you want to validate against //! let user_schema = json!({"$ref": "https://example.com/root#/definitions/User"}); //! -//! // Register the root schema and build validator for the specific definition +//! let registry = Registry::new() +//! .add("https://example.com/root", root_schema)? +//! .prepare()?; +//! +//! // Build validator for the specific definition via the shared prepared registry //! let validator = jsonschema::options() -//! .with_resource("https://example.com/root", Resource::from_contents(root_schema)) +//! .with_registry(&registry) //! .build(&user_schema)?; //! //!
// Now validate data against just the User definition @@ -887,7 +891,7 @@ pub use http::HttpOptions; pub use keywords::custom::Keyword; pub use options::{EmailOptions, FancyRegex, PatternOptions, Regex, ValidationOptions}; pub use referencing::{ - Draft, Error as ReferencingError, Registry, RegistryOptions, Resource, Retrieve, Uri, + Draft, Error as ReferencingError, Registry, RegistryBuilder, Resource, Retrieve, Uri, }; #[cfg(all(feature = "resolve-http", not(target_arch = "wasm32")))] pub use retriever::{HttpRetriever, HttpRetrieverError}; @@ -1165,7 +1169,7 @@ pub async fn async_validator_for(schema: &Value) -> Result ValidationOptions { +pub fn options<'i>() -> ValidationOptions<'i> { Validator::options() } @@ -1231,15 +1235,15 @@ pub fn options() -> ValidationOptions { /// See [`ValidationOptions`] for all available configuration options. #[cfg(feature = "resolve-async")] #[must_use] -pub fn async_options() -> ValidationOptions> { +pub fn async_options<'i>() -> ValidationOptions<'i, std::sync::Arc> { Validator::async_options() } /// Functionality for validating JSON Schema documents against their meta-schemas. 
pub mod meta { - use crate::{error::ValidationError, Draft}; + use crate::{error::ValidationError, Draft, Registry}; use ahash::AHashSet; - use referencing::{Registry, Retrieve}; + use referencing::Retrieve; use serde_json::Value; pub use validator_handle::MetaValidator; @@ -1257,10 +1261,11 @@ pub mod meta { /// "type": "object" /// })); /// - /// let registry = Registry::try_new( - /// "http://example.com/meta", - /// custom_meta - /// ).unwrap(); + /// let registry = Registry::new() + /// .add("http://example.com/meta", custom_meta) + /// .unwrap() + /// .prepare() + /// .unwrap(); /// /// let schema = json!({ /// "$schema": "http://example.com/meta", @@ -1268,21 +1273,21 @@ pub mod meta { /// }); /// /// assert!(jsonschema::meta::options() - /// .with_registry(registry) + /// .with_registry(&registry) /// .is_valid(&schema)); /// ``` #[must_use] - pub fn options() -> MetaSchemaOptions { + pub fn options<'a>() -> MetaSchemaOptions<'a> { MetaSchemaOptions::default() } /// Options for meta-schema validation. #[derive(Clone, Default)] - pub struct MetaSchemaOptions { - registry: Option, + pub struct MetaSchemaOptions<'a> { + registry: Option<&'a Registry<'a>>, } - impl MetaSchemaOptions { + impl<'a> MetaSchemaOptions<'a> { /// Use a registry for resolving custom meta-schemas.
/// /// # Examples @@ -1296,16 +1301,17 @@ pub mod meta { /// "type": "object" /// })); /// - /// let registry = Registry::try_new( - /// "http://example.com/meta", - /// custom_meta - /// ).unwrap(); + /// let registry = Registry::new() + /// .add("http://example.com/meta", custom_meta) + /// .unwrap() + /// .prepare() + /// .unwrap(); /// /// let options = jsonschema::meta::options() - /// .with_registry(registry); + /// .with_registry(&registry); /// ``` #[must_use] - pub fn with_registry(mut self, registry: Registry) -> Self { + pub fn with_registry(mut self, registry: &'a Registry<'a>) -> Self { self.registry = Some(registry); self } @@ -1317,7 +1323,7 @@ pub mod meta { /// Panics if the meta-schema cannot be resolved. #[must_use] pub fn is_valid(&self, schema: &Value) -> bool { - match try_meta_validator_for(schema, self.registry.as_ref()) { + match try_meta_validator_for(schema, self.registry) { Ok(validator) => validator.as_ref().is_valid(schema), Err(e) => panic!("Failed to resolve meta-schema: {e}"), } @@ -1328,8 +1334,11 @@ pub mod meta { /// # Errors /// /// Returns [`ValidationError`] if the schema is invalid or if the meta-schema cannot be resolved.
- pub fn validate<'a>(&self, schema: &'a Value) -> Result<(), ValidationError<'a>> { - let validator = try_meta_validator_for(schema, self.registry.as_ref())?; + pub fn validate<'schema>( + &self, + schema: &'schema Value, + ) -> Result<(), ValidationError<'schema>> { + let validator = try_meta_validator_for(schema, self.registry)?; validator.as_ref().validate(schema) } } @@ -1576,7 +1585,7 @@ pub mod meta { fn try_meta_validator_for<'a>( schema: &Value, - registry: Option<&Registry>, + registry: Option<&'a Registry<'a>>, ) -> Result, ValidationError<'static>> { let draft = Draft::default().detect(schema); @@ -1593,7 +1602,8 @@ pub mod meta { resolve_meta_schema_with_registry(meta_schema_uri, registry)?; let validator = crate::options() .with_draft(resolved_draft) - .with_registry(registry.clone()) + .with_registry(registry) + .with_base_uri(meta_schema_uri.trim_end_matches('#')) .without_schema_validation() .build(&custom_meta_schema)?; return Ok(MetaValidator::owned(validator)); @@ -1615,14 +1625,14 @@ pub mod meta { fn resolve_meta_schema_with_registry( uri: &str, - registry: &Registry, + registry: &Registry<'_>, ) -> Result<(Value, Draft), ValidationError<'static>> { - let resolver = registry.try_resolver(uri)?; + let resolver = registry.resolver(referencing::uri::from_str(uri)?); let first_resolved = resolver.lookup("")?; let first_meta_schema = first_resolved.contents().clone(); let draft = walk_meta_schema_chain(uri, |current_uri| { - let resolver = registry.try_resolver(current_uri)?; + let resolver = registry.resolver(referencing::uri::from_str(current_uri)?); let resolved = resolver.lookup("")?; Ok(resolved.contents().clone()) })?; @@ -1790,7 +1800,7 @@ pub mod draft4 { /// /// See [`ValidationOptions`] for all available configuration options. 
#[must_use] - pub fn options() -> ValidationOptions { + pub fn options<'i>() -> ValidationOptions<'i> { crate::options().with_draft(Draft::Draft4) } @@ -1971,7 +1981,7 @@ pub mod draft6 { /// /// See [`ValidationOptions`] for all available configuration options. #[must_use] - pub fn options() -> ValidationOptions { + pub fn options<'i>() -> ValidationOptions<'i> { crate::options().with_draft(Draft::Draft6) } @@ -2152,7 +2162,7 @@ pub mod draft7 { /// /// See [`ValidationOptions`] for all available configuration options. #[must_use] - pub fn options() -> ValidationOptions { + pub fn options<'i>() -> ValidationOptions<'i> { crate::options().with_draft(Draft::Draft7) } @@ -2333,7 +2343,7 @@ pub mod draft201909 { /// /// See [`ValidationOptions`] for all available configuration options. #[must_use] - pub fn options() -> ValidationOptions { + pub fn options<'i>() -> ValidationOptions<'i> { crate::options().with_draft(Draft::Draft201909) } @@ -2516,7 +2526,7 @@ pub mod draft202012 { /// /// See [`ValidationOptions`] for all available configuration options. 
#[must_use] - pub fn options() -> ValidationOptions { + pub fn options<'i>() -> ValidationOptions<'i> { crate::options().with_draft(Draft::Draft202012) } @@ -2770,8 +2780,7 @@ pub(crate) mod tests_util { #[cfg(test)] mod tests { - use crate::{validator_for, ValidationError}; - use referencing::{Registry, Resource}; + use crate::{validator_for, Registry, ValidationError}; use super::Draft; use serde_json::json; @@ -3233,15 +3242,14 @@ mod tests { } }); - // Register the custom meta-schema as a resource - let resources = vec![( - "http://custom.example.com/schema".to_string(), - Resource::from_contents(meta_schema), - )]; - + let registry = Registry::new() + .add("http://custom.example.com/schema", meta_schema) + .expect("Should accept meta-schema") + .prepare() + .expect("Should create registry"); let validator = crate::options() .without_schema_validation() - .with_resources(resources.into_iter()) + .with_registry(&registry) .build(&schema) .expect("Should build validator"); @@ -3367,21 +3375,14 @@ mod tests { "type": "object" }); - // Build the validator with both the meta-schema and the element schema as resources - let resources = vec![ - ( - "http://example.com/meta/schema".to_string(), - referencing::Resource::from_contents(meta_schema), - ), - ( - "http://example.com/schemas/element".to_string(), - referencing::Resource::from_contents(element_schema.clone()), - ), - ]; - + let registry = Registry::new() + .add("http://example.com/meta/schema", meta_schema) + .expect("Should accept meta-schema") + .prepare() + .expect("Should create registry"); let validator = crate::options() .without_schema_validation() - .with_resources(resources.into_iter()) + .with_registry(&registry) .build(&element_schema) .expect("Should successfully build validator with custom meta-schema"); @@ -3410,13 +3411,14 @@ mod tests { } }); - let registry = Registry::try_new( - "http://example.com/custom-with-unevaluated", - Resource::from_contents(custom_meta), - ) - .expect("Should create
registry"); + let registry = Registry::new() + .add("http://example.com/custom-with-unevaluated", custom_meta) + .expect("Should accept meta-schema") + .prepare() + .expect("Should create registry"); let schema = json!({ + "$id": "http://example.com/subject", "$schema": "http://example.com/custom-with-unevaluated#", "type": "object", "properties": { @@ -3427,7 +3429,7 @@ mod tests { let validator = crate::options() .without_schema_validation() - .with_registry(registry) + .with_registry(&registry) .build(&schema) .expect("Should build validator"); @@ -3448,11 +3450,11 @@ mod tests { "unevaluatedProperties": false }); - let registry = Registry::try_new( - "https://json-schema.org/draft/2020-12/strict", - Resource::from_contents(strict_meta), - ) - .expect("Should create registry"); + let registry = Registry::new() + .add("https://json-schema.org/draft/2020-12/strict", strict_meta) + .expect("Should accept strict meta-schema") + .prepare() + .expect("Should create registry"); // Valid schema - all keywords are recognized let valid_schema = json!({ @@ -3464,7 +3466,7 @@ mod tests { }); assert!(crate::meta::options() - .with_registry(registry.clone()) + .with_registry(&registry) .is_valid(&valid_schema)); // Invalid schema - top-level typo "typ" instead of "type" @@ -3474,7 +3476,7 @@ mod tests { }); assert!(!crate::meta::options() - .with_registry(registry.clone()) + .with_registry(&registry) .is_valid(&invalid_schema_top_level)); // Invalid schema - nested invalid keyword "minSize" (not a real JSON Schema keyword) @@ -3487,7 +3489,7 @@ mod tests { }); assert!(!crate::meta::options() - .with_registry(registry) + .with_registry(&registry) .is_valid(&invalid_schema_nested)); } @@ -3506,13 +3508,14 @@ mod tests { } }); - let registry = Registry::try_new( - "http://example.com/meta/draft7-custom", - Resource::from_contents(custom_meta_draft7), - ) - .expect("Should create registry"); + let registry = Registry::new() + .add("http://example.com/meta/draft7-custom", custom_meta_draft7)
+ .expect("Should accept meta-schema") + .prepare() + .expect("Should create registry"); let schema = json!({ + "$id": "http://example.com/subject", "$schema": "http://example.com/meta/draft7-custom", "$ref": "#/$defs/positiveNumber", "maximum": 5, @@ -3526,7 +3529,7 @@ mod tests { let validator = crate::options() .without_schema_validation() - .with_registry(registry) + .with_registry(&registry) .build(&schema) .expect("Should build validator"); @@ -3537,11 +3540,11 @@ mod tests { mod meta_options_tests { use super::*; - use crate::{Registry, Resource}; + use crate::Registry; #[test] fn test_meta_options_with_registry_valid_schema() { - let custom_meta = Resource::from_contents(json!({ + let custom_meta = json!({ "$schema": "https://json-schema.org/draft/2020-12/schema", "type": "object", "properties": { @@ -3550,9 +3553,13 @@ mod tests { "maxLength": { "type": "integer" } }, "additionalProperties": false - })); + }); - let registry = Registry::try_new("http://example.com/meta", custom_meta).unwrap(); + let registry = Registry::new() + .add("http://example.com/meta", custom_meta) + .unwrap() + .prepare() + .unwrap(); let schema = json!({ "$schema": "http://example.com/meta", @@ -3561,27 +3568,31 @@ mod tests { }); assert!(crate::meta::options() - .with_registry(registry.clone()) + .with_registry(&registry) .is_valid(&schema)); assert!(crate::meta::options() - .with_registry(registry) + .with_registry(&registry) .validate(&schema) .is_ok()); } #[test] fn test_meta_options_with_registry_invalid_schema() { - let custom_meta = Resource::from_contents(json!({ + let custom_meta = json!({ "$schema": "https://json-schema.org/draft/2020-12/schema", "type": "object", "properties": { "type": { "type": "string" } }, "additionalProperties": false - })); + }); - let registry = Registry::try_new("http://example.com/meta", custom_meta).unwrap(); + let registry = Registry::new() + .add("http://example.com/meta", custom_meta) + .unwrap() + .prepare() + .unwrap(); // Schema has disallowed
property let schema = json!({ @@ -3591,11 +3602,11 @@ mod tests { }); assert!(!crate::meta::options() - .with_registry(registry.clone()) + .with_registry(&registry) .is_valid(&schema)); assert!(crate::meta::options() - .with_registry(registry) + .with_registry(&registry) .validate(&schema) .is_err()); } @@ -3603,12 +3614,16 @@ mod tests { #[test] fn test_meta_options_with_registry_chain() { // Create a chain: custom-meta -> draft2020-12 - let custom_meta = Resource::from_contents(json!({ + let custom_meta = json!({ "$schema": "https://json-schema.org/draft/2020-12/schema", "type": "object" - })); + }); - let registry = Registry::try_new("http://example.com/custom", custom_meta).unwrap(); + let registry = Registry::new() + .add("http://example.com/custom", custom_meta) + .unwrap() + .prepare() + .unwrap(); let schema = json!({ "$schema": "http://example.com/custom", @@ -3616,34 +3631,37 @@ mod tests { }); assert!(crate::meta::options() - .with_registry(registry) + .with_registry(&registry) .is_valid(&schema)); } #[test] fn test_meta_options_with_registry_multi_level_chain() { // Create chain: schema -> meta-level-2 -> meta-level-1 -> draft2020-12 - let meta_level_1 = Resource::from_contents(json!({ + let meta_level_1 = json!({ "$id": "http://example.com/meta/level1", "$schema": "https://json-schema.org/draft/2020-12/schema", "type": "object", "properties": { "customProp": { "type": "boolean" } } - })); + }); - let meta_level_2 = Resource::from_contents(json!({ + let meta_level_2 = json!({ "$id": "http://example.com/meta/level2", "$schema": "http://example.com/meta/level1", "type": "object", "customProp": true - })); + }); - let registry = Registry::try_from_resources([ - ("http://example.com/meta/level1", meta_level_1), - ("http://example.com/meta/level2", meta_level_2), - ]) - .unwrap(); + let registry = Registry::new() + .extend([ + ("http://example.com/meta/level1", meta_level_1), + ("http://example.com/meta/level2", meta_level_2), + ]) + .unwrap() + .prepare() +
.unwrap(); let schema = json!({ "$schema": "http://example.com/meta/level2", @@ -3652,22 +3670,22 @@ mod tests { }); assert!(crate::meta::options() - .with_registry(registry) + .with_registry(®istry) .is_valid(&schema)); } #[test] fn test_meta_options_with_registry_multi_document_meta_schema() { - let shared_constraints = Resource::from_contents(json!({ + let shared_constraints = json!({ "$id": "http://example.com/meta/shared", "$schema": "https://json-schema.org/draft/2020-12/schema", "type": "object", "properties": { "maxLength": { "type": "integer", "minimum": 0 } } - })); + }); - let root_meta = Resource::from_contents(json!({ + let root_meta = json!({ "$id": "http://example.com/meta/root", "$schema": "https://json-schema.org/draft/2020-12/schema", "type": "object", @@ -3678,13 +3696,16 @@ mod tests { "allOf": [ { "$ref": "http://example.com/meta/shared" } ] - })); + }); - let registry = Registry::try_from_resources([ - ("http://example.com/meta/root", root_meta), - ("http://example.com/meta/shared", shared_constraints), - ]) - .unwrap(); + let registry = Registry::new() + .extend([ + ("http://example.com/meta/root", root_meta), + ("http://example.com/meta/shared", shared_constraints), + ]) + .unwrap() + .prepare() + .unwrap(); let schema = json!({ "$schema": "http://example.com/meta/root", @@ -3693,7 +3714,7 @@ mod tests { }); let result = crate::meta::options() - .with_registry(registry.clone()) + .with_registry(®istry) .validate(&schema); assert!( @@ -3703,7 +3724,7 @@ mod tests { ); assert!(crate::meta::options() - .with_registry(registry) + .with_registry(®istry) .is_valid(&schema)); } @@ -3733,12 +3754,16 @@ mod tests { #[test] fn test_meta_options_with_registry_missing_metaschema() { - let custom_meta = Resource::from_contents(json!({ + let custom_meta = json!({ "$schema": "https://json-schema.org/draft/2020-12/schema", "type": "object" - })); + }); - let registry = Registry::try_new("http://example.com/meta1", custom_meta).unwrap(); + let registry = 
Registry::new() + .add("http://example.com/meta1", custom_meta) + .unwrap() + .prepare() + .unwrap(); // Schema references a different meta-schema not in registry let schema = json!({ @@ -3747,7 +3772,7 @@ mod tests { }); let result = crate::meta::options() - .with_registry(registry) + .with_registry(®istry) .validate(&schema); assert!(result.is_err()); @@ -3756,23 +3781,26 @@ mod tests { #[test] fn test_meta_options_circular_reference_detection() { // Create a circular reference: meta1 -> meta2 -> meta1 - let meta1 = Resource::from_contents(json!({ + let meta1 = json!({ "$id": "http://example.com/meta1", "$schema": "http://example.com/meta2", "type": "object" - })); + }); - let meta2 = Resource::from_contents(json!({ + let meta2 = json!({ "$id": "http://example.com/meta2", "$schema": "http://example.com/meta1", "type": "object" - })); + }); - let registry = Registry::try_from_resources([ - ("http://example.com/meta1", meta1), - ("http://example.com/meta2", meta2), - ]) - .unwrap(); + let registry = Registry::new() + .extend([ + ("http://example.com/meta1", meta1), + ("http://example.com/meta2", meta2), + ]) + .unwrap() + .prepare() + .unwrap(); let schema = json!({ "$schema": "http://example.com/meta1", @@ -3780,7 +3808,7 @@ mod tests { }); let result = crate::meta::options() - .with_registry(registry) + .with_registry(®istry) .validate(&schema); assert!(result.is_err()); @@ -3810,13 +3838,17 @@ mod tests { #[test] fn test_meta_options_validate_returns_specific_errors() { - let custom_meta = Resource::from_contents(json!({ + let custom_meta = json!({ "$schema": "https://json-schema.org/draft/2020-12/schema", "type": "object", "required": ["type"] - })); + }); - let registry = Registry::try_new("http://example.com/meta", custom_meta).unwrap(); + let registry = Registry::new() + .add("http://example.com/meta", custom_meta) + .unwrap() + .prepare() + .unwrap(); // Schema missing required property let schema = json!({ @@ -3827,7 +3859,7 @@ mod tests { }); let result = 
crate::meta::options() - .with_registry(registry) + .with_registry(®istry) .validate(&schema); assert!(result.is_err()); @@ -3837,7 +3869,7 @@ mod tests { #[test] fn test_meta_options_builds_validator_with_resolved_draft() { - let custom_meta = Resource::from_contents(json!({ + let custom_meta = json!({ "$id": "http://example.com/meta/draft7-based", "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", @@ -3847,10 +3879,13 @@ mod tests { "minLength": { "type": "integer" } }, "additionalProperties": false - })); + }); - let registry = - Registry::try_new("http://example.com/meta/draft7-based", custom_meta).unwrap(); + let registry = Registry::new() + .add("http://example.com/meta/draft7-based", custom_meta) + .unwrap() + .prepare() + .unwrap(); let schema = json!({ "$schema": "http://example.com/meta/draft7-based", @@ -3859,7 +3894,7 @@ mod tests { }); let result = crate::meta::options() - .with_registry(registry) + .with_registry(®istry) .validate(&schema); assert!(result.is_ok()); @@ -3867,7 +3902,7 @@ mod tests { #[test] fn test_meta_options_validator_uses_correct_draft() { - let custom_meta_draft6 = Resource::from_contents(json!({ + let custom_meta_draft6 = json!({ "$id": "http://example.com/meta/draft6-based", "$schema": "http://json-schema.org/draft-06/schema#", "type": "object", @@ -3877,11 +3912,13 @@ mod tests { "exclusiveMinimum": { "type": "number" } }, "additionalProperties": false - })); + }); - let registry = - Registry::try_new("http://example.com/meta/draft6-based", custom_meta_draft6) - .unwrap(); + let registry = Registry::new() + .add("http://example.com/meta/draft6-based", custom_meta_draft6) + .unwrap() + .prepare() + .unwrap(); let schema_valid_for_draft6 = json!({ "$schema": "http://example.com/meta/draft6-based", @@ -3890,7 +3927,7 @@ mod tests { }); let result = crate::meta::options() - .with_registry(registry) + .with_registry(®istry) .validate(&schema_valid_for_draft6); assert!(result.is_ok()); @@ -3898,7 +3935,7 @@ mod 
tests { #[test] fn test_meta_options_without_schema_validation_in_built_validator() { - let custom_meta = Resource::from_contents(json!({ + let custom_meta = json!({ "$id": "http://example.com/meta/custom", "$schema": "https://json-schema.org/draft/2020-12/schema", "type": "object", @@ -3907,10 +3944,13 @@ mod tests { "type": { "type": "string" } }, "additionalProperties": false - })); + }); - let registry = - Registry::try_new("http://example.com/meta/custom", custom_meta).unwrap(); + let registry = Registry::new() + .add("http://example.com/meta/custom", custom_meta) + .unwrap() + .prepare() + .unwrap(); let schema = json!({ "$schema": "http://example.com/meta/custom", @@ -3918,7 +3958,7 @@ mod tests { }); let result = crate::meta::options() - .with_registry(registry) + .with_registry(®istry) .validate(&schema); assert!(result.is_ok()); @@ -3928,7 +3968,7 @@ mod tests { fn test_meta_validation_uses_resolved_draft_from_chain() { // Chain: user-schema -> custom-meta -> Draft 4 // Validator should use Draft 4 rules to validate the schema - let custom_meta = Resource::from_contents(json!({ + let custom_meta = json!({ "$id": "http://example.com/meta/draft4-based", "$schema": "http://json-schema.org/draft-04/schema#", "type": "object", @@ -3939,10 +3979,13 @@ mod tests { "const": { "type": "string" } }, "additionalProperties": false - })); + }); - let registry = - Registry::try_new("http://example.com/meta/draft4-based", custom_meta).unwrap(); + let registry = Registry::new() + .add("http://example.com/meta/draft4-based", custom_meta) + .unwrap() + .prepare() + .unwrap(); let schema = json!({ "$schema": "http://example.com/meta/draft4-based", @@ -3951,7 +3994,7 @@ mod tests { }); let result = crate::meta::options() - .with_registry(registry) + .with_registry(®istry) .validate(&schema); assert!(result.is_ok()); @@ -3960,16 +4003,16 @@ mod tests { #[test] fn test_meta_validation_multi_level_chain_uses_resolved_draft() { // Multi-level chain: user-schema -> meta-2 -> 
meta-1 -> Draft 4 - let meta_level_1 = Resource::from_contents(json!({ + let meta_level_1 = json!({ "$id": "http://example.com/meta/level1", "$schema": "http://json-schema.org/draft-04/schema#", "type": "object", "properties": { "customKeyword": { "type": "boolean" } } - })); + }); - let meta_level_2 = Resource::from_contents(json!({ + let meta_level_2 = json!({ "$id": "http://example.com/meta/level2", "$schema": "http://example.com/meta/level1", "type": "object", @@ -3981,13 +4024,16 @@ mod tests { }, "customKeyword": true, "additionalProperties": false - })); + }); - let registry = Registry::try_from_resources([ - ("http://example.com/meta/level1", meta_level_1), - ("http://example.com/meta/level2", meta_level_2), - ]) - .unwrap(); + let registry = Registry::new() + .extend([ + ("http://example.com/meta/level1", meta_level_1), + ("http://example.com/meta/level2", meta_level_2), + ]) + .unwrap() + .prepare() + .unwrap(); let schema = json!({ "$schema": "http://example.com/meta/level2", @@ -3997,7 +4043,7 @@ mod tests { }); let result = crate::meta::options() - .with_registry(registry) + .with_registry(®istry) .validate(&schema); assert!(result.is_ok()); @@ -4084,7 +4130,6 @@ mod tests { #[cfg(all(test, feature = "resolve-async", not(target_family = "wasm")))] mod async_tests { - use referencing::Resource; use std::{collections::HashMap, sync::Arc}; use serde_json::json; @@ -4231,36 +4276,19 @@ mod async_tests { } #[tokio::test] - async fn test_async_with_registry() { - use crate::Registry; - - // Create a registry with initial schemas - let registry = Registry::options() - .async_retriever(TestRetriever::new()) - .build([( - "https://example.com/user.json", - Resource::from_contents(json!({ - "type": "object", - "properties": { - "name": {"type": "string"}, - "age": {"type": "integer", "minimum": 0} - }, - "required": ["name"] - })), - )]) - .await - .unwrap(); + async fn test_async_with_registry_uses_async_retriever_for_inline_only_refs() { + let registry = 
crate::Registry::new().prepare().unwrap(); + let schema = json!({ + "$ref": "https://example.com/user.json" + }); - // Create a validator using the pre-populated registry let validator = crate::async_options() - .with_registry(registry) - .build(&json!({ - "$ref": "https://example.com/user.json" - })) + .with_registry(®istry) + .with_retriever(TestRetriever::new()) + .build(&schema) .await .unwrap(); - // Verify that validation works with the registry assert!(validator.is_valid(&json!({ "name": "John Doe", "age": 30 diff --git a/crates/jsonschema/src/options.rs b/crates/jsonschema/src/options.rs index 4975df79..903c8f67 100644 --- a/crates/jsonschema/src/options.rs +++ b/crates/jsonschema/src/options.rs @@ -12,13 +12,13 @@ use crate::{ }; use ahash::AHashMap; use email_address::Options as EmailAddressOptions; -use referencing::{Draft, Resource, Retrieve}; +use referencing::{Draft, Retrieve}; use serde_json::Value; use std::{fmt, marker::PhantomData, sync::Arc}; /// Configuration options for JSON Schema validation. #[derive(Clone)] -pub struct ValidationOptions> { +pub struct ValidationOptions<'i, R = Arc> { pub(crate) draft: Option, content_media_type_checks: AHashMap<&'static str, Option>, content_encoding_checks_and_converters: @@ -26,9 +26,7 @@ pub struct ValidationOptions> { pub(crate) base_uri: Option, /// Retriever for external resources pub(crate) retriever: R, - /// Additional resources that should be addressable during validation. 
- pub(crate) resources: AHashMap, - pub(crate) registry: Option, + pub(crate) registry: Option<&'i referencing::Registry<'i>>, formats: AHashMap>, validate_formats: Option, pub(crate) validate_schema: bool, @@ -38,7 +36,7 @@ pub struct ValidationOptions> { email_options: Option, } -impl Default for ValidationOptions> { +impl Default for ValidationOptions<'_, Arc> { fn default() -> Self { ValidationOptions { draft: None, @@ -46,7 +44,6 @@ impl Default for ValidationOptions> { content_encoding_checks_and_converters: AHashMap::default(), base_uri: None, retriever: Arc::new(DefaultRetriever), - resources: AHashMap::default(), registry: None, formats: AHashMap::default(), validate_formats: None, @@ -60,7 +57,7 @@ impl Default for ValidationOptions> { } #[cfg(feature = "resolve-async")] -impl Default for ValidationOptions> { +impl Default for ValidationOptions<'_, Arc> { fn default() -> Self { ValidationOptions { draft: None, @@ -68,7 +65,6 @@ impl Default for ValidationOptions> { content_encoding_checks_and_converters: AHashMap::default(), base_uri: None, retriever: Arc::new(DefaultRetriever), - resources: AHashMap::default(), registry: None, formats: AHashMap::default(), validate_formats: None, @@ -81,11 +77,34 @@ impl Default for ValidationOptions> { } } -impl ValidationOptions { +impl<'i, R> ValidationOptions<'i, R> { /// Return the draft version, or the default if not set. pub(crate) fn draft(&self) -> Draft { self.draft.unwrap_or_default() } + + pub(crate) fn compiler_pattern_options(&self) -> PatternEngineOptions { + self.pattern_options + } + + pub(crate) fn compiler_email_options(&self) -> Option<&EmailAddressOptions> { + self.email_options.as_ref() + } + + pub(crate) fn resolve_draft_from_registry( + uri: &str, + registry: &referencing::Registry<'_>, + ) -> Result { + let uri = uri.trim_end_matches('#'); + // Walk the meta-schema chain to find the underlying draft. 
+ crate::meta::walk_meta_schema_chain(uri, |current_uri| { + let uri = referencing::uri::from_str(current_uri)?; + let resolver = registry.resolver(uri); + let resolved = resolver.lookup("")?; + Ok(resolved.contents().clone()) + }) + } + /// Sets the JSON Schema draft version. /// /// ```rust @@ -274,99 +293,8 @@ impl ValidationOptions { self.base_uri = Some(base_uri.into()); self } - /// Add a custom schema, allowing it to be referenced by the specified URI during validation. - /// - /// This enables the use of additional in-memory schemas alongside the main schema being validated. - /// - /// # Example - /// - /// ```rust - /// # use serde_json::json; - /// # fn main() -> Result<(), Box> { - /// use jsonschema::Resource; - /// - /// let extra = Resource::from_contents(json!({"minimum": 5})); - /// - /// let validator = jsonschema::options() - /// .with_resource("urn:minimum-schema", extra) - /// .build(&json!({"$ref": "urn:minimum-schema"}))?; - /// assert!(validator.is_valid(&json!(5))); - /// assert!(!validator.is_valid(&json!(4))); - /// # Ok(()) - /// # } - /// ``` #[must_use] - pub fn with_resource(mut self, uri: impl Into, resource: Resource) -> Self { - self.resources.insert(uri.into(), resource); - self - } - /// Add custom schemas, allowing them to be referenced by the specified URI during validation. - /// - /// This enables the use of additional in-memory schemas alongside the main schema being validated. 
- /// - /// # Example - /// - /// ```rust - /// # use serde_json::json; - /// # fn main() -> Result<(), Box> { - /// use jsonschema::Resource; - /// - /// let validator = jsonschema::options() - /// .with_resources([ - /// ( - /// "urn:minimum-schema", - /// Resource::from_contents(json!({"minimum": 5})), - /// ), - /// ( - /// "urn:maximum-schema", - /// Resource::from_contents(json!({"maximum": 10})), - /// ), - /// ].into_iter()) - /// .build(&json!({"$ref": "urn:minimum-schema"}))?; - /// assert!(validator.is_valid(&json!(5))); - /// assert!(!validator.is_valid(&json!(4))); - /// # Ok(()) - /// # } - /// ``` - #[must_use] - pub fn with_resources( - mut self, - pairs: impl Iterator, Resource)>, - ) -> Self { - for (uri, resource) in pairs { - self.resources.insert(uri.into(), resource); - } - self - } - /// Use external schema resources from the registry, making them accessible via references - /// during validation. - /// - /// # Example - /// - /// ```rust - /// # use serde_json::json; - /// # fn main() -> Result<(), Box> { - /// use jsonschema::{Registry, Resource}; - /// - /// let registry = Registry::try_new( - /// "urn:name-schema", - /// Resource::from_contents(json!({"type": "string"})) - /// )?; - /// let schema = json!({ - /// "properties": { - /// "name": { "$ref": "urn:name-schema" } - /// } - /// }); - /// let validator = jsonschema::options() - /// .with_registry(registry) - /// .build(&schema)?; - /// assert!(validator.is_valid(&json!({ "name": "Valid String" }))); - /// assert!(!validator.is_valid(&json!({ "name": 123 }))); - /// # Ok(()) - /// # } - /// ``` - #[must_use] - pub fn with_registry(mut self, registry: referencing::Registry) -> Self { + pub fn with_registry(mut self, registry: &'i referencing::Registry<'i>) -> Self { self.registry = Some(registry); self } @@ -500,7 +428,7 @@ impl ValidationOptions { } } -impl ValidationOptions> { +impl ValidationOptions<'_, Arc> { /// Build a JSON Schema validator using the current options. 
/// /// If no draft is set via [`with_draft`](Self::with_draft), the draft is auto-detected @@ -572,15 +500,14 @@ impl ValidationOptions> { let default = Draft::default(); let detected = default.detect(contents); - // If detected draft is Unknown (custom meta-schema), try to resolve it + // If detected draft is Unknown (custom meta-schema), try to resolve it. if detected == Draft::Unknown { - if let Some(registry) = &self.registry { + if let Some(registry) = self.registry { if let Some(meta_schema_uri) = contents .as_object() .and_then(|obj| obj.get("$schema")) .and_then(|s| s.as_str()) { - // Walk the meta-schema chain to find the underlying draft return Self::resolve_draft_from_registry(meta_schema_uri, registry); } } @@ -590,17 +517,6 @@ impl ValidationOptions> { } } - fn resolve_draft_from_registry( - uri: &str, - registry: &referencing::Registry, - ) -> Result { - let uri = uri.trim_end_matches('#'); - crate::meta::walk_meta_schema_chain(uri, |current_uri| { - let resolver = registry.try_resolver(current_uri)?; - let resolved = resolver.lookup("")?; - Ok(resolved.contents().clone()) - }) - } /// Set a retriever to fetch external resources. #[must_use] pub fn with_retriever(mut self, retriever: impl Retrieve + 'static) -> Self { @@ -678,9 +594,6 @@ impl ValidationOptions> { self.pattern_options = options.inner; self } - pub(crate) fn pattern_options(&self) -> PatternEngineOptions { - self.pattern_options - } /// Set email validation options to customize email format validation behavior. /// @@ -702,14 +615,10 @@ impl ValidationOptions> { self.email_options = Some(options.inner); self } - - pub(crate) fn email_options(&self) -> Option<&EmailAddressOptions> { - self.email_options.as_ref() - } } #[cfg(feature = "resolve-async")] -impl ValidationOptions> { +impl<'i> ValidationOptions<'i, Arc> { /// Build a JSON Schema validator using the current async options. 
/// /// # Errors @@ -736,14 +645,13 @@ impl ValidationOptions> { pub fn with_retriever( self, retriever: impl referencing::AsyncRetrieve + 'static, - ) -> ValidationOptions> { + ) -> ValidationOptions<'i, Arc> { ValidationOptions { draft: self.draft, retriever: Arc::new(retriever), content_media_type_checks: self.content_media_type_checks, content_encoding_checks_and_converters: self.content_encoding_checks_and_converters, base_uri: None, - resources: self.resources, registry: self.registry, formats: self.formats, validate_formats: self.validate_formats, @@ -758,40 +666,31 @@ impl ValidationOptions> { pub(crate) async fn draft_for(&self, contents: &Value) -> Result { // Preference: // - Explicitly set - // - Autodetected + // - Autodetected (with registry resolution for custom meta-schemas) // - Default if let Some(draft) = self.draft { Ok(draft) } else { let default = Draft::default(); - Ok(default.detect(contents)) - } - } - /// Set a retriever to fetch external resources. - pub(crate) fn with_blocking_retriever( - self, - retriever: impl Retrieve + 'static, - ) -> ValidationOptions> { - ValidationOptions { - draft: self.draft, - retriever: Arc::new(retriever), - content_media_type_checks: self.content_media_type_checks, - content_encoding_checks_and_converters: self.content_encoding_checks_and_converters, - base_uri: None, - resources: self.resources, - registry: self.registry, - formats: self.formats, - validate_formats: self.validate_formats, - validate_schema: self.validate_schema, - ignore_unknown_formats: self.ignore_unknown_formats, - keywords: self.keywords, - pattern_options: self.pattern_options, - email_options: self.email_options, + let detected = default.detect(contents); + // If detected draft is Unknown (custom meta-schema), try to resolve it. 
+ if detected == Draft::Unknown { + if let Some(registry) = self.registry { + if let Some(meta_schema_uri) = contents + .as_object() + .and_then(|obj| obj.get("$schema")) + .and_then(|s| s.as_str()) + { + return Self::resolve_draft_from_registry(meta_schema_uri, registry); + } + } + } + Ok(detected) } } } -impl fmt::Debug for ValidationOptions { +impl fmt::Debug for ValidationOptions<'_, Arc> { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { fmt.debug_struct("CompilationConfig") .field("draft", &self.draft) @@ -1160,8 +1059,8 @@ impl From for EmailOptions { #[cfg(test)] mod tests { use super::*; - use referencing::{Registry, Resource}; - use serde_json::json; + use referencing::{Registry, Resource, Retrieve, Uri}; + use serde_json::{json, Value}; fn custom(s: &str) -> bool { s.ends_with("42!") @@ -1181,24 +1080,53 @@ mod tests { #[test] fn with_registry() { - let registry = Registry::try_new( - "urn:name-schema", - Resource::from_contents(json!({"type": "string"})), - ) - .expect("Invalid URI"); + let registry = Registry::new() + .add("urn:name-schema", json!({"type": "string"})) + .expect("Invalid URI") + .prepare() + .expect("Registry should prepare"); let schema = json!({ "properties": { "name": { "$ref": "urn:name-schema" } } }); let validator = crate::options() - .with_registry(registry) + .with_registry(®istry) .build(&schema) .expect("Invalid schema"); assert!(validator.is_valid(&json!({ "name": "Valid String" }))); assert!(!validator.is_valid(&json!({ "name": 123 }))); } + struct InlineOnlyRetriever; + + impl Retrieve for InlineOnlyRetriever { + fn retrieve( + &self, + uri: &Uri, + ) -> Result> { + if uri.as_str() == "https://example.com/string.json" { + Ok(json!({"type": "string"})) + } else { + Err(format!("unexpected retrieval for {}", uri.as_str()).into()) + } + } + } + + #[test] + fn with_registry_uses_validation_options_retriever_for_inline_only_refs() { + let shared = Registry::new().prepare().expect("Registry should prepare"); + let 
schema = json!({"$ref": "https://example.com/string.json"}); + let validator = crate::options() + .with_registry(&shared) + .with_retriever(InlineOnlyRetriever) + .build(&schema) + .expect("Validator should build using the options retriever"); + + assert!(validator.is_valid(&json!("Valid String"))); + assert!(!validator.is_valid(&json!(123))); + } + #[test] fn test_fancy_regex_options_builder() { let options = PatternOptions::fancy_regex() @@ -1226,19 +1154,6 @@ mod tests { let _options = crate::options().with_draft(Draft::Unknown); } - #[test] - fn custom_meta_schema_allowed_when_draft_overridden() { - let schema = json!({ - "$schema": "json-schema:///custom/meta", - "type": "string" - }); - - crate::options() - .with_draft(Draft::Draft7) - .build(&schema) - .expect("Explicit draft override should bypass custom meta-schema registry checks"); - } - #[test] fn test_regex_options_builder() { let options = PatternOptions::regex() @@ -1285,12 +1200,16 @@ mod tests { // Create a schema that references the specific definition let user_schema = json!({"$ref": "https://example.com/root#/definitions/User"}); - // Build validator with the root schema registered as a resource - let validator = crate::options() - .with_resource( + let registry = Registry::new() + .add( "https://example.com/root", Resource::from_contents(root_schema), ) + .unwrap() + .prepare() + .unwrap(); + let validator = crate::options() + .with_registry(®istry) .build(&user_schema) .expect("Valid schema"); diff --git a/crates/jsonschema/src/paths.rs b/crates/jsonschema/src/paths.rs index b1a20b60..edbb669a 100644 --- a/crates/jsonschema/src/paths.rs +++ b/crates/jsonschema/src/paths.rs @@ -5,7 +5,9 @@ use std::{ sync::{Arc, OnceLock}, }; -use referencing::unescape_segment; +use referencing::{ + unescape_segment, write_escaped_str, write_index, JsonPointerNode, JsonPointerSegment, +}; use crate::keywords::Keyword; @@ -31,39 +33,7 @@ impl fmt::Display for LocationSegment<'_> { /// /// [`LazyLocation`] builds 
a path incrementally during JSON Schema validation without allocating /// memory until required by storing each segment on the stack. -#[derive(Debug, Clone, Eq, PartialEq)] -pub struct LazyLocation<'a, 'b> { - pub(crate) segment: LocationSegment<'a>, - pub(crate) parent: Option<&'b LazyLocation<'b, 'a>>, -} - -impl Default for LazyLocation<'_, '_> { - fn default() -> Self { - LazyLocation::new() - } -} - -impl<'a> LazyLocation<'a, '_> { - /// Create a root node of a JSON pointer. - #[must_use] - pub const fn new() -> Self { - LazyLocation { - // The value does not matter, it will never be used - segment: LocationSegment::Index(0), - parent: None, - } - } - - /// Push a new segment to the JSON pointer. - #[inline] - #[must_use] - pub fn push(&'a self, segment: impl Into>) -> Self { - LazyLocation { - segment: segment.into(), - parent: Some(self), - } - } -} +pub type LazyLocation<'a, 'b> = JsonPointerNode<'a, 'b>; /// Cached empty location - very common for root-level errors. static EMPTY_LOCATION: OnceLock = OnceLock::new(); @@ -91,21 +61,25 @@ impl<'a> From<&'a LazyLocation<'_, '_>> for Location { const STACK_CAPACITY: usize = 16; // Fast path: empty location - if value.parent.is_none() { + if value.parent().is_none() { return Location::new(); } // Fast path: single index segment (very common for array validation) // Use cached locations for indices 0-15 to avoid allocation - if let Some(parent) = value.parent { - if parent.parent.is_none() { - if let LocationSegment::Index(idx) = &value.segment { + if let Some(parent) = value.parent() { + if parent.parent().is_none() { + if let JsonPointerSegment::Index(idx) = value.segment() { if *idx < 16 { return get_cached_index_paths()[*idx].clone(); } // Single index > 15: compute directly - let mut buf = itoa::Buffer::new(); - return Location(Arc::from(format!("/{}", buf.format(*idx)))); + let mut idx_buffer = itoa::Buffer::new(); + let idx = idx_buffer.format(*idx); + let mut buffer = String::with_capacity(1 + 
idx.len()); + buffer.push('/'); + buffer.push_str(idx); + return Location(Arc::from(buffer)); } } } @@ -117,11 +91,11 @@ impl<'a> From<&'a LazyLocation<'_, '_>> for Location { let mut string_capacity = 0; let mut head = value; - while let Some(next) = head.parent { + while let Some(next) = head.parent() { capacity += 1; - string_capacity += match &head.segment { - LocationSegment::Property(property) => property.len() + 1, - LocationSegment::Index(idx) => idx.checked_ilog10().unwrap_or(0) as usize + 2, + string_capacity += match head.segment() { + JsonPointerSegment::Key(property) => property.len() + 1, + JsonPointerSegment::Index(idx) => idx.checked_ilog10().unwrap_or(0) as usize + 2, }; head = next; } @@ -130,20 +104,20 @@ impl<'a> From<&'a LazyLocation<'_, '_>> for Location { if capacity <= STACK_CAPACITY { // Stack-allocated storage with references - no cloning needed - let mut stack_segments: [Option<&LocationSegment<'_>>; STACK_CAPACITY] = + let mut stack_segments: [Option<&JsonPointerSegment<'_>>; STACK_CAPACITY] = [None; STACK_CAPACITY]; let mut idx = 0; head = value; - if head.parent.is_some() { - stack_segments[idx] = Some(&head.segment); + if head.parent().is_some() { + stack_segments[idx] = Some(head.segment()); idx += 1; } - while let Some(next) = head.parent { + while let Some(next) = head.parent() { head = next; - if head.parent.is_some() { - stack_segments[idx] = Some(&head.segment); + if head.parent().is_some() { + stack_segments[idx] = Some(head.segment()); idx += 1; } } @@ -152,40 +126,38 @@ impl<'a> From<&'a LazyLocation<'_, '_>> for Location { for segment in stack_segments[..idx].iter().rev().flatten() { buffer.push('/'); match segment { - LocationSegment::Property(property) => { + JsonPointerSegment::Key(property) => { write_escaped_str(&mut buffer, property); } - LocationSegment::Index(idx) => { - let mut itoa_buffer = itoa::Buffer::new(); - buffer.push_str(itoa_buffer.format(*idx)); + JsonPointerSegment::Index(idx) => { + write_index(&mut 
buffer, *idx); } } } } else { // Heap-allocated fallback for deep paths (>16 segments) - let mut segments: Vec<&LocationSegment<'_>> = Vec::with_capacity(capacity); + let mut segments: Vec<&JsonPointerSegment<'_>> = Vec::with_capacity(capacity); head = value; - if head.parent.is_some() { - segments.push(&head.segment); + if head.parent().is_some() { + segments.push(head.segment()); } - while let Some(next) = head.parent { + while let Some(next) = head.parent() { head = next; - if head.parent.is_some() { - segments.push(&head.segment); + if head.parent().is_some() { + segments.push(head.segment()); } } for segment in segments.iter().rev() { buffer.push('/'); match segment { - LocationSegment::Property(property) => { + JsonPointerSegment::Key(property) => { write_escaped_str(&mut buffer, property); } - LocationSegment::Index(idx) => { - let mut itoa_buffer = itoa::Buffer::new(); - buffer.push_str(itoa_buffer.format(*idx)); + JsonPointerSegment::Index(idx) => { + write_index(&mut buffer, *idx); } } } @@ -569,6 +541,15 @@ impl From for LocationSegment<'_> { } } +impl<'a> From> for JsonPointerSegment<'a> { + fn from(value: LocationSegment<'a>) -> Self { + match value { + LocationSegment::Property(property) => JsonPointerSegment::Key(property), + LocationSegment::Index(idx) => JsonPointerSegment::Index(idx), + } + } +} + /// A cheap to clone JSON pointer that represents location with a JSON value. 
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct Location(Arc); @@ -622,9 +603,13 @@ impl Location { Self(Arc::from(buffer)) } LocationSegment::Index(idx) => { - let mut itoa_buf = itoa::Buffer::new(); - let segment = itoa_buf.format(idx); - Self(format!("{parent}/{segment}").into()) + let mut idx_buffer = itoa::Buffer::new(); + let idx = idx_buffer.format(idx); + let mut buffer = String::with_capacity(parent.len() + idx.len() + 1); + buffer.push_str(parent); + buffer.push('/'); + buffer.push_str(idx); + Self(Arc::from(buffer)) } } } @@ -651,44 +636,6 @@ impl Location { } } -pub fn write_escaped_str(buffer: &mut String, value: &str) { - match value.find(['~', '/']) { - Some(mut escape_idx) => { - let mut remaining = value; - - // Loop through the string to replace `~` and `/` - loop { - let (before, after) = remaining.split_at(escape_idx); - // Copy everything before the escape char - buffer.push_str(before); - - // Append the appropriate escape sequence - match after.as_bytes()[0] { - b'~' => buffer.push_str("~0"), - b'/' => buffer.push_str("~1"), - _ => unreachable!(), - } - - // Move past the escaped character - remaining = &after[1..]; - - // Find the next `~` or `/` to continue escaping - if let Some(next_escape_idx) = remaining.find(['~', '/']) { - escape_idx = next_escape_idx; - } else { - // Append any remaining part of the string - buffer.push_str(remaining); - break; - } - } - } - None => { - // If no escape characters are found, append the segment as is - buffer.push_str(value); - } - } -} - impl Default for Location { fn default() -> Self { Self::new() @@ -784,6 +731,17 @@ mod tests { assert_eq!(location.as_str(), expected); } + #[test] + fn test_lazy_location_converts_to_referencing_owned_json_pointer() { + let root = LazyLocation::new(); + let nested = root.push("foo/bar~baz"); + let loc = nested.push(2usize); + + let pointer = referencing::OwnedJsonPointer::from(&loc); + + assert_eq!(pointer.as_str(), "/foo~1bar~0baz/2"); + } 
+ #[test] fn test_location_join_multiple() { let loc = Location::new(); diff --git a/crates/jsonschema/src/retriever.rs b/crates/jsonschema/src/retriever.rs index 4b7698d1..7f6cec78 100644 --- a/crates/jsonschema/src/retriever.rs +++ b/crates/jsonschema/src/retriever.rs @@ -829,19 +829,11 @@ mod async_tests { } }); - // Create registry with default async retriever - let registry = Registry::options() - .async_retriever(DefaultRetriever) - .build([( - "http://example.com/schema", - crate::Draft::Draft202012.create_resource(schema.clone()), - )]) - .await - .expect("Registry creation failed"); - - let validator = crate::options() - .with_registry(registry) + let validator = crate::async_options() + .with_base_uri("http://example.com/schema") + .with_retriever(DefaultRetriever) .build(&schema) + .await .expect("Invalid schema"); let valid = json!({"user": {"name": "John Doe"}}); @@ -860,12 +852,11 @@ mod async_tests { } }); - let result = Registry::options() + let result = Registry::new() .async_retriever(DefaultRetriever) - .build([( - "http://example.com/schema", - crate::Draft::Draft202012.create_resource(schema), - )]) + .add("http://example.com/schema", schema) + .expect("Resource should be accepted") + .async_prepare() .await; assert!(result.is_err()); @@ -903,18 +894,11 @@ mod async_tests { } }); - let registry = Registry::options() - .async_retriever(DefaultRetriever) - .build([( - "http://example.com/schema", - crate::Draft::Draft202012.create_resource(schema.clone()), - )]) - .await - .expect("Registry creation failed"); - - let validator = crate::options() - .with_registry(registry) + let validator = crate::async_options() + .with_base_uri("http://example.com/schema") + .with_retriever(DefaultRetriever) .build(&schema) + .await .expect("Invalid schema"); let valid = json!({ diff --git a/crates/jsonschema/src/validator.rs b/crates/jsonschema/src/validator.rs index 7db3a940..a80ffd0c 100644 --- a/crates/jsonschema/src/validator.rs +++ 
b/crates/jsonschema/src/validator.rs @@ -289,7 +289,7 @@ impl Validator { /// .build(&schema); /// ``` #[must_use] - pub fn options() -> ValidationOptions { + pub fn options<'i>() -> ValidationOptions<'i> { ValidationOptions::default() } /// Create a default [`ValidationOptions`] configured for async validation. @@ -318,7 +318,8 @@ impl Validator { /// For sync validation, use [`options()`](crate::options()) instead. #[cfg(feature = "resolve-async")] #[must_use] - pub fn async_options() -> ValidationOptions<std::sync::Arc<dyn referencing::AsyncRetrieve>> { + pub fn async_options<'i>( + ) -> ValidationOptions<'i, std::sync::Arc<dyn referencing::AsyncRetrieve>> { ValidationOptions::default() } /// Create a validator using the default options. diff --git a/crates/jsonschema/tests/bundle.rs b/crates/jsonschema/tests/bundle.rs index 38b549c3..cd619096 100644 --- a/crates/jsonschema/tests/bundle.rs +++ b/crates/jsonschema/tests/bundle.rs @@ -1,7 +1,38 @@ -use jsonschema::ReferencingError; -use referencing::Resource; +use jsonschema::{ReferencingError, Registry}; use serde_json::{json, Value}; +const TEST_ROOT_URI: &str = "urn:jsonschema:test:root"; + +fn registry_from_resources<'a>(resources: &'a [(&str, Value)]) -> Registry<'a> { + let mut registry = jsonschema::Registry::new(); + for (uri, schema) in resources { + registry = registry + .add(*uri, schema) + .expect("resource should be accepted"); + } + registry.prepare().expect("registry build failed") +} + +fn try_bundle_with_resources( + root: &Value, + resources: &[(&str, Value)], +) -> Result<Value, ReferencingError> { + let registry = registry_from_resources(resources); + jsonschema::options() + .with_registry(&registry) + .with_base_uri(TEST_ROOT_URI) + .bundle(root) +} + +fn validator_with_resources(root: &Value, resources: &[(&str, Value)]) -> jsonschema::Validator { + let registry = registry_from_resources(resources); + jsonschema::options() + .with_registry(&registry) + .with_base_uri(TEST_ROOT_URI) + .build(root) + .expect("distributed compile failed") +} + #[cfg(all(feature = "resolve-async", not(target_arch = 
"wasm32")))] mod async_tests { use super::*; @@ -12,21 +43,21 @@ mod async_tests { "$schema": "https://json-schema.org/draft/2020-12/schema", "$ref": "https://example.com/person.json" }); + let registry = jsonschema::Registry::new() + .add("https://example.com/person.json", person_schema()) + .expect("resource should be accepted") + .prepare() + .expect("registry build failed"); let bundled = jsonschema::async_options() - .with_resource( - "https://example.com/person.json", - Resource::from_contents(person_schema()), - ) + .with_registry(&registry) + .with_base_uri(TEST_ROOT_URI) .bundle(&schema) .await .expect("async bundle failed"); - assert_eq!( - bundled.get("$ref"), - Some(&json!("https://example.com/person.json")) - ); - let defs = bundled.get("$defs").unwrap().as_object().unwrap(); - assert!(defs.contains_key("https://example.com/person.json")); + assert_eq!(bundled["$ref"], json!("https://example.com/person.json")); + let defs = bundled["$defs"].as_object().unwrap(); + assert!(!defs["https://example.com/person.json"].is_null()); } #[tokio::test] @@ -74,27 +105,19 @@ fn test_bundle_single_external_ref() { "$schema": "https://json-schema.org/draft/2020-12/schema", "$ref": "https://example.com/person.json" }); - let bundled = jsonschema::options() - .with_resource( - "https://example.com/person.json", - Resource::from_contents(person_schema()), - ) - .bundle(&schema) - .expect("bundle failed"); + let bundled = try_bundle_with_resources( + &schema, + &[("https://example.com/person.json", person_schema())], + ) + .expect("bundle failed"); // $ref MUST NOT be rewritten (spec requirement) - assert_eq!( - bundled.get("$ref"), - Some(&json!("https://example.com/person.json")) - ); - let defs = bundled.get("$defs").expect("no $defs").as_object().unwrap(); - assert!(defs.contains_key("https://example.com/person.json")); + assert_eq!(bundled["$ref"], json!("https://example.com/person.json")); + let defs = bundled["$defs"].as_object().unwrap(); + 
assert!(!defs["https://example.com/person.json"].is_null()); // embedded resource MUST have $id let embedded = &defs["https://example.com/person.json"]; - assert_eq!( - embedded.get("$id"), - Some(&json!("https://example.com/person.json")) - ); + assert_eq!(embedded["$id"], json!("https://example.com/person.json")); } #[test] @@ -103,13 +126,11 @@ fn test_bundle_validates_identically() { "$schema": "https://json-schema.org/draft/2020-12/schema", "$ref": "https://example.com/person.json" }); - let bundled = jsonschema::options() - .with_resource( - "https://example.com/person.json", - Resource::from_contents(person_schema()), - ) - .bundle(&schema) - .expect("bundle failed"); + let bundled = try_bundle_with_resources( + &schema, + &[("https://example.com/person.json", person_schema())], + ) + .expect("bundle failed"); let validator = jsonschema::validator_for(&bundled).expect("compile bundled failed"); assert!(validator.is_valid(&json!({"name": "Alice"}))); @@ -145,25 +166,22 @@ fn test_bundle_transitive_refs() { "$schema": "https://json-schema.org/draft/2020-12/schema", "$ref": "https://example.com/person.json" }); - let bundled = jsonschema::options() - .with_resource( - "https://example.com/person.json", - Resource::from_contents(person_with_address), - ) - .with_resource( - "https://example.com/address.json", - Resource::from_contents(address_schema), - ) - .bundle(&root) - .expect("bundle failed"); + let bundled = try_bundle_with_resources( + &root, + &[ + ("https://example.com/person.json", person_with_address), + ("https://example.com/address.json", address_schema), + ], + ) + .expect("bundle failed"); - let defs = bundled.get("$defs").unwrap().as_object().unwrap(); + let defs = bundled["$defs"].as_object().unwrap(); assert!( - defs.contains_key("https://example.com/person.json"), + !defs["https://example.com/person.json"].is_null(), "person missing" ); assert!( - defs.contains_key("https://example.com/address.json"), + 
!defs["https://example.com/address.json"].is_null(), "address missing" ); } @@ -181,17 +199,13 @@ fn test_bundle_circular_ref() { "$schema": "https://json-schema.org/draft/2020-12/schema", "$ref": "https://example.com/node.json" }); - let bundled = jsonschema::options() - .with_resource( - "https://example.com/node.json", - Resource::from_contents(node_schema), - ) - .bundle(&root) - .expect("bundle failed"); + let bundled = + try_bundle_with_resources(&root, &[("https://example.com/node.json", node_schema)]) + .expect("bundle failed"); - let defs = bundled.get("$defs").unwrap().as_object().unwrap(); + let defs = bundled["$defs"].as_object().unwrap(); assert_eq!(defs.len(), 1, "node.json should appear exactly once"); - assert!(defs.contains_key("https://example.com/node.json")); + assert!(!defs["https://example.com/node.json"].is_null()); } /// A `$ref` like `https://example.com/schema.json#/$defs/Name` should embed @@ -210,12 +224,7 @@ fn test_bundle_fragment_qualified_external_ref() { "name": { "$ref": "https://example.com/schema.json#/$defs/Name" } } }); - let bundled = jsonschema::options() - .with_resource( - "https://example.com/schema.json", - referencing::Resource::from_contents(schemas), - ) - .bundle(&root) + let bundled = try_bundle_with_resources(&root, &[("https://example.com/schema.json", schemas)]) .expect("bundle failed"); // $ref must NOT be rewritten @@ -225,8 +234,8 @@ fn test_bundle_fragment_qualified_external_ref() { json!("https://example.com/schema.json#/$defs/Name") ); // The whole schema.json document is embedded - let defs = bundled.get("$defs").expect("no $defs").as_object().unwrap(); - assert!(defs.contains_key("https://example.com/schema.json")); + let defs = bundled["$defs"].as_object().unwrap(); + assert!(!defs["https://example.com/schema.json"].is_null()); } /// An external schema that internally uses a relative $ref should have its @@ -251,25 +260,22 @@ fn test_bundle_relative_ref_inside_external_schema() { "$schema": 
"https://json-schema.org/draft/2020-12/schema", "$ref": "https://example.com/schemas/address.json" }); - let bundled = jsonschema::options() - .with_resource( - "https://example.com/schemas/address.json", - referencing::Resource::from_contents(address_schema), - ) - .with_resource( - "https://example.com/schemas/country.json", - referencing::Resource::from_contents(country_schema), - ) - .bundle(&root) - .expect("bundle failed"); + let bundled = try_bundle_with_resources( + &root, + &[ + ("https://example.com/schemas/address.json", address_schema), + ("https://example.com/schemas/country.json", country_schema), + ], + ) + .expect("bundle failed"); - let defs = bundled.get("$defs").expect("no $defs").as_object().unwrap(); + let defs = bundled["$defs"].as_object().unwrap(); assert!( - defs.contains_key("https://example.com/schemas/address.json"), + !defs["https://example.com/schemas/address.json"].is_null(), "address missing" ); assert!( - defs.contains_key("https://example.com/schemas/country.json"), + !defs["https://example.com/schemas/country.json"].is_null(), "country missing (transitive)" ); } @@ -281,17 +287,14 @@ fn test_bundle_inner_ref_not_rewritten() { let middle = json!({ "$id": "https://example.com/middle", "$ref": "https://example.com/leaf", "maximum": 100 }); let root = json!({ "$schema": "https://json-schema.org/draft/2020-12/schema", "$ref": "https://example.com/middle" }); - let bundled = jsonschema::options() - .with_resource( - "https://example.com/leaf", - referencing::Resource::from_contents(leaf), - ) - .with_resource( - "https://example.com/middle", - referencing::Resource::from_contents(middle), - ) - .bundle(&root) - .expect("bundle failed"); + let bundled = try_bundle_with_resources( + &root, + &[ + ("https://example.com/leaf", leaf), + ("https://example.com/middle", middle), + ], + ) + .expect("bundle failed"); assert_eq!( bundled["$ref"], @@ -321,18 +324,16 @@ fn test_bundle_resolves_ref_with_nested_id_scope() { } }); - let bundled = 
jsonschema::options() - .with_resource( - "https://example.com/A/b.json", - Resource::from_contents(nested_dependency), - ) - .bundle(&root) - .expect("bundle failed"); + let bundled = try_bundle_with_resources( + &root, + &[("https://example.com/A/b.json", nested_dependency)], + ) + .expect("bundle failed"); - let defs = bundled.get("$defs").expect("no $defs").as_object().unwrap(); - assert!(defs.contains_key("A")); + let defs = bundled["$defs"].as_object().unwrap(); + assert!(!defs["A"].is_null()); assert!( - defs.contains_key("https://example.com/A/b.json"), + !defs["https://example.com/A/b.json"].is_null(), "nested dependency was not embedded" ); } @@ -362,30 +363,27 @@ fn test_bundle_supports_legacy_drafts_using_definitions() { "$schema": schema_uri, "$ref": "https://example.com/person.json" }); - - let bundled = jsonschema::options() - .with_resource( + let bundled = try_bundle_with_resources( + &schema, + &[( "https://example.com/person.json", - Resource::from_contents(json!({ + json!({ "$id": "https://example.com/person.json", "$schema": schema_uri, "type": "object", "properties": { "name": { "type": "string" } } - })), - ) - .bundle(&schema) - .expect("bundle failed"); + }), + )], + ) + .expect("bundle failed"); assert!( bundled.get("$defs").is_none(), "unexpected $defs for {schema_uri}" ); - let definitions = bundled - .get("definitions") - .and_then(Value::as_object) - .expect("no definitions object"); + let definitions = bundled["definitions"].as_object().unwrap(); assert!( - definitions.contains_key("https://example.com/person.json"), + !definitions["https://example.com/person.json"].is_null(), "missing bundled resource for {schema_uri}" ); } @@ -397,23 +395,21 @@ fn test_bundle_draft4_embedded_resource_uses_id_keyword() { "$schema": "http://json-schema.org/draft-04/schema#", "$ref": "https://example.com/integer.json" }); - let bundled = jsonschema::options() - .with_resource( + let bundled = try_bundle_with_resources( + &root, + &[( 
"https://example.com/integer.json", - Resource::from_contents(json!({ + json!({ "$schema": "http://json-schema.org/draft-04/schema#", "type": "integer" - })), - ) - .bundle(&root) - .expect("bundle failed"); + }), + )], + ) + .expect("bundle failed"); let embedded = &bundled["definitions"]["https://example.com/integer.json"]; - assert_eq!( - embedded.get("id"), - Some(&json!("https://example.com/integer.json")) - ); - assert!(embedded.get("$id").is_none()); + assert_eq!(embedded["id"], json!("https://example.com/integer.json")); + assert!(embedded["$id"].is_null()); } #[test] @@ -519,21 +515,13 @@ fn test_bundle_202012_reuses_existing_definitions_container() { "type": "string" }); - let bundled = jsonschema::options() - .with_resource( - "https://example.com/ext.json", - Resource::from_contents(external.clone()), - ) - .bundle(&root) + let bundled = try_bundle_with_resources(&root, &[("https://example.com/ext.json", external)]) .expect("bundle failed"); assert!(bundled.get("$defs").is_none(), "unexpected $defs created"); - let definitions = bundled - .get("definitions") - .and_then(Value::as_object) - .expect("missing definitions"); - assert!(definitions.contains_key("localInt")); - assert!(definitions.contains_key("https://example.com/ext.json")); + let definitions = bundled["definitions"].as_object().unwrap(); + assert!(!definitions["localInt"].is_null()); + assert!(!definitions["https://example.com/ext.json"].is_null()); let validator = jsonschema::validator_for(&bundled).expect("bundled compile failed"); assert!(validator.is_valid(&json!({"local": 1, "external": "ok"}))); @@ -550,24 +538,21 @@ fn test_bundle_draft7_keeps_existing_defs_but_adds_definitions_for_resolution() } }); - let bundled = jsonschema::options() - .with_resource( + let bundled = try_bundle_with_resources( + &root, + &[( "https://example.com/ext.json", - Resource::from_contents(json!({ + json!({ "$schema": "http://json-schema.org/draft-07/schema#", "type": "integer" - })), - ) - 
.bundle(&root) - .expect("bundle failed"); + }), + )], + ) + .expect("bundle failed"); - assert!(bundled.get("$defs").is_some(), "existing $defs should stay"); + assert!(bundled["$defs"].is_object(), "existing $defs should stay"); assert!( - bundled - .get("definitions") - .and_then(Value::as_object) - .and_then(|defs| defs.get("https://example.com/ext.json")) - .is_some(), + !bundled["definitions"]["https://example.com/ext.json"].is_null(), "draft-07 bundles must embed into definitions for resolvability" ); @@ -582,19 +567,8 @@ fn assert_bundle_parity( valid_instances: &[Value], invalid_instances: &[Value], ) { - // Validator from distributed schemas (registered individually) - let mut opts = jsonschema::options(); - for (uri, schema) in resources { - opts = opts.with_resource(*uri, Resource::from_contents(schema.clone())); - } - let distributed = opts.build(root).expect("distributed compile failed"); - - // Validator from bundled schema - let mut bundle_opts = jsonschema::options(); - for (uri, schema) in resources { - bundle_opts = bundle_opts.with_resource(*uri, Resource::from_contents(schema.clone())); - } - let bundled = bundle_opts.bundle(root).expect("bundle failed"); + let distributed = validator_with_resources(root, resources); + let bundled = try_bundle_with_resources(root, resources).expect("bundle failed"); let bundled_validator = jsonschema::validator_for(&bundled).expect("bundled compile failed"); for instance in valid_instances { @@ -719,26 +693,20 @@ fn test_parity_merge_with_existing_defs() { ); } -/// Walk recurses into embedded schemas; an unresolvable $ref inside one must propagate. +/// Missing refs reachable from the shared registry fail during registry preparation. #[test] -fn test_bundle_error_propagates_from_recursive_walk() { +fn test_registry_prepare_error_propagates_for_missing_transitive_ref() { // `middle` is registered, but it references `leaf` which is not registered. 
- // The walk recurses into `middle` and fails when resolving `leaf`. + // Preparation should fail before bundling starts. let middle = json!({ "$id": "https://example.com/middle.json", "$schema": "https://json-schema.org/draft/2020-12/schema", "$ref": "https://example.com/leaf.json" }); - let root = json!({ - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$ref": "https://example.com/middle.json" - }); - let result = jsonschema::options() - .with_resource( - "https://example.com/middle.json", - Resource::from_contents(middle), - ) - .bundle(&root); + let result = jsonschema::Registry::new() + .add("https://example.com/middle.json", middle) + .expect("resource should be accepted") + .prepare(); assert!( matches!(result, Err(ReferencingError::Unretrievable { .. })), "expected Unretrievable, got: {result:?}" diff --git a/crates/jsonschema/tests/suite b/crates/jsonschema/tests/suite index e819f329..54ed4d1f 160000 --- a/crates/jsonschema/tests/suite +++ b/crates/jsonschema/tests/suite @@ -1 +1 @@ -Subproject commit e819f329268130e0ed5bbc87b071c83d8e02a68a +Subproject commit 54ed4d1fb0ac8ab16c453df5db8b5e81689c6fed diff --git a/fuzz/fuzz_targets/referencing.rs b/fuzz/fuzz_targets/referencing.rs index 863fd1f8..33491665 100644 --- a/fuzz/fuzz_targets/referencing.rs +++ b/fuzz/fuzz_targets/referencing.rs @@ -1,6 +1,6 @@ #![no_main] use libfuzzer_sys::fuzz_target; -use referencing::{Draft, Registry}; +use referencing::{uri, Draft, Registry, RegistryBuilder}; fuzz_target!(|data: (&[u8], &[u8], &[u8])| { let (schema, base, reference) = data; @@ -14,13 +14,14 @@ fuzz_target!(|data: (&[u8], &[u8], &[u8])| { Draft::Draft201909, Draft::Draft202012, ] { - let resource = draft.create_resource(schema.clone()); - if let Ok(registry) = Registry::try_new(base, resource) { - if let Ok(resolver) = - registry.try_resolver("http://example.com/schema.json") - { - let _resolved = resolver.lookup(reference); - } + if let Ok(registry) = Registry::new() + .draft(draft) + 
.add(base, &schema) + .and_then(RegistryBuilder::prepare) + { + let resolver = registry + .resolver(uri::from_str("http://example.com/schema.json").unwrap()); + let _resolved = resolver.lookup(reference); } } } diff --git a/profiler/Cargo.toml b/profiler/Cargo.toml index f3441d66..5d465b2b 100644 --- a/profiler/Cargo.toml +++ b/profiler/Cargo.toml @@ -19,3 +19,7 @@ dhat-heap = [] [profile.release] debug = true + +[profile.profiling] +inherits = "release" +debug = true diff --git a/profiler/Justfile b/profiler/Justfile index ca7ac5db..9da94897 100644 --- a/profiler/Justfile +++ b/profiler/Justfile @@ -31,7 +31,7 @@ flame-custom method schema instance iterations="10000": @echo "Opening custom-{{method}}.svg in browser..." @xdg-open custom-{{method}}.svg 2>/dev/null || open custom-{{method}}.svg 2>/dev/null || echo "Please open custom-{{method}}.svg manually" -# Profile with custom schema only (no instance needed for build/registry methods) +# Profile with custom schema only (no instance needed for compile method) flame-schema method schema iterations="10000": CARGO_PROFILE_BENCH_DEBUG=true cargo flamegraph \ -o schema-{{method}}.svg \ @@ -80,6 +80,9 @@ fast-invalid-iter-errors: (flame "fast-invalid" "iter_errors" "10000") fast-invalid-evaluate: (flame "fast-invalid" "evaluate" "10000") registry: (flame "citm" "registry" "1000") +registry-owned: (flame "citm" "registry-owned" "1000") +registry-owned-specifications: (flame "openapi" "registry-owned-with-specifications" "1000") +fhir-build: (flame "fhir" "build" "500") dhat-citm-build: (dhat "citm" "build" "10000") dhat-citm-is-valid: (dhat "citm" "is_valid" "10000") diff --git a/profiler/src/main.rs b/profiler/src/main.rs index 7ddfaa13..6ba36444 100644 --- a/profiler/src/main.rs +++ b/profiler/src/main.rs @@ -1,4 +1,5 @@ -use referencing::{Draft, Registry}; +use jsonschema::Registry; +use referencing::{Draft, SPECIFICATIONS}; use serde_json::Value; use std::fs; @@ -10,6 +11,7 @@ struct Args { iterations: usize, 
schema_path: String, instance_path: Option<String>, + draft: Option<Draft>, method: String, } @@ -18,28 +20,58 @@ fn main() -> Result<(), Box<dyn std::error::Error>> { // Handle presets let preset = pico_args.value_from_str::<_, String>("--preset").ok(); - let (schema_path, instance_path) = if let Some(preset) = preset { + let (schema_path, instance_path, draft) = if let Some(preset) = preset { match preset.as_str() { - "openapi" => ("../crates/benchmark/data/openapi.json".to_string(), Some("../crates/benchmark/data/zuora.json".to_string())), - "swagger" => ("../crates/benchmark/data/swagger.json".to_string(), Some("../crates/benchmark/data/kubernetes.json".to_string())), - "geojson" => ("../crates/benchmark/data/geojson.json".to_string(), Some("../crates/benchmark/data/canada.json".to_string())), - "citm" => ("../crates/benchmark/data/citm_catalog_schema.json".to_string(), Some("../crates/benchmark/data/citm_catalog.json".to_string())), - "fast-valid" => ("../crates/benchmark/data/fast_schema.json".to_string(), Some("../crates/benchmark/data/fast_valid.json".to_string())), - "fast-invalid" => ("../crates/benchmark/data/fast_schema.json".to_string(), Some("../crates/benchmark/data/fast_invalid.json".to_string())), - _ => return Err(format!("Unknown preset: {}. 
Available: openapi, swagger, geojson, citm, fast-valid, fast-invalid", preset).into()), + "openapi" => ( + "../crates/benchmark/data/openapi.json".to_string(), + Some("../crates/benchmark/data/zuora.json".to_string()), + Some(Draft::Draft4), + ), + "swagger" => ( + "../crates/benchmark/data/swagger.json".to_string(), + Some("../crates/benchmark/data/kubernetes.json".to_string()), + Some(Draft::Draft4), + ), + "geojson" => ( + "../crates/benchmark/data/geojson.json".to_string(), + Some("../crates/benchmark/data/canada.json".to_string()), + Some(Draft::Draft4), + ), + "citm" => ( + "../crates/benchmark/data/citm_catalog_schema.json".to_string(), + Some("../crates/benchmark/data/citm_catalog.json".to_string()), + Some(Draft::Draft4), + ), + "fast-valid" => ( + "../crates/benchmark/data/fast_schema.json".to_string(), + Some("../crates/benchmark/data/fast_valid.json".to_string()), + Some(Draft::Draft7), + ), + "fast-invalid" => ( + "../crates/benchmark/data/fast_schema.json".to_string(), + Some("../crates/benchmark/data/fast_invalid.json".to_string()), + Some(Draft::Draft7), + ), + "fhir" => ( + "../crates/benchmark/data/fhir.schema.json".to_string(), + None, + None, + ), + _ => return Err(format!("Unknown preset: {}. 
Available: openapi, swagger, geojson, citm, fast-valid, fast-invalid, fhir", preset).into()), } } else { let schema_path = pico_args .value_from_str("--schema") .map_err(|_| "--schema is required when not using --preset")?; let instance_path = pico_args.value_from_str("--instance").ok(); - (schema_path, instance_path) + (schema_path, instance_path, None) }; let args = Args { iterations: pico_args.value_from_str("--iterations")?, schema_path, instance_path, + draft, method: pico_args.value_from_str("--method")?, }; @@ -51,29 +83,47 @@ fn main() -> Result<(), Box<dyn std::error::Error>> { let schema_str = fs::read_to_string(&args.schema_path)?; let schema: Value = serde_json::from_str(&schema_str)?; + let draft = args.draft.unwrap_or_else(|| Draft::default().detect(&schema)); + + // To initialise metaschema validators + let _ = &*SPECIFICATIONS; #[cfg(feature = "dhat-heap")] let _profiler = dhat::Profiler::new_heap(); match args.method.as_str() { "build" => { - if args.instance_path.is_some() { - return Err("--instance is not supported for 'build' method".into()); - } for _ in 0..args.iterations { let _ = jsonschema::validator_for(&schema)?; } } "registry" => { - if args.instance_path.is_some() { - return Err("--instance is not supported for 'registry' method".into()); + for _ in 0..args.iterations { + let _ = Registry::new() + .extend([("http://example.com/schema", &schema)]) + .expect("Invalid resource") + .prepare() + .expect("Failed to build registry"); } + } + "registry-owned" => { + for _ in 0..args.iterations { + let resource = draft.create_resource(schema.clone()); + let _ = Registry::new() + .add("http://example.com/schema", resource) + .expect("Invalid resource") + .prepare() + .expect("Failed to build registry"); + } + } + "registry-owned-with-specifications" => { for _ in 0..args.iterations { - let input_resources = vec![( - "http://example.com/schema", - Draft::Draft202012.create_resource(schema.clone()), - )]; - let _ = 
Registry::try_from_resources(input_resources.into_iter()); + let resource = draft.create_resource(schema.clone()); + let _ = SPECIFICATIONS + .add("http://example.com/schema", resource) + .expect("Invalid resource") + .prepare() + .expect("Failed to build registry"); } } "is_valid" | "validate" | "iter_errors" | "evaluate" => { @@ -116,7 +166,8 @@ fn main() -> Result<(), Box<dyn std::error::Error>> { } _ => { return Err( - "Invalid method. Use 'registry', 'build', 'is_valid', 'validate', 'iter_errors', or 'evaluate'".into() + "Invalid method. Use 'registry', 'registry-owned', 'registry-owned-with-specifications', 'build', 'is_valid', 'validate', 'iter_errors', or 'evaluate'" + .into(), ); } }